Merge git://git.infradead.org/mtd-2.6 * git://git.infradead.org/mtd-2.6: mtd: fix physmap.h warnings

commit: 3f303103b884ca577908d3e5c0650ad12e40c586 [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Wed Jun 01 21:47:39 2011 +0900
committer: Linus Torvalds <torvalds@linux-foundation.org> Wed Jun 01 21:47:39 2011 +0900
tree: 021b8baeba20df329b60798ad4fb55f280b0cbda
parent: 5c6cce92bc8aee751aafe82c5d9caf7553226a3d [diff]
parent: 63da029015b5255915cd6d61f19ffc276ad4635d [diff]
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index c078ad4..8173cec 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt

@@ -99,18 +99,11 @@
 
 o	"dt" is the current value of the dyntick counter that is incremented
 	when entering or leaving dynticks idle state, either by the
-	scheduler or by irq.  The number after the "/" is the interrupt
-	nesting depth when in dyntick-idle state, or one greater than
-	the interrupt-nesting depth otherwise.
-
-	This field is displayed only for CONFIG_NO_HZ kernels.
-
-o	"dn" is the current value of the dyntick counter that is incremented
-	when entering or leaving dynticks idle state via NMI.  If both
-	the "dt" and "dn" values are even, then this CPU is in dynticks
-	idle mode and may be ignored by RCU.  If either of these two
-	counters is odd, then RCU must be alert to the possibility of
-	an RCU read-side critical section running on this CPU.
+	scheduler or by irq.  This number is even if the CPU is in
+	dyntick idle mode and odd otherwise.  The number after the first
+	"/" is the interrupt nesting depth when in dyntick-idle state,
+	or one greater than the interrupt-nesting depth otherwise.
+	The number after the second "/" is the NMI nesting depth.
 
 	This field is displayed only for CONFIG_NO_HZ kernels.
 

diff --git a/Documentation/acpi/method-customizing.txt b/Documentation/acpi/method-customizing.txt
index 3e1d25a..5f55373 100644
--- a/Documentation/acpi/method-customizing.txt
+++ b/Documentation/acpi/method-customizing.txt

@@ -66,3 +66,8 @@
       But each individual write to debugfs can implement a SINGLE
       method override. i.e. if we want to insert/override multiple
       ACPI methods, we need to redo step c) ~ g) for multiple times.
+
+Note: Be aware that root can mis-use this driver to modify arbitrary
+      memory and gain additional rights, if root's privileges got
+      restricted (for example if root is not allowed to load additional
+      modules after boot).

diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt
index 0c1c2f6..5a0cb1e 100644
--- a/Documentation/dmaengine.txt
+++ b/Documentation/dmaengine.txt

@@ -1 +1,96 @@
-See Documentation/crypto/async-tx-api.txt
+			DMA Engine API Guide
+			====================
+
+		 Vinod Koul <vinod dot koul at intel.com>
+
+NOTE: For DMA Engine usage in async_tx please see:
+	Documentation/crypto/async-tx-api.txt
+
+
+Below is a guide to device driver writers on how to use the Slave-DMA API of the
+DMA Engine. This is applicable only for slave DMA usage only.
+
+The slave DMA usage consists of following steps
+1. Allocate a DMA slave channel
+2. Set slave and controller specific parameters
+3. Get a descriptor for transaction
+4. Submit the transaction and wait for callback notification
+
+1. Allocate a DMA slave channel
+Channel allocation is slightly different in the slave DMA context, client
+drivers typically need a channel from a particular DMA controller only and even
+in some cases a specific channel is desired. To request a channel
+dma_request_channel() API is used.
+
+Interface:
+struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
+		dma_filter_fn filter_fn,
+		void *filter_param);
+where dma_filter_fn is defined as:
+typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+
+When the optional 'filter_fn' parameter is set to NULL dma_request_channel
+simply returns the first channel that satisfies the capability mask.  Otherwise,
+when the mask parameter is insufficient for specifying the necessary channel,
+the filter_fn routine can be used to disposition the available channels in the
+system. The filter_fn routine is called once for each free channel in the
+system.  Upon seeing a suitable channel filter_fn returns DMA_ACK which flags
+that channel to be the return value from dma_request_channel.  A channel
+allocated via this interface is exclusive to the caller, until
+dma_release_channel() is called.
+
+2. Set slave and controller specific parameters
+Next step is always to pass some specific information to the DMA driver. Most of
+the generic information which a slave DMA can use is in struct dma_slave_config.
+It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA
+burst lengths etc. If some DMA controllers have more parameters to be sent then
+they should try to embed struct dma_slave_config in their controller specific
+structure. That gives flexibility to client to pass more parameters, if
+required.
+
+Interface:
+int dmaengine_slave_config(struct dma_chan *chan,
+					  struct dma_slave_config *config)
+
+3. Get a descriptor for transaction
+For slave usage the various modes of slave transfers supported by the
+DMA-engine are:
+slave_sg	- DMA a list of scatter gather buffers from/to a peripheral
+dma_cyclic	- Perform a cyclic DMA operation from/to a peripheral till the
+		  operation is explicitly stopped.
+The non NULL return of this transfer API represents a "descriptor" for the given
+transaction.
+
+Interface:
+struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)(
+		struct dma_chan *chan,
+		struct scatterlist *dst_sg, unsigned int dst_nents,
+		struct scatterlist *src_sg, unsigned int src_nents,
+		unsigned long flags);
+struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)(
+		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction);
+
+4. Submit the transaction and wait for callback notification
+To schedule the transaction to be scheduled by dma device, the "descriptor"
+returned in above (3) needs to be submitted.
+To tell the dma driver that a transaction is ready to be serviced, the
+descriptor->submit() callback needs to be invoked. This chains the descriptor to
+the pending queue.
+The transactions in the pending queue can be activated by calling the
+issue_pending API. If channel is idle then the first transaction in queue is
+started and subsequent ones queued up.
+On completion of the DMA operation the next in queue is submitted and a tasklet
+triggered. The tasklet would then call the client driver completion callback
+routine for notification, if set.
+Interface:
+void dma_async_issue_pending(struct dma_chan *chan);
+
+==============================================================================
+
+Additional usage notes for dma driver writers
+1/ Although DMA engine specifies that completion callback routines cannot submit
+any new operations, but typically for slave DMA subsequent transaction may not
+be available for submit prior to callback routine being called. This requirement
+is not a requirement for DMA-slave devices. But they should take care to drop
+the spin-lock they might be holding before calling the callback routine

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ff31b1c..1a9446b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt

@@ -6,6 +6,42 @@
 
 ---------------------------
 
+What:	x86 floppy disable_hlt
+When:	2012
+Why:	ancient workaround of dubious utility clutters the
+	code used by everybody else.
+Who:	Len Brown <len.brown@intel.com>
+
+---------------------------
+
+What:	CONFIG_APM_CPU_IDLE, and its ability to call APM BIOS in idle
+When:	2012
+Why:	This optional sub-feature of APM is of dubious reliability,
+	and ancient APM laptops are likely better served by calling HLT.
+	Deleting CONFIG_APM_CPU_IDLE allows x86 to stop exporting
+	the pm_idle function pointer to modules.
+Who:	Len Brown <len.brown@intel.com>
+
+----------------------------
+
+What:	x86_32 "no-hlt" cmdline param
+When:	2012
+Why:	remove a branch from idle path, simplify code used by everybody.
+	This option disabled the use of HLT in idle and machine_halt()
+	for hardware that was flakey 15-years ago.  Today we have
+	"idle=poll" that removed HLT from idle, and so if such a machine
+	is still running the upstream kernel, "idle=poll" is likely sufficient.
+Who:	Len Brown <len.brown@intel.com>
+
+----------------------------
+
+What:	x86 "idle=mwait" cmdline param
+When:	2012
+Why:	simplify x86 idle code
+Who:	Len Brown <len.brown@intel.com>
+
+----------------------------
+
 What:	PRISM54
 When:	2.6.34
 

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 61b31ac..57d827d 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking

@@ -104,7 +104,7 @@
 prototypes:
 	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
-	void (*dirty_inode) (struct inode *);
+	void (*dirty_inode) (struct inode *, int flags);
 	int (*write_inode) (struct inode *, struct writeback_control *wbc);
 	int (*drop_inode) (struct inode *);
 	void (*evict_inode) (struct inode *);
@@ -126,7 +126,7 @@
 			s_umount
 alloc_inode:
 destroy_inode:
-dirty_inode:				(must not sleep)
+dirty_inode:
 write_inode:
 drop_inode:				!!!inode->i_lock!!!
 evict_inode:

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 21a7dc4..88b9f55 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt

@@ -211,7 +211,7 @@
         struct inode *(*alloc_inode)(struct super_block *sb);
         void (*destroy_inode)(struct inode *);
 
-        void (*dirty_inode) (struct inode *);
+        void (*dirty_inode) (struct inode *, int flags);
         int (*write_inode) (struct inode *, int);
         void (*drop_inode) (struct inode *);
         void (*delete_inode) (struct inode *);

diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt
deleted file mode 100644
index 4beafa6..0000000
--- a/Documentation/laptops/acer-wmi.txt
+++ /dev/null

@@ -1,184 +0,0 @@
-Acer Laptop WMI Extras Driver
-http://code.google.com/p/aceracpi
-Version 0.3
-4th April 2009
-
-Copyright 2007-2009 Carlos Corbacho <carlos@strangeworlds.co.uk>
-
-acer-wmi is a driver to allow you to control various parts of your Acer laptop
-hardware under Linux which are exposed via ACPI-WMI.
-
-This driver completely replaces the old out-of-tree acer_acpi, which I am
-currently maintaining for bug fixes only on pre-2.6.25 kernels. All development
-work is now focused solely on acer-wmi.
-
-Disclaimer
-**********
-
-Acer and Wistron have provided nothing towards the development acer_acpi or
-acer-wmi. All information we have has been through the efforts of the developers
-and the users to discover as much as possible about the hardware.
-
-As such, I do warn that this could break your hardware - this is extremely
-unlikely of course, but please bear this in mind.
-
-Background
-**********
-
-acer-wmi is derived from acer_acpi, originally developed by Mark
-Smith in 2005, then taken over by Carlos Corbacho in 2007, in order to activate
-the wireless LAN card under a 64-bit version of Linux, as acerhk[1] (the
-previous solution to the problem) relied on making 32 bit BIOS calls which are
-not possible in kernel space from a 64 bit OS.
-
-[1] acerhk: http://www.cakey.de/acerhk/
-
-Supported Hardware
-******************
-
-NOTE: The Acer Aspire One is not supported hardware. It cannot work with
-acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been
-blacklisted until that happens.
-
-Please see the website for the current list of known working hardware:
-
-http://code.google.com/p/aceracpi/wiki/SupportedHardware
-
-If your laptop is not listed, or listed as unknown, and works with acer-wmi,
-please contact me with a copy of the DSDT.
-
-If your Acer laptop doesn't work with acer-wmi, I would also like to see the
-DSDT.
-
-To send me the DSDT, as root/sudo:
-
-cat /sys/firmware/acpi/tables/DSDT > dsdt
-
-And send me the resulting 'dsdt' file.
-
-Usage
-*****
-
-On Acer laptops, acer-wmi should already be autoloaded based on DMI matching.
-For non-Acer laptops, until WMI based autoloading support is added, you will
-need to manually load acer-wmi.
-
-acer-wmi creates /sys/devices/platform/acer-wmi, and fills it with various
-files whose usage is detailed below, which enables you to control some of the
-following (varies between models):
-
-* the wireless LAN card radio
-* inbuilt Bluetooth adapter
-* inbuilt 3G card
-* mail LED of your laptop
-* brightness of the LCD panel
-
-Wireless
-********
-
-With regards to wireless, all acer-wmi does is enable the radio on the card. It
-is not responsible for the wireless LED - once the radio is enabled, this is
-down to the wireless driver for your card. So the behaviour of the wireless LED,
-once you enable the radio, will depend on your hardware and driver combination.
-
-e.g. With the BCM4318 on the Acer Aspire 5020 series:
-
-ndiswrapper: Light blinks on when transmitting
-b43: Solid light, blinks off when transmitting
-
-Wireless radio control is unconditionally enabled - all Acer laptops that support
-acer-wmi come with built-in wireless. However, should you feel so inclined to
-ever wish to remove the card, or swap it out at some point, please get in touch
-with me, as we may well be able to gain some data on wireless card detection.
-
-The wireless radio is exposed through rfkill.
-
-Bluetooth
-*********
-
-For bluetooth, this is an internal USB dongle, so once enabled, you will get
-a USB device connection event, and a new USB device appears. When you disable
-bluetooth, you get the reverse - a USB device disconnect event, followed by the
-device disappearing again.
-
-Bluetooth is autodetected by acer-wmi, so if you do not have a bluetooth module
-installed in your laptop, this file won't exist (please be aware that it is
-quite common for Acer not to fit bluetooth to their laptops - so just because
-you have a bluetooth button on the laptop, doesn't mean that bluetooth is
-installed).
-
-For the adventurously minded - if you want to buy an internal bluetooth
-module off the internet that is compatible with your laptop and fit it, then
-it will work just fine with acer-wmi.
-
-Bluetooth is exposed through rfkill.
-
-3G
-**
-
-3G is currently not autodetected, so the 'threeg' file is always created under
-sysfs. So far, no-one in possession of an Acer laptop with 3G built-in appears to
-have tried Linux, or reported back, so we don't have any information on this.
-
-If you have an Acer laptop that does have a 3G card in, please contact me so we
-can properly detect these, and find out a bit more about them.
-
-To read the status of the 3G card (0=off, 1=on):
-cat /sys/devices/platform/acer-wmi/threeg
-
-To enable the 3G card:
-echo 1 > /sys/devices/platform/acer-wmi/threeg
-
-To disable the 3G card:
-echo 0 > /sys/devices/platform/acer-wmi/threeg
-
-To set the state of the 3G card when loading acer-wmi, pass:
-threeg=X (where X is 0 or 1)
-
-Mail LED
-********
-
-This can be found in most older Acer laptops supported by acer-wmi, and many
-newer ones - it is built into the 'mail' button, and blinks when active.
-
-On newer (WMID) laptops though, we have no way of detecting the mail LED. If
-your laptop identifies itself in dmesg as a WMID model, then please try loading
-acer_acpi with:
-
-force_series=2490
-
-This will use a known alternative method of reading/ writing the mail LED. If
-it works, please report back to me with the DMI data from your laptop so this
-can be added to acer-wmi.
-
-The LED is exposed through the LED subsystem, and can be found in:
-
-/sys/devices/platform/acer-wmi/leds/acer-wmi::mail/
-
-The mail LED is autodetected, so if you don't have one, the LED device won't
-be registered.
-
-Backlight
-*********
-
-The backlight brightness control is available on all acer-wmi supported
-hardware. The maximum brightness level is usually 15, but on some newer laptops
-it's 10 (this is again autodetected).
-
-The backlight is exposed through the backlight subsystem, and can be found in:
-
-/sys/devices/platform/acer-wmi/backlight/acer-wmi/
-
-Credits
-*******
-
-Olaf Tauber, who did the real hard work when he developed acerhk
-http://www.cakey.de/acerhk/
-All the authors of laptop ACPI modules in the kernel, whose work
-was an inspiration in the early days of acer_acpi
-Mathieu Segaud, who solved the problem with having to modprobe the driver
-twice in acer_acpi 0.2.
-Jim Ramsay, who added support for the WMID interface
-Mark Smith, who started the original acer_acpi
-
-And the many people who have used both acer_acpi and acer-wmi.

diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 9c0a80d..cef00d4 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt

@@ -12,8 +12,9 @@
 - HOW
 
 Lockdep already has hooks in the lock functions and maps lock instances to
-lock classes. We build on that. The graph below shows the relation between
-the lock functions and the various hooks therein.
+lock classes. We build on that (see Documentation/lockdep-design.txt).
+The graph below shows the relation between the lock functions and the various
+hooks therein.
 
         __acquire
             |
@@ -128,6 +129,37 @@
 
 The integer part of the time values is in us.
 
+Dealing with nested locks, subclasses may appear:
+
+32...............................................................................................................................................................................................
+33
+34                               &rq->lock:         13128          13128           0.43         190.53      103881.26          97454        3453404           0.00         401.11    13224683.11
+35                               ---------
+36                               &rq->lock            645          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+37                               &rq->lock            297          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+38                               &rq->lock            360          [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
+39                               &rq->lock            428          [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
+40                               ---------
+41                               &rq->lock             77          [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+42                               &rq->lock            174          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+43                               &rq->lock           4715          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+44                               &rq->lock            893          [<ffffffff81340524>] schedule+0x157/0x7b8
+45
+46...............................................................................................................................................................................................
+47
+48                             &rq->lock/1:         11526          11488           0.33         388.73      136294.31          21461          38404           0.00          37.93      109388.53
+49                             -----------
+50                             &rq->lock/1          11526          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+51                             -----------
+52                             &rq->lock/1           5645          [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+53                             &rq->lock/1           1224          [<ffffffff81340524>] schedule+0x157/0x7b8
+54                             &rq->lock/1           4336          [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+55                             &rq->lock/1            181          [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+
+Line 48 shows statistics for the second subclass (/1) of &rq->lock class
+(subclass starts from 0), since in this case, as line 50 suggests,
+double_rq_lock actually acquires a nested lock of two spinlocks.
+
 View the top contending locks:
 
 # grep : /proc/lock_stat | head

diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile
index bebac6b..0ac3420 100644
--- a/Documentation/virtual/lguest/Makefile
+++ b/Documentation/virtual/lguest/Makefile

@@ -1,5 +1,5 @@
 # This creates the demonstration utility "lguest" which runs a Linux guest.
-# Missing headers?  Add "-I../../include -I../../arch/x86/include"
+# Missing headers?  Add "-I../../../include -I../../../arch/x86/include"
 CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
 
 all: lguest

diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
index d9da7e1..cd9d6af 100644
--- a/Documentation/virtual/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c

@@ -49,7 +49,7 @@
 #include <linux/virtio_rng.h>
 #include <linux/virtio_ring.h>
 #include <asm/bootparam.h>
-#include "../../include/linux/lguest_launcher.h"
+#include "../../../include/linux/lguest_launcher.h"
 /*L:110
  * We can ignore the 42 include files we need for this program, but I do want
  * to draw attention to the use of kernel-style types.
@@ -135,9 +135,6 @@
 	/* Is it operational */
 	bool running;
 
-	/* Does Guest want an intrrupt on empty? */
-	bool irq_on_empty;
-
 	/* Device-specific data. */
 	void *priv;
 };
@@ -637,10 +634,7 @@
 
 	/* If they don't want an interrupt, don't send one... */
 	if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
-		/* ... unless they've asked us to force one on empty. */
-		if (!vq->dev->irq_on_empty
-		    || lg_last_avail(vq) != vq->vring.avail->idx)
-			return;
+		return;
 	}
 
 	/* Send the Guest an interrupt tell them we used something up. */
@@ -1057,15 +1051,6 @@
 	close(vq->eventfd);
 }
 
-static bool accepted_feature(struct device *dev, unsigned int bit)
-{
-	const u8 *features = get_feature_bits(dev) + dev->feature_len;
-
-	if (dev->feature_len < bit / CHAR_BIT)
-		return false;
-	return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
-}
-
 static void start_device(struct device *dev)
 {
 	unsigned int i;
@@ -1079,8 +1064,6 @@
 		verbose(" %02x", get_feature_bits(dev)
 			[dev->feature_len+i]);
 
-	dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
-
 	for (vq = dev->vq; vq; vq = vq->next) {
 		if (vq->service)
 			create_thread(vq);
@@ -1564,7 +1547,6 @@
 	/* Set up the tun device. */
 	configure_device(ipfd, tapif, ip);
 
-	add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
 	/* Expect Guest to handle everything except UFO */
 	add_feature(dev, VIRTIO_NET_F_CSUM);
 	add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);

diff --git a/MAINTAINERS b/MAINTAINERS
index b9f5aee..29801f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -223,10 +223,8 @@
 F:	drivers/platform/x86/acerhdf.c
 
 ACER WMI LAPTOP EXTRAS
-M:	Carlos Corbacho <carlos@strangeworlds.co.uk>
-L:	aceracpi@googlegroups.com (subscribers-only)
+M:	Joey Lee <jlee@novell.com>
 L:	platform-driver-x86@vger.kernel.org
-W:	http://code.google.com/p/aceracpi
 S:	Maintained
 F:	drivers/platform/x86/acer-wmi.c
 
@@ -271,10 +269,8 @@
 F:	drivers/acpi/video.c
 
 ACPI WMI DRIVER
-M:	Carlos Corbacho <carlos@strangeworlds.co.uk>
 L:	platform-driver-x86@vger.kernel.org
-W:	http://www.lesswatts.org/projects/acpi/
-S:	Maintained
+S:	Orphan
 F:	drivers/platform/x86/wmi.c
 
 AD1889 ALSA SOUND DRIVER
@@ -2178,6 +2174,8 @@
 S:	Supported
 F:	drivers/dma/
 F:	include/linux/dma*
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx.git
+T:	git git://git.infradead.org/users/vkoul/slave-dma.git (slave-dma)
 
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
@@ -3031,9 +3029,8 @@
 F:	drivers/net/wireless/hostap/
 
 HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER
-M:	Carlos Corbacho <carlos@strangeworlds.co.uk>
 L:	platform-driver-x86@vger.kernel.org
-S:	Odd Fixes
+S:	Orphan
 F:	drivers/platform/x86/tc1100-wmi.c
 
 HP100:	Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series
@@ -5451,6 +5448,13 @@
 S:	Maintained
 F:	drivers/tty/serial
 
+SYNOPSYS DESIGNWARE DMAC DRIVER
+M:	Viresh Kumar <viresh.kumar@st.com>
+S:	Maintained
+F:	include/linux/dw_dmac.h
+F:	drivers/dma/dw_dmac_regs.h
+F:	drivers/dma/dw_dmac.c
+
 TIMEKEEPING, NTP
 M:	John Stultz <johnstul@us.ibm.com>
 M:	Thomas Gleixner <tglx@linutronix.de>

diff --git a/Makefile b/Makefile
index 529d93f..afb8e0d 100644
--- a/Makefile
+++ b/Makefile

@@ -1,8 +1,8 @@
-VERSION = 2
-PATCHLEVEL = 6
-SUBLEVEL = 39
-EXTRAVERSION =
-NAME = Flesh-Eating Bats with Fangs
+VERSION = 3
+PATCHLEVEL = 0
+SUBLEVEL = 0
+EXTRAVERSION = -rc1
+NAME = Sneaky Weasel
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"

diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index b183416..4ac48a0 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h

@@ -456,10 +456,11 @@
 #define __NR_open_by_handle_at		498
 #define __NR_clock_adjtime		499
 #define __NR_syncfs			500
+#define __NR_setns			501
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			501
+#define NR_SYSCALLS			502
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR

diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 15f999d..b9c28f3 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S

@@ -519,6 +519,7 @@
 	.quad sys_open_by_handle_at
 	.quad sys_clock_adjtime
 	.quad sys_syncfs			/* 500 */
+	.quad sys_setns
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 3de689a..2c04ed5 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h

@@ -401,6 +401,7 @@
 #define __NR_clock_adjtime		(__NR_SYSCALL_BASE+372)
 #define __NR_syncfs			(__NR_SYSCALL_BASE+373)
 #define __NR_sendmmsg			(__NR_SYSCALL_BASE+374)
+#define __NR_setns			(__NR_SYSCALL_BASE+375)
 
 /*
  * The following SWIs are ARM private.

diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 24cdac3..80f7896 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S

@@ -384,6 +384,7 @@
 		CALL(sys_clock_adjtime)
 		CALL(sys_syncfs)
 		CALL(sys_sendmmsg)
+/* 375 */	CALL(sys_setns)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted

diff --git a/arch/arm/mach-exynos4/Makefile b/arch/arm/mach-exynos4/Makefile
index 683fc38..a9bb94f 100644
--- a/arch/arm/mach-exynos4/Makefile
+++ b/arch/arm/mach-exynos4/Makefile

@@ -13,7 +13,7 @@
 # Core support for EXYNOS4 system
 
 obj-$(CONFIG_CPU_EXYNOS4210)	+= cpu.o init.o clock.o irq-combiner.o
-obj-$(CONFIG_CPU_EXYNOS4210)	+= setup-i2c0.o gpiolib.o irq-eint.o dma.o
+obj-$(CONFIG_CPU_EXYNOS4210)	+= setup-i2c0.o irq-eint.o dma.o
 obj-$(CONFIG_PM)		+= pm.o sleep.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o

diff --git a/arch/arm/mach-nomadik/Kconfig b/arch/arm/mach-nomadik/Kconfig
index 71f3ea6..3c5e0f5 100644
--- a/arch/arm/mach-nomadik/Kconfig
+++ b/arch/arm/mach-nomadik/Kconfig

@@ -6,7 +6,6 @@
 	bool "ST 8815 Nomadik Hardware Kit (evaluation board)"
 	select NOMADIK_8815
 	select HAS_MTU
-	select NOMADIK_GPIO
 
 endmenu
 

diff --git a/arch/arm/mach-s5pc100/Makefile b/arch/arm/mach-s5pc100/Makefile
index eecab57..a5e6e60 100644
--- a/arch/arm/mach-s5pc100/Makefile
+++ b/arch/arm/mach-s5pc100/Makefile

@@ -11,7 +11,7 @@
 
 # Core support for S5PC100 system
 
-obj-$(CONFIG_CPU_S5PC100)	+= cpu.o init.o clock.o gpiolib.o
+obj-$(CONFIG_CPU_S5PC100)	+= cpu.o init.o clock.o
 obj-$(CONFIG_CPU_S5PC100)	+= setup-i2c0.o
 obj-$(CONFIG_CPU_S5PC100)	+= dma.o
 

diff --git a/arch/arm/mach-s5pv210/Makefile b/arch/arm/mach-s5pv210/Makefile
index 11f1790..50907ac 100644
--- a/arch/arm/mach-s5pv210/Makefile
+++ b/arch/arm/mach-s5pv210/Makefile

@@ -12,7 +12,7 @@
 
 # Core support for S5PV210 system
 
-obj-$(CONFIG_CPU_S5PV210)	+= cpu.o init.o clock.o dma.o gpiolib.o
+obj-$(CONFIG_CPU_S5PV210)	+= cpu.o init.o clock.o dma.o
 obj-$(CONFIG_CPU_S5PV210)	+= setup-i2c0.o
 obj-$(CONFIG_S5PV210_PM)	+= pm.o sleep.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq.o

diff --git a/arch/arm/mach-u300/Makefile b/arch/arm/mach-u300/Makefile
index fab46fe..8fd354a 100644
--- a/arch/arm/mach-u300/Makefile
+++ b/arch/arm/mach-u300/Makefile

@@ -2,7 +2,7 @@
 # Makefile for the linux kernel, U300 machine.
 #
 
-obj-y		:= core.o clock.o timer.o gpio.o padmux.o
+obj-y		:= core.o clock.o timer.o padmux.o
 obj-m		:=
 obj-n		:=
 obj-		:=

diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index 54429d0..f8b9392 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig

@@ -5,7 +5,6 @@
 	default y
 	select ARM_GIC
 	select HAS_MTU
-	select NOMADIK_GPIO
 	select ARM_ERRATA_753970
 
 menu "Ux500 SoC"

diff --git a/arch/arm/plat-nomadik/Kconfig b/arch/arm/plat-nomadik/Kconfig
index 18296ee..ce65901 100644
--- a/arch/arm/plat-nomadik/Kconfig
+++ b/arch/arm/plat-nomadik/Kconfig

@@ -21,9 +21,4 @@
 	  to multiple interrupt generating programmable
 	  32-bit free running decrementing counters.
 
-config NOMADIK_GPIO
-	bool
-	help
-	  Support for the Nomadik GPIO controller.
-
 endif

diff --git a/arch/arm/plat-nomadik/Makefile b/arch/arm/plat-nomadik/Makefile
index c335473..37c7cdd 100644
--- a/arch/arm/plat-nomadik/Makefile
+++ b/arch/arm/plat-nomadik/Makefile

@@ -3,4 +3,3 @@
 # Licensed under GPLv2
 
 obj-$(CONFIG_HAS_MTU)	+= timer.o
-obj-$(CONFIG_NOMADIK_GPIO)	+= gpio.o

diff --git a/arch/arm/plat-nomadik/include/plat/gpio.h b/arch/arm/plat-nomadik/include/plat/gpio.h
index 1b9f6f0..ea19a5b 100644
--- a/arch/arm/plat-nomadik/include/plat/gpio.h
+++ b/arch/arm/plat-nomadik/include/plat/gpio.h

@@ -78,6 +78,8 @@
 extern void nmk_gpio_wakeups_suspend(void);
 extern void nmk_gpio_wakeups_resume(void);
 
+extern void nmk_gpio_read_pull(int gpio_bank, u32 *pull_up);
+
 /*
  * Platform data to register a block: only the initial gpio/irq number.
  */

diff --git a/arch/arm/plat-omap/Makefile b/arch/arm/plat-omap/Makefile
index a4a1285..f0233e6 100644
--- a/arch/arm/plat-omap/Makefile
+++ b/arch/arm/plat-omap/Makefile

@@ -3,7 +3,7 @@
 #
 
 # Common support
-obj-y := common.o sram.o clock.o devices.o dma.o mux.o gpio.o \
+obj-y := common.o sram.o clock.o devices.o dma.o mux.o \
 	 usb.o fb.o io.o counter_32k.o
 obj-m :=
 obj-n :=

diff --git a/arch/arm/plat-omap/include/plat/gpio.h b/arch/arm/plat-omap/include/plat/gpio.h
index cac2e8a..ec97e00 100644
--- a/arch/arm/plat-omap/include/plat/gpio.h
+++ b/arch/arm/plat-omap/include/plat/gpio.h

@@ -52,6 +52,109 @@
 
 #define OMAP34XX_NR_GPIOS		6
 
+/*
+ * OMAP1510 GPIO registers
+ */
+#define OMAP1510_GPIO_DATA_INPUT	0x00
+#define OMAP1510_GPIO_DATA_OUTPUT	0x04
+#define OMAP1510_GPIO_DIR_CONTROL	0x08
+#define OMAP1510_GPIO_INT_CONTROL	0x0c
+#define OMAP1510_GPIO_INT_MASK		0x10
+#define OMAP1510_GPIO_INT_STATUS	0x14
+#define OMAP1510_GPIO_PIN_CONTROL	0x18
+
+#define OMAP1510_IH_GPIO_BASE		64
+
+/*
+ * OMAP1610 specific GPIO registers
+ */
+#define OMAP1610_GPIO_REVISION		0x0000
+#define OMAP1610_GPIO_SYSCONFIG		0x0010
+#define OMAP1610_GPIO_SYSSTATUS		0x0014
+#define OMAP1610_GPIO_IRQSTATUS1	0x0018
+#define OMAP1610_GPIO_IRQENABLE1	0x001c
+#define OMAP1610_GPIO_WAKEUPENABLE	0x0028
+#define OMAP1610_GPIO_DATAIN		0x002c
+#define OMAP1610_GPIO_DATAOUT		0x0030
+#define OMAP1610_GPIO_DIRECTION		0x0034
+#define OMAP1610_GPIO_EDGE_CTRL1	0x0038
+#define OMAP1610_GPIO_EDGE_CTRL2	0x003c
+#define OMAP1610_GPIO_CLEAR_IRQENABLE1	0x009c
+#define OMAP1610_GPIO_CLEAR_WAKEUPENA	0x00a8
+#define OMAP1610_GPIO_CLEAR_DATAOUT	0x00b0
+#define OMAP1610_GPIO_SET_IRQENABLE1	0x00dc
+#define OMAP1610_GPIO_SET_WAKEUPENA	0x00e8
+#define OMAP1610_GPIO_SET_DATAOUT	0x00f0
+
+/*
+ * OMAP7XX specific GPIO registers
+ */
+#define OMAP7XX_GPIO_DATA_INPUT		0x00
+#define OMAP7XX_GPIO_DATA_OUTPUT	0x04
+#define OMAP7XX_GPIO_DIR_CONTROL	0x08
+#define OMAP7XX_GPIO_INT_CONTROL	0x0c
+#define OMAP7XX_GPIO_INT_MASK		0x10
+#define OMAP7XX_GPIO_INT_STATUS		0x14
+
+/*
+ * omap2+ specific GPIO registers
+ */
+#define OMAP24XX_GPIO_REVISION		0x0000
+#define OMAP24XX_GPIO_IRQSTATUS1	0x0018
+#define OMAP24XX_GPIO_IRQSTATUS2	0x0028
+#define OMAP24XX_GPIO_IRQENABLE2	0x002c
+#define OMAP24XX_GPIO_IRQENABLE1	0x001c
+#define OMAP24XX_GPIO_WAKE_EN		0x0020
+#define OMAP24XX_GPIO_CTRL		0x0030
+#define OMAP24XX_GPIO_OE		0x0034
+#define OMAP24XX_GPIO_DATAIN		0x0038
+#define OMAP24XX_GPIO_DATAOUT		0x003c
+#define OMAP24XX_GPIO_LEVELDETECT0	0x0040
+#define OMAP24XX_GPIO_LEVELDETECT1	0x0044
+#define OMAP24XX_GPIO_RISINGDETECT	0x0048
+#define OMAP24XX_GPIO_FALLINGDETECT	0x004c
+#define OMAP24XX_GPIO_DEBOUNCE_EN	0x0050
+#define OMAP24XX_GPIO_DEBOUNCE_VAL	0x0054
+#define OMAP24XX_GPIO_CLEARIRQENABLE1	0x0060
+#define OMAP24XX_GPIO_SETIRQENABLE1	0x0064
+#define OMAP24XX_GPIO_CLEARWKUENA	0x0080
+#define OMAP24XX_GPIO_SETWKUENA		0x0084
+#define OMAP24XX_GPIO_CLEARDATAOUT	0x0090
+#define OMAP24XX_GPIO_SETDATAOUT	0x0094
+
+#define OMAP4_GPIO_REVISION		0x0000
+#define OMAP4_GPIO_EOI			0x0020
+#define OMAP4_GPIO_IRQSTATUSRAW0	0x0024
+#define OMAP4_GPIO_IRQSTATUSRAW1	0x0028
+#define OMAP4_GPIO_IRQSTATUS0		0x002c
+#define OMAP4_GPIO_IRQSTATUS1		0x0030
+#define OMAP4_GPIO_IRQSTATUSSET0	0x0034
+#define OMAP4_GPIO_IRQSTATUSSET1	0x0038
+#define OMAP4_GPIO_IRQSTATUSCLR0	0x003c
+#define OMAP4_GPIO_IRQSTATUSCLR1	0x0040
+#define OMAP4_GPIO_IRQWAKEN0		0x0044
+#define OMAP4_GPIO_IRQWAKEN1		0x0048
+#define OMAP4_GPIO_IRQENABLE1		0x011c
+#define OMAP4_GPIO_WAKE_EN		0x0120
+#define OMAP4_GPIO_IRQSTATUS2		0x0128
+#define OMAP4_GPIO_IRQENABLE2		0x012c
+#define OMAP4_GPIO_CTRL			0x0130
+#define OMAP4_GPIO_OE			0x0134
+#define OMAP4_GPIO_DATAIN		0x0138
+#define OMAP4_GPIO_DATAOUT		0x013c
+#define OMAP4_GPIO_LEVELDETECT0		0x0140
+#define OMAP4_GPIO_LEVELDETECT1		0x0144
+#define OMAP4_GPIO_RISINGDETECT		0x0148
+#define OMAP4_GPIO_FALLINGDETECT	0x014c
+#define OMAP4_GPIO_DEBOUNCENABLE	0x0150
+#define OMAP4_GPIO_DEBOUNCINGTIME	0x0154
+#define OMAP4_GPIO_CLEARIRQENABLE1	0x0160
+#define OMAP4_GPIO_SETIRQENABLE1	0x0164
+#define OMAP4_GPIO_CLEARWKUENA		0x0180
+#define OMAP4_GPIO_SETWKUENA		0x0184
+#define OMAP4_GPIO_CLEARDATAOUT		0x0190
+#define OMAP4_GPIO_SETDATAOUT		0x0194
+
 #define OMAP_MPUIO(nr)		(OMAP_MAX_GPIO_LINES + (nr))
 #define OMAP_GPIO_IS_MPUIO(nr)	((nr) >= OMAP_MAX_GPIO_LINES)
 

diff --git a/arch/arm/plat-samsung/Makefile b/arch/arm/plat-samsung/Makefile
index e9de58a..53eb15b 100644
--- a/arch/arm/plat-samsung/Makefile
+++ b/arch/arm/plat-samsung/Makefile

@@ -19,7 +19,6 @@
 obj-y				+= gpio-config.o
 obj-y				+= dev-asocdma.o
 
-obj-$(CONFIG_SAMSUNG_GPIOLIB_4BIT)	+= gpiolib.o
 obj-$(CONFIG_SAMSUNG_CLKSRC)	+= clock-clksrc.o
 
 obj-$(CONFIG_SAMSUNG_IRQ_UART)	+= irq-uart.o

diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index 89861a2..f714544 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h

@@ -299,9 +299,10 @@
 #define __NR_signalfd		279
 /* 280 was __NR_timerfd */
 #define __NR_eventfd		281
+#define __NR_setns		283
 
 #ifdef __KERNEL__
-#define NR_syscalls		282
+#define NR_syscalls		284
 
 /* Old stuff */
 #define __IGNORE_uselib

diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index e76bad1..c7fd394 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S

@@ -296,4 +296,5 @@
 	.long	sys_ni_syscall		/* 280, was sys_timerfd */
 	.long	sys_eventfd
 	.long	sys_recvmmsg
+	.long	sys_setns
 	.long	sys_ni_syscall		/* r8 is saturated at nr_syscalls */

diff --git a/arch/blackfin/include/asm/bfin_serial.h b/arch/blackfin/include/asm/bfin_serial.h
index 7dbc664..7fd0ec7 100644
--- a/arch/blackfin/include/asm/bfin_serial.h
+++ b/arch/blackfin/include/asm/bfin_serial.h

@@ -184,7 +184,7 @@
 #undef __BFP
 
 #ifndef port_membase
-# define port_membase(p) (((struct bfin_serial_port *)(p))->port.membase)
+# define port_membase(p) 0
 #endif
 
 #define UART_GET_CHAR(p)      bfin_read16(port_membase(p) + OFFSET_RBR)
@@ -235,10 +235,10 @@
 #define UART_SET_DLAB(p)      do { UART_PUT_LCR(p, UART_GET_LCR(p) | DLAB); SSYNC(); } while (0)
 
 #ifndef put_lsr_cache
-# define put_lsr_cache(p, v) (((struct bfin_serial_port *)(p))->lsr = (v))
+# define put_lsr_cache(p, v)
 #endif
 #ifndef get_lsr_cache
-# define get_lsr_cache(p)    (((struct bfin_serial_port *)(p))->lsr)
+# define get_lsr_cache(p) 0
 #endif
 
 /* The hardware clears the LSR bits upon read, so we need to cache

diff --git a/arch/blackfin/include/asm/gptimers.h b/arch/blackfin/include/asm/gptimers.h
index c722acd..38657da 100644
--- a/arch/blackfin/include/asm/gptimers.h
+++ b/arch/blackfin/include/asm/gptimers.h

@@ -193,4 +193,22 @@
 uint32_t get_gptimer_status(unsigned int group);
 void     set_gptimer_status(unsigned int group, uint32_t value);
 
+/*
+ * All Blackfin system MMRs are padded to 32bits even if the register
+ * itself is only 16bits.  So use a helper macro to streamline this.
+ */
+#define __BFP(m) u16 m; u16 __pad_##m
+
+/*
+ * bfin timer registers layout
+ */
+struct bfin_gptimer_regs {
+	__BFP(config);
+	u32 counter;
+	u32 period;
+	u32 width;
+};
+
+#undef __BFP
+
 #endif

diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index ff9a9f3..0ccba60 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h

@@ -397,8 +397,10 @@
 #define __NR_open_by_handle_at	376
 #define __NR_clock_adjtime	377
 #define __NR_syncfs		378
+#define __NR_setns		379
+#define __NR_sendmmsg		380
 
-#define __NR_syscall		379
+#define __NR_syscall		381
 #define NR_syscalls		__NR_syscall
 
 /* Old optional stuff no one actually uses */

diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c
index 94b1d8a..fce4807 100644
--- a/arch/blackfin/kernel/debug-mmrs.c
+++ b/arch/blackfin/kernel/debug-mmrs.c

@@ -13,6 +13,7 @@
 
 #include <asm/blackfin.h>
 #include <asm/gpio.h>
+#include <asm/gptimers.h>
 #include <asm/bfin_can.h>
 #include <asm/bfin_dma.h>
 #include <asm/bfin_ppi.h>
@@ -230,8 +231,8 @@
 #define DMA(num)  _DMA(num, DMA##num##_NEXT_DESC_PTR, 0, "")
 #define _MDMA(num, x) \
 	do { \
-		_DMA(num, x##DMA_D##num##_CONFIG, 'D', #x); \
-		_DMA(num, x##DMA_S##num##_CONFIG, 'S', #x); \
+		_DMA(num, x##DMA_D##num##_NEXT_DESC_PTR, 'D', #x); \
+		_DMA(num, x##DMA_S##num##_NEXT_DESC_PTR, 'S', #x); \
 	} while (0)
 #define MDMA(num) _MDMA(num, M)
 #define IMDMA(num) _MDMA(num, IM)
@@ -264,20 +265,15 @@
 /*
  * General Purpose Timers
  */
-#define GPTIMER_OFF(mmr) (TIMER0_##mmr - TIMER0_CONFIG)
-#define __GPTIMER(name) \
-	do { \
-		strcpy(_buf, #name); \
-		debugfs_create_x16(buf, S_IRUSR|S_IWUSR, parent, (u16 *)(base + GPTIMER_OFF(name))); \
-	} while (0)
+#define __GPTIMER(uname, lname) __REGS(gptimer, #uname, lname)
 static void __init __maybe_unused
 bfin_debug_mmrs_gptimer(struct dentry *parent, unsigned long base, int num)
 {
 	char buf[32], *_buf = REGS_STR_PFX(buf, TIMER, num);
-	__GPTIMER(CONFIG);
-	__GPTIMER(COUNTER);
-	__GPTIMER(PERIOD);
-	__GPTIMER(WIDTH);
+	__GPTIMER(CONFIG, config);
+	__GPTIMER(COUNTER, counter);
+	__GPTIMER(PERIOD, period);
+	__GPTIMER(WIDTH, width);
 }
 #define GPTIMER(num) bfin_debug_mmrs_gptimer(parent, TIMER##num##_CONFIG, num)
 
@@ -355,7 +351,7 @@
 	__PPI(DELAY, delay);
 	__PPI(FRAME, frame);
 }
-#define PPI(num) bfin_debug_mmrs_ppi(parent, PPI##num##_STATUS, num)
+#define PPI(num) bfin_debug_mmrs_ppi(parent, PPI##num##_CONTROL, num)
 
 /*
  * SPI
@@ -1288,15 +1284,15 @@
 	D16(VR_CTL);
 	D32(CHIPID);	/* it's part of this hardware block */
 
-#if defined(PPI_STATUS) || defined(PPI0_STATUS) || defined(PPI1_STATUS)
+#if defined(PPI_CONTROL) || defined(PPI0_CONTROL) || defined(PPI1_CONTROL)
 	parent = debugfs_create_dir("ppi", top);
-# ifdef PPI_STATUS
-	bfin_debug_mmrs_ppi(parent, PPI_STATUS, -1);
+# ifdef PPI_CONTROL
+	bfin_debug_mmrs_ppi(parent, PPI_CONTROL, -1);
 # endif
-# ifdef PPI0_STATUS
+# ifdef PPI0_CONTROL
 	PPI(0);
 # endif
-# ifdef PPI1_STATUS
+# ifdef PPI1_CONTROL
 	PPI(1);
 # endif
 #endif
@@ -1341,6 +1337,10 @@
 	D16(RSI_PID1);
 	D16(RSI_PID2);
 	D16(RSI_PID3);
+	D16(RSI_PID4);
+	D16(RSI_PID5);
+	D16(RSI_PID6);
+	D16(RSI_PID7);
 	D16(RSI_PWR_CONTROL);
 	D16(RSI_RD_WAIT_EN);
 	D32(RSI_RESPONSE0);

diff --git a/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h
deleted file mode 100644
index f6d924a..0000000
--- a/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h
+++ /dev/null

@@ -1,79 +0,0 @@
-/*
- * Copyright 2008-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later
- */
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-
-#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS)
-# define CONFIG_SERIAL_BFIN_CTSRTS
-
-# ifndef CONFIG_UART0_CTS_PIN
-#  define CONFIG_UART0_CTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART0_RTS_PIN
-#  define CONFIG_UART0_RTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART1_CTS_PIN
-#  define CONFIG_UART1_CTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART1_RTS_PIN
-#  define CONFIG_UART1_RTS_PIN -1
-# endif
-#endif
-
-struct bfin_serial_res {
-	unsigned long uart_base_addr;
-	int uart_irq;
-	int uart_status_irq;
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	unsigned int uart_tx_dma_channel;
-	unsigned int uart_rx_dma_channel;
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	int uart_cts_pin;
-	int uart_rts_pin;
-#endif
-};
-
-struct bfin_serial_res bfin_serial_resource[] = {
-#ifdef CONFIG_SERIAL_BFIN_UART0
-	{
-	 0xFFC00400,
-	 IRQ_UART0_RX,
-	 IRQ_UART0_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	 CH_UART0_TX,
-	 CH_UART0_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	 CONFIG_UART0_CTS_PIN,
-	 CONFIG_UART0_RTS_PIN,
-#endif
-	 },
-#endif
-#ifdef CONFIG_SERIAL_BFIN_UART1
-	{
-	 0xFFC02000,
-	 IRQ_UART1_RX,
-	 IRQ_UART1_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	 CH_UART1_TX,
-	 CH_UART1_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	 CONFIG_UART1_CTS_PIN,
-	 CONFIG_UART1_RTS_PIN,
-#endif
-	 },
-#endif
-};
-
-#define DRIVER_NAME "bfin-uart"
-
-#include <asm/bfin_serial.h>

diff --git a/arch/blackfin/mach-bf518/include/mach/defBF514.h b/arch/blackfin/mach-bf518/include/mach/defBF514.h
index 98a51c4..cfab428 100644
--- a/arch/blackfin/mach-bf518/include/mach/defBF514.h
+++ b/arch/blackfin/mach-bf518/include/mach/defBF514.h

@@ -36,13 +36,13 @@
 #define RSI_EMASK                      0xFFC038C4 /* RSI Exception Mask Register */
 #define RSI_CONFIG                     0xFFC038C8 /* RSI Configuration Register */
 #define RSI_RD_WAIT_EN                 0xFFC038CC /* RSI Read Wait Enable Register */
-#define RSI_PID0                       0xFFC03FE0 /* RSI Peripheral ID Register 0 */
-#define RSI_PID1                       0xFFC03FE4 /* RSI Peripheral ID Register 1 */
-#define RSI_PID2                       0xFFC03FE8 /* RSI Peripheral ID Register 2 */
-#define RSI_PID3                       0xFFC03FEC /* RSI Peripheral ID Register 3 */
-#define RSI_PID4                       0xFFC03FF0 /* RSI Peripheral ID Register 4 */
-#define RSI_PID5                       0xFFC03FF4 /* RSI Peripheral ID Register 5 */
-#define RSI_PID6                       0xFFC03FF8 /* RSI Peripheral ID Register 6 */
-#define RSI_PID7                       0xFFC03FFC /* RSI Peripheral ID Register 7 */
+#define RSI_PID0                       0xFFC038D0 /* RSI Peripheral ID Register 0 */
+#define RSI_PID1                       0xFFC038D4 /* RSI Peripheral ID Register 1 */
+#define RSI_PID2                       0xFFC038D8 /* RSI Peripheral ID Register 2 */
+#define RSI_PID3                       0xFFC038DC /* RSI Peripheral ID Register 3 */
+#define RSI_PID4                       0xFFC038E0 /* RSI Peripheral ID Register 0 */
+#define RSI_PID5                       0xFFC038E4 /* RSI Peripheral ID Register 1 */
+#define RSI_PID6                       0xFFC038E8 /* RSI Peripheral ID Register 2 */
+#define RSI_PID7                       0xFFC038EC /* RSI Peripheral ID Register 3 */
 
 #endif /* _DEF_BF514_H */

diff --git a/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h
deleted file mode 100644
index 960e089..0000000
--- a/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h
+++ /dev/null

@@ -1,79 +0,0 @@
-/*
- * Copyright 2007-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later
- */
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-
-#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS)
-# define CONFIG_SERIAL_BFIN_CTSRTS
-
-# ifndef CONFIG_UART0_CTS_PIN
-#  define CONFIG_UART0_CTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART0_RTS_PIN
-#  define CONFIG_UART0_RTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART1_CTS_PIN
-#  define CONFIG_UART1_CTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART1_RTS_PIN
-#  define CONFIG_UART1_RTS_PIN -1
-# endif
-#endif
-
-struct bfin_serial_res {
-	unsigned long uart_base_addr;
-	int uart_irq;
-	int uart_status_irq;
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	unsigned int uart_tx_dma_channel;
-	unsigned int uart_rx_dma_channel;
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	int uart_cts_pin;
-	int uart_rts_pin;
-#endif
-};
-
-struct bfin_serial_res bfin_serial_resource[] = {
-#ifdef CONFIG_SERIAL_BFIN_UART0
-	{
-	 0xFFC00400,
-	 IRQ_UART0_RX,
-	 IRQ_UART0_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	 CH_UART0_TX,
-	 CH_UART0_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	 CONFIG_UART0_CTS_PIN,
-	 CONFIG_UART0_RTS_PIN,
-#endif
-	 },
-#endif
-#ifdef CONFIG_SERIAL_BFIN_UART1
-	{
-	 0xFFC02000,
-	 IRQ_UART1_RX,
-	 IRQ_UART1_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	 CH_UART1_TX,
-	 CH_UART1_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	 CONFIG_UART1_CTS_PIN,
-	 CONFIG_UART1_RTS_PIN,
-#endif
-	 },
-#endif
-};
-
-#define DRIVER_NAME "bfin-uart"
-
-#include <asm/bfin_serial.h>

diff --git a/arch/blackfin/mach-bf527/include/mach/defBF525.h b/arch/blackfin/mach-bf527/include/mach/defBF525.h
index cc383ad..aab80bb 100644
--- a/arch/blackfin/mach-bf527/include/mach/defBF525.h
+++ b/arch/blackfin/mach-bf527/include/mach/defBF525.h

@@ -185,8 +185,8 @@
 #define                USB_EP_NI7_TXTYPE  0xffc03bd4   /* Sets the transaction protocol and peripheral endpoint number for the Host Tx endpoint7 */
 #define            USB_EP_NI7_TXINTERVAL  0xffc03bd8   /* Sets the NAK response timeout on Endpoint7 */
 #define                USB_EP_NI7_RXTYPE  0xffc03bdc   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint7 */
-#define            USB_EP_NI7_RXINTERVAL  0xffc03bf0   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */
-#define               USB_EP_NI7_TXCOUNT  0xffc03bf8   /* Number of bytes to be written to the endpoint7 Tx FIFO */
+#define            USB_EP_NI7_RXINTERVAL  0xffc03be0   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */
+#define               USB_EP_NI7_TXCOUNT  0xffc03be8   /* Number of bytes to be written to the endpoint7 Tx FIFO */
 
 #define                USB_DMA_INTERRUPT  0xffc03c00   /* Indicates pending interrupts for the DMA channels */
 

diff --git a/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h
deleted file mode 100644
index 45dcaa4..0000000
--- a/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h
+++ /dev/null

@@ -1,52 +0,0 @@
-/*
- * Copyright 2006-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later
- */
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-
-#ifdef CONFIG_BFIN_UART0_CTSRTS
-# define CONFIG_SERIAL_BFIN_CTSRTS
-# ifndef CONFIG_UART0_CTS_PIN
-#  define CONFIG_UART0_CTS_PIN -1
-# endif
-# ifndef CONFIG_UART0_RTS_PIN
-#  define CONFIG_UART0_RTS_PIN -1
-# endif
-#endif
-
-struct bfin_serial_res {
-	unsigned long	uart_base_addr;
-	int		uart_irq;
-	int		uart_status_irq;
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	unsigned int	uart_tx_dma_channel;
-	unsigned int	uart_rx_dma_channel;
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	int		uart_cts_pin;
-	int		uart_rts_pin;
-#endif
-};
-
-struct bfin_serial_res bfin_serial_resource[] = {
-	{
-	0xFFC00400,
-	IRQ_UART0_RX,
-	IRQ_UART0_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART0_TX,
-	CH_UART0_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	CONFIG_UART0_CTS_PIN,
-	CONFIG_UART0_RTS_PIN,
-#endif
-	}
-};
-
-#define DRIVER_NAME "bfin-uart"
-
-#include <asm/bfin_serial.h>

diff --git a/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h
deleted file mode 100644
index 3e955db..0000000
--- a/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h
+++ /dev/null

@@ -1,79 +0,0 @@
-/*
- * Copyright 2006-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later
- */
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-
-#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS)
-# define CONFIG_SERIAL_BFIN_CTSRTS
-
-# ifndef CONFIG_UART0_CTS_PIN
-#  define CONFIG_UART0_CTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART0_RTS_PIN
-#  define CONFIG_UART0_RTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART1_CTS_PIN
-#  define CONFIG_UART1_CTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART1_RTS_PIN
-#  define CONFIG_UART1_RTS_PIN -1
-# endif
-#endif
-
-struct bfin_serial_res {
-	unsigned long	uart_base_addr;
-	int		uart_irq;
-	int		uart_status_irq;
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	unsigned int	uart_tx_dma_channel;
-	unsigned int	uart_rx_dma_channel;
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	int	uart_cts_pin;
-	int	uart_rts_pin;
-#endif
-};
-
-struct bfin_serial_res bfin_serial_resource[] = {
-#ifdef CONFIG_SERIAL_BFIN_UART0
-	{
-	0xFFC00400,
-	IRQ_UART0_RX,
-	IRQ_UART0_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART0_TX,
-	CH_UART0_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	CONFIG_UART0_CTS_PIN,
-	CONFIG_UART0_RTS_PIN,
-#endif
-	},
-#endif
-#ifdef CONFIG_SERIAL_BFIN_UART1
-	{
-	0xFFC02000,
-	IRQ_UART1_RX,
-	IRQ_UART1_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART1_TX,
-	CH_UART1_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	CONFIG_UART1_CTS_PIN,
-	CONFIG_UART1_RTS_PIN,
-#endif
-	},
-#endif
-};
-
-#define DRIVER_NAME "bfin-uart"
-
-#include <asm/bfin_serial.h>

diff --git a/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h
deleted file mode 100644
index beb502e..0000000
--- a/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h
+++ /dev/null

@@ -1,93 +0,0 @@
-/*
- * Copyright 2008-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-
-#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS)
-# define CONFIG_SERIAL_BFIN_CTSRTS
-
-# ifndef CONFIG_UART0_CTS_PIN
-#  define CONFIG_UART0_CTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART0_RTS_PIN
-#  define CONFIG_UART0_RTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART1_CTS_PIN
-#  define CONFIG_UART1_CTS_PIN -1
-# endif
-
-# ifndef CONFIG_UART1_RTS_PIN
-#  define CONFIG_UART1_RTS_PIN -1
-# endif
-#endif
-
-struct bfin_serial_res {
-	unsigned long	uart_base_addr;
-	int		uart_irq;
-	int		uart_status_irq;
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	unsigned int	uart_tx_dma_channel;
-	unsigned int	uart_rx_dma_channel;
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	int	uart_cts_pin;
-	int	uart_rts_pin;
-#endif
-};
-
-struct bfin_serial_res bfin_serial_resource[] = {
-#ifdef CONFIG_SERIAL_BFIN_UART0
-	{
-	0xFFC00400,
-	IRQ_UART0_RX,
-	IRQ_UART0_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART0_TX,
-	CH_UART0_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	CONFIG_UART0_CTS_PIN,
-	CONFIG_UART0_RTS_PIN,
-#endif
-	},
-#endif
-#ifdef CONFIG_SERIAL_BFIN_UART1
-	{
-	0xFFC02000,
-	IRQ_UART1_RX,
-	IRQ_UART1_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART1_TX,
-	CH_UART1_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	CONFIG_UART1_CTS_PIN,
-	CONFIG_UART1_RTS_PIN,
-#endif
-	},
-#endif
-#ifdef CONFIG_SERIAL_BFIN_UART2
-	{
-	0xFFC02100,
-	IRQ_UART2_RX,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART2_TX,
-	CH_UART2_RX,
-#endif
-#ifdef CONFIG_BFIN_UART2_CTSRTS
-	CONFIG_UART2_CTS_PIN,
-	CONFIG_UART2_RTS_PIN,
-#endif
-	},
-#endif
-};
-
-#define DRIVER_NAME "bfin-uart"
-
-#include <asm/bfin_serial.h>

diff --git a/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h
deleted file mode 100644
index 0d94eda..0000000
--- a/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h
+++ /dev/null

@@ -1,94 +0,0 @@
-/*
- * Copyright 2007-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-
-#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) || \
-	defined(CONFIG_BFIN_UART2_CTSRTS) || defined(CONFIG_BFIN_UART3_CTSRTS)
-# define CONFIG_SERIAL_BFIN_HARD_CTSRTS
-#endif
-
-struct bfin_serial_res {
-	unsigned long	uart_base_addr;
-	int		uart_irq;
-	int		uart_status_irq;
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	unsigned int	uart_tx_dma_channel;
-	unsigned int	uart_rx_dma_channel;
-#endif
-#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS
-	int		uart_cts_pin;
-	int		uart_rts_pin;
-#endif
-};
-
-struct bfin_serial_res bfin_serial_resource[] = {
-#ifdef CONFIG_SERIAL_BFIN_UART0
-	{
-	0xFFC00400,
-	IRQ_UART0_RX,
-	IRQ_UART0_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART0_TX,
-	CH_UART0_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS
-	0,
-	0,
-#endif
-	},
-#endif
-#ifdef CONFIG_SERIAL_BFIN_UART1
-	{
-	0xFFC02000,
-	IRQ_UART1_RX,
-	IRQ_UART1_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART1_TX,
-	CH_UART1_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS
-	GPIO_PE10,
-	GPIO_PE9,
-#endif
-	},
-#endif
-#ifdef CONFIG_SERIAL_BFIN_UART2
-	{
-	0xFFC02100,
-	IRQ_UART2_RX,
-	IRQ_UART2_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART2_TX,
-	CH_UART2_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS
-	0,
-	0,
-#endif
-	},
-#endif
-#ifdef CONFIG_SERIAL_BFIN_UART3
-	{
-	0xFFC03100,
-	IRQ_UART3_RX,
-	IRQ_UART3_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART3_TX,
-	CH_UART3_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS
-	GPIO_PB3,
-	GPIO_PB2,
-#endif
-	},
-#endif
-};
-
-#define DRIVER_NAME "bfin-uart"
-
-#include <asm/bfin_serial.h>

diff --git a/arch/blackfin/mach-bf548/include/mach/defBF547.h b/arch/blackfin/mach-bf548/include/mach/defBF547.h
index 1cbba11..1fa41ec 100644
--- a/arch/blackfin/mach-bf548/include/mach/defBF547.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF547.h

@@ -271,10 +271,10 @@
 #define            USB_EP_NI0_TXINTERVAL  0xffc03e18   /* Sets the NAK response timeout on Endpoint 0 */
 #define                USB_EP_NI0_RXTYPE  0xffc03e1c   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint0 */
 #define            USB_EP_NI0_RXINTERVAL  0xffc03e20   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint0 */
+#define               USB_EP_NI0_TXCOUNT  0xffc03e28   /* Number of bytes to be written to the endpoint0 Tx FIFO */
 
 /* USB Endpoint 1 Control Registers */
 
-#define               USB_EP_NI0_TXCOUNT  0xffc03e28   /* Number of bytes to be written to the endpoint0 Tx FIFO */
 #define                USB_EP_NI1_TXMAXP  0xffc03e40   /* Maximum packet size for Host Tx endpoint1 */
 #define                 USB_EP_NI1_TXCSR  0xffc03e44   /* Control Status register for endpoint1 */
 #define                USB_EP_NI1_RXMAXP  0xffc03e48   /* Maximum packet size for Host Rx endpoint1 */
@@ -284,10 +284,10 @@
 #define            USB_EP_NI1_TXINTERVAL  0xffc03e58   /* Sets the NAK response timeout on Endpoint1 */
 #define                USB_EP_NI1_RXTYPE  0xffc03e5c   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint1 */
 #define            USB_EP_NI1_RXINTERVAL  0xffc03e60   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint1 */
+#define               USB_EP_NI1_TXCOUNT  0xffc03e68   /* Number of bytes to be written to the+H102 endpoint1 Tx FIFO */
 
 /* USB Endpoint 2 Control Registers */
 
-#define               USB_EP_NI1_TXCOUNT  0xffc03e68   /* Number of bytes to be written to the+H102 endpoint1 Tx FIFO */
 #define                USB_EP_NI2_TXMAXP  0xffc03e80   /* Maximum packet size for Host Tx endpoint2 */
 #define                 USB_EP_NI2_TXCSR  0xffc03e84   /* Control Status register for endpoint2 */
 #define                USB_EP_NI2_RXMAXP  0xffc03e88   /* Maximum packet size for Host Rx endpoint2 */
@@ -297,10 +297,10 @@
 #define            USB_EP_NI2_TXINTERVAL  0xffc03e98   /* Sets the NAK response timeout on Endpoint2 */
 #define                USB_EP_NI2_RXTYPE  0xffc03e9c   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint2 */
 #define            USB_EP_NI2_RXINTERVAL  0xffc03ea0   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint2 */
+#define               USB_EP_NI2_TXCOUNT  0xffc03ea8   /* Number of bytes to be written to the endpoint2 Tx FIFO */
 
 /* USB Endpoint 3 Control Registers */
 
-#define               USB_EP_NI2_TXCOUNT  0xffc03ea8   /* Number of bytes to be written to the endpoint2 Tx FIFO */
 #define                USB_EP_NI3_TXMAXP  0xffc03ec0   /* Maximum packet size for Host Tx endpoint3 */
 #define                 USB_EP_NI3_TXCSR  0xffc03ec4   /* Control Status register for endpoint3 */
 #define                USB_EP_NI3_RXMAXP  0xffc03ec8   /* Maximum packet size for Host Rx endpoint3 */
@@ -310,10 +310,10 @@
 #define            USB_EP_NI3_TXINTERVAL  0xffc03ed8   /* Sets the NAK response timeout on Endpoint3 */
 #define                USB_EP_NI3_RXTYPE  0xffc03edc   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint3 */
 #define            USB_EP_NI3_RXINTERVAL  0xffc03ee0   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint3 */
+#define               USB_EP_NI3_TXCOUNT  0xffc03ee8   /* Number of bytes to be written to the H124endpoint3 Tx FIFO */
 
 /* USB Endpoint 4 Control Registers */
 
-#define               USB_EP_NI3_TXCOUNT  0xffc03ee8   /* Number of bytes to be written to the H124endpoint3 Tx FIFO */
 #define                USB_EP_NI4_TXMAXP  0xffc03f00   /* Maximum packet size for Host Tx endpoint4 */
 #define                 USB_EP_NI4_TXCSR  0xffc03f04   /* Control Status register for endpoint4 */
 #define                USB_EP_NI4_RXMAXP  0xffc03f08   /* Maximum packet size for Host Rx endpoint4 */
@@ -323,10 +323,10 @@
 #define            USB_EP_NI4_TXINTERVAL  0xffc03f18   /* Sets the NAK response timeout on Endpoint4 */
 #define                USB_EP_NI4_RXTYPE  0xffc03f1c   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint4 */
 #define            USB_EP_NI4_RXINTERVAL  0xffc03f20   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint4 */
+#define               USB_EP_NI4_TXCOUNT  0xffc03f28   /* Number of bytes to be written to the endpoint4 Tx FIFO */
 
 /* USB Endpoint 5 Control Registers */
 
-#define               USB_EP_NI4_TXCOUNT  0xffc03f28   /* Number of bytes to be written to the endpoint4 Tx FIFO */
 #define                USB_EP_NI5_TXMAXP  0xffc03f40   /* Maximum packet size for Host Tx endpoint5 */
 #define                 USB_EP_NI5_TXCSR  0xffc03f44   /* Control Status register for endpoint5 */
 #define                USB_EP_NI5_RXMAXP  0xffc03f48   /* Maximum packet size for Host Rx endpoint5 */
@@ -336,10 +336,10 @@
 #define            USB_EP_NI5_TXINTERVAL  0xffc03f58   /* Sets the NAK response timeout on Endpoint5 */
 #define                USB_EP_NI5_RXTYPE  0xffc03f5c   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint5 */
 #define            USB_EP_NI5_RXINTERVAL  0xffc03f60   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint5 */
+#define               USB_EP_NI5_TXCOUNT  0xffc03f68   /* Number of bytes to be written to the H145endpoint5 Tx FIFO */
 
 /* USB Endpoint 6 Control Registers */
 
-#define               USB_EP_NI5_TXCOUNT  0xffc03f68   /* Number of bytes to be written to the H145endpoint5 Tx FIFO */
 #define                USB_EP_NI6_TXMAXP  0xffc03f80   /* Maximum packet size for Host Tx endpoint6 */
 #define                 USB_EP_NI6_TXCSR  0xffc03f84   /* Control Status register for endpoint6 */
 #define                USB_EP_NI6_RXMAXP  0xffc03f88   /* Maximum packet size for Host Rx endpoint6 */
@@ -349,10 +349,10 @@
 #define            USB_EP_NI6_TXINTERVAL  0xffc03f98   /* Sets the NAK response timeout on Endpoint6 */
 #define                USB_EP_NI6_RXTYPE  0xffc03f9c   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint6 */
 #define            USB_EP_NI6_RXINTERVAL  0xffc03fa0   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint6 */
+#define               USB_EP_NI6_TXCOUNT  0xffc03fa8   /* Number of bytes to be written to the endpoint6 Tx FIFO */
 
 /* USB Endpoint 7 Control Registers */
 
-#define               USB_EP_NI6_TXCOUNT  0xffc03fa8   /* Number of bytes to be written to the endpoint6 Tx FIFO */
 #define                USB_EP_NI7_TXMAXP  0xffc03fc0   /* Maximum packet size for Host Tx endpoint7 */
 #define                 USB_EP_NI7_TXCSR  0xffc03fc4   /* Control Status register for endpoint7 */
 #define                USB_EP_NI7_RXMAXP  0xffc03fc8   /* Maximum packet size for Host Rx endpoint7 */
@@ -361,8 +361,9 @@
 #define                USB_EP_NI7_TXTYPE  0xffc03fd4   /* Sets the transaction protocol and peripheral endpoint number for the Host Tx endpoint7 */
 #define            USB_EP_NI7_TXINTERVAL  0xffc03fd8   /* Sets the NAK response timeout on Endpoint7 */
 #define                USB_EP_NI7_RXTYPE  0xffc03fdc   /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint7 */
-#define            USB_EP_NI7_RXINTERVAL  0xffc03ff0   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */
-#define               USB_EP_NI7_TXCOUNT  0xffc03ff8   /* Number of bytes to be written to the endpoint7 Tx FIFO */
+#define            USB_EP_NI7_RXINTERVAL  0xffc03fe0   /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */
+#define               USB_EP_NI7_TXCOUNT  0xffc03fe8   /* Number of bytes to be written to the endpoint7 Tx FIFO */
+
 #define                USB_DMA_INTERRUPT  0xffc04000   /* Indicates pending interrupts for the DMA channels */
 
 /* USB Channel 0 Config Registers */

diff --git a/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h
deleted file mode 100644
index 3a69474..0000000
--- a/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h
+++ /dev/null

@@ -1,52 +0,0 @@
-/*
- * Copyright 2006-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-
-#ifdef CONFIG_BFIN_UART0_CTSRTS
-# define CONFIG_SERIAL_BFIN_CTSRTS
-# ifndef CONFIG_UART0_CTS_PIN
-#  define CONFIG_UART0_CTS_PIN -1
-# endif
-# ifndef CONFIG_UART0_RTS_PIN
-#  define CONFIG_UART0_RTS_PIN -1
-# endif
-#endif
-
-struct bfin_serial_res {
-	unsigned long	uart_base_addr;
-	int		uart_irq;
-	int		uart_status_irq;
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	unsigned int	uart_tx_dma_channel;
-	unsigned int	uart_rx_dma_channel;
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	int		uart_cts_pin;
-	int		uart_rts_pin;
-#endif
-};
-
-struct bfin_serial_res bfin_serial_resource[] = {
-	{
-	0xFFC00400,
-	IRQ_UART_RX,
-	IRQ_UART_ERROR,
-#ifdef CONFIG_SERIAL_BFIN_DMA
-	CH_UART_TX,
-	CH_UART_RX,
-#endif
-#ifdef CONFIG_SERIAL_BFIN_CTSRTS
-	CONFIG_UART0_CTS_PIN,
-	CONFIG_UART0_RTS_PIN,
-#endif
-	}
-};
-
-#define DRIVER_NAME "bfin-uart"
-
-#include <asm/bfin_serial.h>

diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index f96933f..225d311 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S

@@ -1753,6 +1753,8 @@
 	.long _sys_open_by_handle_at
 	.long _sys_clock_adjtime
 	.long _sys_syncfs
+	.long _sys_setns
+	.long _sys_sendmmsg		/* 380 */
 
 	.rept NR_syscalls-(.-_sys_call_table)/4
 	.long _sys_ni_syscall

diff --git a/arch/blackfin/mm/maccess.c b/arch/blackfin/mm/maccess.c
index b71cebc..e253211 100644
--- a/arch/blackfin/mm/maccess.c
+++ b/arch/blackfin/mm/maccess.c

@@ -16,7 +16,7 @@
 	return bfin_mem_access_type(addr, size);
 }
 
-long probe_kernel_read(void *dst, void *src, size_t size)
+long probe_kernel_read(void *dst, const void *src, size_t size)
 {
 	unsigned long lsrc = (unsigned long)src;
 	int mem_type;
@@ -55,7 +55,7 @@
 	return -EFAULT;
 }
 
-long probe_kernel_write(void *dst, void *src, size_t size)
+long probe_kernel_write(void *dst, const void *src, size_t size)
 {
 	unsigned long ldst = (unsigned long)dst;
 	int mem_type;

diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 0d6420d..1161883 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S

@@ -937,6 +937,7 @@
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_setns			/* 335 */
 
         /*
          * NOTE!! This doesn't have to be exact - we just have

diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 3abf12c..84fed7e 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S

@@ -880,6 +880,7 @@
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_setns			/* 335 */
 
         /*
          * NOTE!! This doesn't have to be exact - we just have

diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index f6fad83..f921b8b 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h

@@ -339,10 +339,11 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_setns		335
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 335
+#define NR_syscalls 336
 
 #include <arch/unistd.h>
 

diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index b28da49..a569dff 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h

@@ -343,10 +343,11 @@
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
 #define __NR_perf_event_open	336
+#define __NR_setns		337
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 337
+#define NR_syscalls 338
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 /* #define __ARCH_WANT_OLD_READDIR */

diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 63d579b..017d6d7 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S

@@ -1526,5 +1526,6 @@
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo	/* 335 */
 	.long sys_perf_event_open
+	.long sys_setns
 
 syscall_table_size = (. - sys_call_table)

diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index 50f2c5a..2c3f8e6 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h

@@ -325,10 +325,11 @@
 #define __NR_move_pages		317
 #define __NR_getcpu		318
 #define __NR_epoll_pwait	319
+#define __NR_setns		320
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 320
+#define NR_syscalls 321
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR

diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index faefaff..f4b2e67 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S

@@ -333,6 +333,7 @@
 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_move_pages */
 	.long SYMBOL_NAME(sys_getcpu)
 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_epoll_pwait */
+	.long SYMBOL_NAME(sys_setns)		/* 320 */
 
 	.macro	call_sp addr
 	mov.l	#SYMBOL_NAME(\addr),er6

diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 404d037..7c928da 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h

@@ -319,11 +319,13 @@
 #define __NR_open_by_handle_at  	1327
 #define __NR_clock_adjtime		1328
 #define __NR_syncfs			1329
+#define __NR_setns			1330
+#define __NR_sendmmsg			1331
 
 #ifdef __KERNEL__
 
 
-#define NR_syscalls			306 /* length of syscall table */
+#define NR_syscalls			308 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6de2e23..97dd2ab 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S

@@ -1775,6 +1775,8 @@
 	data8 sys_open_by_handle_at
 	data8 sys_clock_adjtime
 	data8 sys_syncfs
+	data8 sys_setns				// 1330
+	data8 sys_sendmmsg
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */

diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index c705456..3e1db56 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h

@@ -330,10 +330,11 @@
 /* #define __NR_timerfd		322 removed */
 #define __NR_eventfd		323
 #define __NR_fallocate		324
+#define __NR_setns		325
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 325
+#define NR_syscalls 326
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_STAT64

diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S
index 60536e2..528f2e6 100644
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S

@@ -324,3 +324,4 @@
 	.long sys_ni_syscall
 	.long sys_eventfd
 	.long sys_fallocate
+	.long sys_setns			/* 325 */

diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index f3b649d..43f984e 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h

@@ -349,10 +349,11 @@
 #define __NR_open_by_handle_at	341
 #define __NR_clock_adjtime	342
 #define __NR_syncfs		343
+#define __NR_setns		344
 
 #ifdef __KERNEL__
 
-#define NR_syscalls		344
+#define NR_syscalls		345
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR

diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 6f7b091..00d1452 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S

@@ -364,4 +364,5 @@
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_setns
 

diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 30edd61..7d7092b 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h

@@ -390,8 +390,9 @@
 #define __NR_open_by_handle_at	372
 #define __NR_clock_adjtime	373
 #define __NR_syncfs		374
+#define __NR_setns		375
 
-#define __NR_syscalls		375
+#define __NR_syscalls		376
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__

diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 85cea81..d915a12 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S

@@ -379,3 +379,4 @@
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_setns			/* 375 */

diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index fa2e37e..6fcfc48 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h

@@ -363,16 +363,17 @@
 #define __NR_open_by_handle_at		(__NR_Linux + 340)
 #define __NR_clock_adjtime		(__NR_Linux + 341)
 #define __NR_syncfs			(__NR_Linux + 342)
+#define __NR_setns			(__NR_Linux + 343)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		342
+#define __NR_Linux_syscalls		343
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		342
+#define __NR_O32_Linux_syscalls		343
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -682,16 +683,17 @@
 #define __NR_open_by_handle_at		(__NR_Linux + 299)
 #define __NR_clock_adjtime		(__NR_Linux + 300)
 #define __NR_syncfs			(__NR_Linux + 301)
+#define __NR_setns			(__NR_Linux + 302)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		301
+#define __NR_Linux_syscalls		302
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		301
+#define __NR_64_Linux_syscalls		302
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1006,16 +1008,17 @@
 #define __NR_open_by_handle_at		(__NR_Linux + 304)
 #define __NR_clock_adjtime		(__NR_Linux + 305)
 #define __NR_syncfs			(__NR_Linux + 306)
+#define __NR_setns			(__NR_Linux + 307)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		306
+#define __NR_Linux_syscalls		307
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		306
+#define __NR_N32_Linux_syscalls		307
 
 #ifdef __KERNEL__
 

diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7a8e1dd..99e656e 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S

@@ -589,6 +589,7 @@
 	sys	sys_open_by_handle_at	3	/* 4340 */
 	sys	sys_clock_adjtime	2
 	sys	sys_syncfs		1
+	sys	sys_setns		2
 	.endm
 
 	/* We pre-compute the number of _instruction_ bytes needed to

diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 2d31c83..fb0575f 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S

@@ -428,4 +428,5 @@
 	PTR	sys_open_by_handle_at
 	PTR	sys_clock_adjtime		/* 5300 */
 	PTR	sys_syncfs
+	PTR	sys_setns
 	.size	sys_call_table,.-sys_call_table

diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 38a0503..4de0c55 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S

@@ -428,4 +428,5 @@
 	PTR	sys_open_by_handle_at
 	PTR	compat_sys_clock_adjtime	/* 6305 */
 	PTR	sys_syncfs
+	PTR	sys_setns
 	.size	sysn32_call_table,.-sysn32_call_table

diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 91ea5e4..4a387de 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S

@@ -546,4 +546,5 @@
 	PTR	compat_sys_open_by_handle_at	/* 4340 */
 	PTR	compat_sys_clock_adjtime
 	PTR	sys_syncfs
+	PTR	sys_setns
 	.size	sys_call_table,.-sys_call_table

diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index 9d056f5..9051f92 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h

@@ -349,10 +349,11 @@
 #define __NR_rt_tgsigqueueinfo	336
 #define __NR_perf_event_open	337
 #define __NR_recvmmsg		338
+#define __NR_setns		339
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 339
+#define NR_syscalls 340
 
 /*
  * specify the deprecated syscalls we want to support on this arch

diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index fb93ad7..ae435e1 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S

@@ -759,6 +759,7 @@
 	.long sys_rt_tgsigqueueinfo
 	.long sys_perf_event_open
 	.long sys_recvmmsg
+	.long sys_setns
 
 
 nr_syscalls=(.-sys_call_table)/4

diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 9cbc2c3..3392de3 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h

@@ -820,8 +820,9 @@
 #define __NR_name_to_handle_at	(__NR_Linux + 325)
 #define __NR_open_by_handle_at	(__NR_Linux + 326)
 #define __NR_syncfs		(__NR_Linux + 327)
+#define __NR_setns		(__NR_Linux + 328)
 
-#define __NR_Linux_syscalls	(__NR_syncfs + 1)
+#define __NR_Linux_syscalls	(__NR_setns + 1)
 
 
 #define __IGNORE_select		/* newselect */

diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index a5b02ce..34a4f5a 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S

@@ -426,6 +426,7 @@
 	ENTRY_SAME(name_to_handle_at)	/* 325 */
 	ENTRY_COMP(open_by_handle_at)
 	ENTRY_SAME(syncfs)
+	ENTRY_SAME(setns)
 
 	/* Nothing yet */
 

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 8489d37..f6736b7 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h

@@ -353,3 +353,4 @@
 COMPAT_SYS_SPU(clock_adjtime)
 SYSCALL_SPU(syncfs)
 COMPAT_SYS_SPU(sendmmsg)
+SYSCALL_SPU(setns)

diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 6d23c81..b8b3f59 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h

@@ -372,10 +372,11 @@
 #define __NR_clock_adjtime	347
 #define __NR_syncfs		348
 #define __NR_sendmmsg		349
+#define __NR_setns		350
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		350
+#define __NR_syscalls		351
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls

diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 9089b04..7667db4 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c

@@ -715,7 +715,8 @@
 
 static int __init init_pmacpic_syscore(void)
 {
-	register_syscore_ops(&pmacpic_syscore_ops);
+	if (pmac_irq_hw[0])
+		register_syscore_ops(&pmacpic_syscore_ops);
 	return 0;
 }
 

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index c4773a2..e4efacf 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h

@@ -577,16 +577,16 @@
 static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	unsigned long pfn, bits;
+	unsigned long address, bits;
 	unsigned char skey;
 
-	pfn = pte_val(*ptep) >> PAGE_SHIFT;
-	skey = page_get_storage_key(pfn);
+	address = pte_val(*ptep) & PAGE_MASK;
+	skey = page_get_storage_key(address);
 	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
 	/* Clear page changed & referenced bit in the storage key */
 	if (bits) {
 		skey ^= bits;
-		page_set_storage_key(pfn, skey, 1);
+		page_set_storage_key(address, skey, 1);
 	}
 	/* Transfer page changed & referenced bit to guest bits in pgste */
 	pgste_val(pgste) |= bits << 48;		/* RCP_GR_BIT & RCP_GC_BIT */
@@ -628,16 +628,16 @@
 static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	unsigned long pfn;
+	unsigned long address;
 	unsigned long okey, nkey;
 
-	pfn = pte_val(*ptep) >> PAGE_SHIFT;
-	okey = nkey = page_get_storage_key(pfn);
+	address = pte_val(*ptep) & PAGE_MASK;
+	okey = nkey = page_get_storage_key(address);
 	nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
 	/* Set page access key and fetch protection bit from pgste */
 	nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
 	if (okey != nkey)
-		page_set_storage_key(pfn, nkey, 1);
+		page_set_storage_key(address, nkey, 1);
 #endif
 }
 

diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 9208e69..404bdb9 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h

@@ -276,7 +276,8 @@
 #define __NR_open_by_handle_at	336
 #define __NR_clock_adjtime	337
 #define __NR_syncfs		338
-#define NR_syscalls 339
+#define __NR_setns		339
+#define NR_syscalls 340
 
 /* 
  * There are some system calls that are not present on 64 bit, some

diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 1dc96ea..1f5eb78 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S

@@ -1904,3 +1904,9 @@
 sys_syncfs_wrapper:
 	lgfr	%r2,%r2			# int
 	jg	sys_syncfs
+
+	.globl	sys_setns_wrapper
+sys_setns_wrapper:
+	lgfr	%r2,%r2			# int
+	lgfr	%r3,%r3			# int
+	jg	sys_setns

diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9c65fd4..6ee39ef 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S

@@ -347,3 +347,4 @@
 SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
 SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
 SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
+SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)

diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 71a4b0d..51e5cd9 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c

@@ -19,7 +19,7 @@
  * using the stura instruction.
  * Returns the number of bytes copied or -EFAULT.
  */
-static long probe_kernel_write_odd(void *dst, void *src, size_t size)
+static long probe_kernel_write_odd(void *dst, const void *src, size_t size)
 {
 	unsigned long count, aligned;
 	int offset, mask;
@@ -45,7 +45,7 @@
 	return rc ? rc : count;
 }
 
-long probe_kernel_write(void *dst, void *src, size_t size)
+long probe_kernel_write(void *dst, const void *src, size_t size)
 {
 	long copied = 0;
 

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 14c6fae..b09763f 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c

@@ -71,12 +71,15 @@
 
 void rcu_table_freelist_finish(void)
 {
-	struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist);
+	struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist);
+	struct rcu_table_freelist *batch = *batchp;
 
 	if (!batch)
-		return;
+		goto out;
 	call_rcu(&batch->rcu, rcu_table_freelist_callback);
-	__get_cpu_var(rcu_table_freelist) = NULL;
+	*batchp = NULL;
+out:
+	put_cpu_var(rcu_table_freelist);
 }
 
 static void smp_sync(void *arg)
@@ -141,20 +144,23 @@
 {
 	struct rcu_table_freelist *batch;
 
+	preempt_disable();
 	if (atomic_read(&mm->mm_users) < 2 &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
 		crst_table_free(mm, table);
-		return;
+		goto out;
 	}
 	batch = rcu_table_freelist_get(mm);
 	if (!batch) {
 		smp_call_function(smp_sync, NULL, 1);
 		crst_table_free(mm, table);
-		return;
+		goto out;
 	}
 	batch->table[--batch->crst_index] = table;
 	if (batch->pgt_index >= batch->crst_index)
 		rcu_table_freelist_finish();
+out:
+	preempt_enable();
 }
 
 #ifdef CONFIG_64BIT
@@ -323,16 +329,17 @@
 	struct page *page;
 	unsigned long bits;
 
+	preempt_disable();
 	if (atomic_read(&mm->mm_users) < 2 &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
 		page_table_free(mm, table);
-		return;
+		goto out;
 	}
 	batch = rcu_table_freelist_get(mm);
 	if (!batch) {
 		smp_call_function(smp_sync, NULL, 1);
 		page_table_free(mm, table);
-		return;
+		goto out;
 	}
 	bits = (mm->context.has_pgste) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
@@ -345,6 +352,8 @@
 	batch->table[batch->pgt_index++] = table;
 	if (batch->pgt_index >= batch->crst_index)
 		rcu_table_freelist_finish();
+out:
+	preempt_enable();
 }
 
 /*

diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index bb7d270..3432008 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h

@@ -374,8 +374,9 @@
 #define __NR_clock_adjtime	361
 #define __NR_syncfs		362
 #define __NR_sendmmsg		363
+#define __NR_setns		364
 
-#define NR_syscalls 364
+#define NR_syscalls 365
 
 #ifdef __KERNEL__
 

diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index 46327ce..ec98986 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h

@@ -395,10 +395,11 @@
 #define __NR_clock_adjtime	372
 #define __NR_syncfs		373
 #define __NR_sendmmsg		374
+#define __NR_setns		375
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 375
+#define NR_syscalls 376
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR

diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 7c486f3..39b051d 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S

@@ -381,3 +381,4 @@
 	.long sys_clock_adjtime
 	.long sys_syncfs
 	.long sys_sendmmsg
+	.long sys_setns

diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index ba1a737..089c4d8 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S

@@ -401,3 +401,4 @@
 	.long sys_clock_adjtime
 	.long sys_syncfs
 	.long sys_sendmmsg
+	.long sys_setns			/* 375 */

diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index c5387ed..6260d5d 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h

@@ -405,8 +405,9 @@
 #define __NR_clock_adjtime	334
 #define __NR_syncfs		335
 #define __NR_sendmmsg		336
+#define __NR_setns		337
 
-#define NR_syscalls		337
+#define NR_syscalls		338
 
 #ifdef __32bit_syscall_numbers__
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,

diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 332c83f..6e492d5 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S

@@ -84,4 +84,4 @@
 /*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
 /*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-/*335*/	.long sys_syncfs, sys_sendmmsg
+/*335*/	.long sys_syncfs, sys_sendmmsg, sys_setns

diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 43887ca..f566518 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S

@@ -85,7 +85,7 @@
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
 	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
-	.word sys_syncfs, compat_sys_sendmmsg
+	.word sys_syncfs, compat_sys_sendmmsg, sys_setns
 
 #endif /* CONFIG_COMPAT */
 
@@ -162,4 +162,4 @@
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
 	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-	.word sys_syncfs, sys_sendmmsg
+	.word sys_syncfs, sys_sendmmsg, sys_setns

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index e1e5010..0249b8b 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig

@@ -11,6 +11,7 @@
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
+	select SYS_HYPERVISOR
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT

diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h
index 0bed3ec7..2ac4228 100644
--- a/arch/tile/include/asm/hardwall.h
+++ b/arch/tile/include/asm/hardwall.h

@@ -40,6 +40,10 @@
 #define HARDWALL_DEACTIVATE \
  _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE)
 
+#define _HARDWALL_GET_ID 4
+#define HARDWALL_GET_ID \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID)
+
 #ifndef __KERNEL__
 
 /* This is the canonical name expected by userspace. */
@@ -47,9 +51,14 @@
 
 #else
 
-/* Hook for /proc/tile/hardwall. */
-struct seq_file;
-int proc_tile_hardwall_show(struct seq_file *sf, void *v);
+/* /proc hooks for hardwall. */
+struct proc_dir_entry;
+#ifdef CONFIG_HARDWALL
+void proc_tile_hardwall_init(struct proc_dir_entry *root);
+int proc_pid_hardwall(struct task_struct *task, char *buffer);
+#else
+static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
+#endif
 
 #endif
 

diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index b4c8e8e..b4dbc05 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile

@@ -5,7 +5,7 @@
 extra-y := vmlinux.lds head_$(BITS).o
 obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \
 	pci-dma.o proc.o process.o ptrace.o reboot.o \
-	setup.o signal.o single_step.o stack.o sys.o time.o traps.o \
+	setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \
 	intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
 
 obj-$(CONFIG_HARDWALL)		+= hardwall.o

diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index 3bddef7..8c41891 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c

@@ -40,16 +40,25 @@
 struct hardwall_info {
 	struct list_head list;             /* "rectangles" list */
 	struct list_head task_head;        /* head of tasks in this hardwall */
+	struct cpumask cpumask;            /* cpus in the rectangle */
 	int ulhc_x;                        /* upper left hand corner x coord */
 	int ulhc_y;                        /* upper left hand corner y coord */
 	int width;                         /* rectangle width */
 	int height;                        /* rectangle height */
+	int id;                            /* integer id for this hardwall */
 	int teardown_in_progress;          /* are we tearing this one down? */
 };
 
 /* Currently allocated hardwall rectangles */
 static LIST_HEAD(rectangles);
 
+/* /proc/tile/hardwall */
+static struct proc_dir_entry *hardwall_proc_dir;
+
+/* Functions to manage files in /proc/tile/hardwall. */
+static void hardwall_add_proc(struct hardwall_info *rect);
+static void hardwall_remove_proc(struct hardwall_info *rect);
+
 /*
  * Guard changes to the hardwall data structures.
  * This could be finer grained (e.g. one lock for the list of hardwall
@@ -105,6 +114,8 @@
 	r->ulhc_y = cpu_y(ulhc);
 	r->width = cpu_x(lrhc) - r->ulhc_x + 1;
 	r->height = cpu_y(lrhc) - r->ulhc_y + 1;
+	cpumask_copy(&r->cpumask, mask);
+	r->id = ulhc;   /* The ulhc cpu id can be the hardwall id. */
 
 	/* Width and height must be positive */
 	if (r->width <= 0 || r->height <= 0)
@@ -388,6 +399,9 @@
 	/* Set up appropriate hardwalling on all affected cpus. */
 	hardwall_setup(rect);
 
+	/* Create a /proc/tile/hardwall entry. */
+	hardwall_add_proc(rect);
+
 	return rect;
 }
 
@@ -645,6 +659,9 @@
 	/* Restart switch and disable firewall. */
 	on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1);
 
+	/* Remove the /proc/tile/hardwall entry. */
+	hardwall_remove_proc(rect);
+
 	/* Now free the rectangle from the list. */
 	spin_lock_irqsave(&hardwall_lock, flags);
 	BUG_ON(!list_empty(&rect->task_head));
@@ -654,35 +671,57 @@
 }
 
 
-/*
- * Dump hardwall state via /proc; initialized in arch/tile/sys/proc.c.
- */
-int proc_tile_hardwall_show(struct seq_file *sf, void *v)
+static int hardwall_proc_show(struct seq_file *sf, void *v)
 {
-	struct hardwall_info *r;
+	struct hardwall_info *rect = sf->private;
+	char buf[256];
 
-	if (udn_disabled) {
-		seq_printf(sf, "%dx%d 0,0 pids:\n", smp_width, smp_height);
-		return 0;
-	}
-
-	spin_lock_irq(&hardwall_lock);
-	list_for_each_entry(r, &rectangles, list) {
-		struct task_struct *p;
-		seq_printf(sf, "%dx%d %d,%d pids:",
-			   r->width, r->height, r->ulhc_x, r->ulhc_y);
-		list_for_each_entry(p, &r->task_head, thread.hardwall_list) {
-			unsigned int cpu = cpumask_first(&p->cpus_allowed);
-			unsigned int x = cpu % smp_width;
-			unsigned int y = cpu / smp_width;
-			seq_printf(sf, " %d@%d,%d", p->pid, x, y);
-		}
-		seq_printf(sf, "\n");
-	}
-	spin_unlock_irq(&hardwall_lock);
+	int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask);
+	buf[rc++] = '\n';
+	seq_write(sf, buf, rc);
 	return 0;
 }
 
+static int hardwall_proc_open(struct inode *inode,
+			      struct file *file)
+{
+	return single_open(file, hardwall_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations hardwall_proc_fops = {
+	.open		= hardwall_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void hardwall_add_proc(struct hardwall_info *rect)
+{
+	char buf[64];
+	snprintf(buf, sizeof(buf), "%d", rect->id);
+	proc_create_data(buf, 0444, hardwall_proc_dir,
+			 &hardwall_proc_fops, rect);
+}
+
+static void hardwall_remove_proc(struct hardwall_info *rect)
+{
+	char buf[64];
+	snprintf(buf, sizeof(buf), "%d", rect->id);
+	remove_proc_entry(buf, hardwall_proc_dir);
+}
+
+int proc_pid_hardwall(struct task_struct *task, char *buffer)
+{
+	struct hardwall_info *rect = task->thread.hardwall;
+	return rect ? sprintf(buffer, "%d\n", rect->id) : 0;
+}
+
+void proc_tile_hardwall_init(struct proc_dir_entry *root)
+{
+	if (!udn_disabled)
+		hardwall_proc_dir = proc_mkdir("hardwall", root);
+}
+
 
 /*
  * Character device support via ioctl/close.
@@ -716,6 +755,9 @@
 			return -EINVAL;
 		return hardwall_deactivate(current);
 
+	case _HARDWALL_GET_ID:
+		return rect ? rect->id : -EINVAL;
+
 	default:
 		return -EINVAL;
 	}

diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 2e02c41..62d8208 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c

@@ -27,6 +27,7 @@
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/homecache.h>
+#include <asm/hardwall.h>
 #include <arch/chip.h>
 
 
@@ -88,3 +89,75 @@
 	.stop	= c_stop,
 	.show	= show_cpuinfo,
 };
+
+/*
+ * Support /proc/tile directory
+ */
+
+static int __init proc_tile_init(void)
+{
+	struct proc_dir_entry *root = proc_mkdir("tile", NULL);
+	if (root == NULL)
+		return 0;
+
+	proc_tile_hardwall_init(root);
+
+	return 0;
+}
+
+arch_initcall(proc_tile_init);
+
+/*
+ * Support /proc/sys/tile directory
+ */
+
+#ifndef __tilegx__  /* FIXME: GX: no support for unaligned access yet */
+static ctl_table unaligned_subtable[] = {
+	{
+		.procname	= "enabled",
+		.data		= &unaligned_fixup,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.procname	= "printk",
+		.data		= &unaligned_printk,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.procname	= "count",
+		.data		= &unaligned_fixup_count,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{}
+};
+
+static ctl_table unaligned_table[] = {
+	{
+		.procname	= "unaligned_fixup",
+		.mode		= 0555,
+		.child		= unaligned_subtable
+	},
+	{}
+};
+#endif
+
+static struct ctl_path tile_path[] = {
+	{ .procname = "tile" },
+	{ }
+};
+
+static int __init proc_sys_tile_init(void)
+{
+#ifndef __tilegx__  /* FIXME: GX: no support for unaligned access yet */
+	register_sysctl_paths(tile_path, unaligned_table);
+#endif
+	return 0;
+}
+
+arch_initcall(proc_sys_tile_init);

diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c
new file mode 100644
index 0000000..b671a86
--- /dev/null
+++ b/arch/tile/kernel/sysfs.c

@@ -0,0 +1,185 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * /sys entry support.
+ */
+
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <hv/hypervisor.h>
+
+/* Return a string queried from the hypervisor, truncated to page size. */
+static ssize_t get_hv_confstr(char *page, int query)
+{
+	ssize_t n = hv_confstr(query, (unsigned long)page, PAGE_SIZE - 1);
+	n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1) - 1;
+	if (n)
+		page[n++] = '\n';
+	page[n] = '\0';
+	return n;
+}
+
+static ssize_t chip_width_show(struct sysdev_class *dev,
+			       struct sysdev_class_attribute *attr,
+			       char *page)
+{
+	return sprintf(page, "%u\n", smp_width);
+}
+static SYSDEV_CLASS_ATTR(chip_width, 0444, chip_width_show, NULL);
+
+static ssize_t chip_height_show(struct sysdev_class *dev,
+				struct sysdev_class_attribute *attr,
+				char *page)
+{
+	return sprintf(page, "%u\n", smp_height);
+}
+static SYSDEV_CLASS_ATTR(chip_height, 0444, chip_height_show, NULL);
+
+static ssize_t chip_serial_show(struct sysdev_class *dev,
+				struct sysdev_class_attribute *attr,
+				char *page)
+{
+	return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM);
+}
+static SYSDEV_CLASS_ATTR(chip_serial, 0444, chip_serial_show, NULL);
+
+static ssize_t chip_revision_show(struct sysdev_class *dev,
+				  struct sysdev_class_attribute *attr,
+				  char *page)
+{
+	return get_hv_confstr(page, HV_CONFSTR_CHIP_REV);
+}
+static SYSDEV_CLASS_ATTR(chip_revision, 0444, chip_revision_show, NULL);
+
+
+static ssize_t type_show(struct sysdev_class *dev,
+			    struct sysdev_class_attribute *attr,
+			    char *page)
+{
+	return sprintf(page, "tilera\n");
+}
+static SYSDEV_CLASS_ATTR(type, 0444, type_show, NULL);
+
+#define HV_CONF_ATTR(name, conf)					\
+	static ssize_t name ## _show(struct sysdev_class *dev,		\
+				     struct sysdev_class_attribute *attr, \
+				     char *page)			\
+	{								\
+		return get_hv_confstr(page, conf);			\
+	}								\
+	static SYSDEV_CLASS_ATTR(name, 0444, name ## _show, NULL);
+
+HV_CONF_ATTR(version,		HV_CONFSTR_HV_SW_VER)
+HV_CONF_ATTR(config_version,	HV_CONFSTR_HV_CONFIG_VER)
+
+HV_CONF_ATTR(board_part,	HV_CONFSTR_BOARD_PART_NUM)
+HV_CONF_ATTR(board_serial,	HV_CONFSTR_BOARD_SERIAL_NUM)
+HV_CONF_ATTR(board_revision,	HV_CONFSTR_BOARD_REV)
+HV_CONF_ATTR(board_description,	HV_CONFSTR_BOARD_DESC)
+HV_CONF_ATTR(mezz_part,		HV_CONFSTR_MEZZ_PART_NUM)
+HV_CONF_ATTR(mezz_serial,	HV_CONFSTR_MEZZ_SERIAL_NUM)
+HV_CONF_ATTR(mezz_revision,	HV_CONFSTR_MEZZ_REV)
+HV_CONF_ATTR(mezz_description,	HV_CONFSTR_MEZZ_DESC)
+HV_CONF_ATTR(switch_control,	HV_CONFSTR_SWITCH_CONTROL)
+
+static struct attribute *board_attrs[] = {
+	&attr_board_part.attr,
+	&attr_board_serial.attr,
+	&attr_board_revision.attr,
+	&attr_board_description.attr,
+	&attr_mezz_part.attr,
+	&attr_mezz_serial.attr,
+	&attr_mezz_revision.attr,
+	&attr_mezz_description.attr,
+	&attr_switch_control.attr,
+	NULL
+};
+
+static struct attribute_group board_attr_group = {
+	.name   = "board",
+	.attrs  = board_attrs,
+};
+
+
+static struct bin_attribute hvconfig_bin;
+
+static ssize_t
+hvconfig_bin_read(struct file *filp, struct kobject *kobj,
+		  struct bin_attribute *bin_attr,
+		  char *buf, loff_t off, size_t count)
+{
+	static size_t size;
+
+	/* Lazily learn the true size (minus the trailing NUL). */
+	if (size == 0)
+		size = hv_confstr(HV_CONFSTR_HV_CONFIG, 0, 0) - 1;
+
+	/* Check and adjust input parameters. */
+	if (off > size)
+		return -EINVAL;
+	if (count > size - off)
+		count = size - off;
+
+	if (count) {
+		/* Get a copy of the hvc and copy out the relevant portion. */
+		char *hvc;
+
+		size = off + count;
+		hvc = kmalloc(size, GFP_KERNEL);
+		if (hvc == NULL)
+			return -ENOMEM;
+		hv_confstr(HV_CONFSTR_HV_CONFIG, (unsigned long)hvc, size);
+		memcpy(buf, hvc + off, count);
+		kfree(hvc);
+	}
+
+	return count;
+}
+
+static int __init create_sysfs_entries(void)
+{
+	struct sysdev_class *cls = &cpu_sysdev_class;
+	int err = 0;
+
+#define create_cpu_attr(name)						\
+	if (!err)							\
+		err = sysfs_create_file(&cls->kset.kobj, &attr_##name.attr);
+	create_cpu_attr(chip_width);
+	create_cpu_attr(chip_height);
+	create_cpu_attr(chip_serial);
+	create_cpu_attr(chip_revision);
+
+#define create_hv_attr(name)						\
+	if (!err)							\
+		err = sysfs_create_file(hypervisor_kobj, &attr_##name.attr);
+	create_hv_attr(type);
+	create_hv_attr(version);
+	create_hv_attr(config_version);
+
+	if (!err)
+		err = sysfs_create_group(hypervisor_kobj, &board_attr_group);
+
+	if (!err) {
+		sysfs_bin_attr_init(&hvconfig_bin);
+		hvconfig_bin.attr.name = "hvconfig";
+		hvconfig_bin.attr.mode = S_IRUGO;
+		hvconfig_bin.read = hvconfig_bin_read;
+		hvconfig_bin.size = PAGE_SIZE;
+		err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin);
+	}
+
+	return err;
+}
+subsys_initcall(create_sysfs_entries);

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 95f5826..c1870dd 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S

@@ -849,4 +849,5 @@
 	.quad compat_sys_clock_adjtime
 	.quad sys_syncfs
 	.quad compat_sys_sendmmsg	/* 345 */
+	.quad sys_setns
 ia32_syscall_end:

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 416d865..610001d 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h

@@ -139,7 +139,7 @@
 	    boot_cpu_data.x86_model <= 0x05 &&
 	    boot_cpu_data.x86_mask < 0x0A)
 		return 1;
-	else if (c1e_detected)
+	else if (amd_e400_c1e_detected)
 		return 1;
 	else
 		return max_cstate;

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 5dc6acc..71cc380 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h

@@ -125,7 +125,7 @@
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
 #define X86_FEATURE_AVX		(4*32+28) /* Advanced Vector Extensions */
 #define X86_FEATURE_F16C	(4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRND	(4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_RDRAND	(4*32+30) /* The RDRAND instruction */
 #define X86_FEATURE_HYPERVISOR	(4*32+31) /* Running on a hypervisor */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 617bd56..7b439d9 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h

@@ -4,30 +4,33 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+
 #include <linux/smp.h>
 
-static inline void fill_ldt(struct desc_struct *desc,
-			    const struct user_desc *info)
+static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
 {
-	desc->limit0 = info->limit & 0x0ffff;
-	desc->base0 = info->base_addr & 0x0000ffff;
+	desc->limit0		= info->limit & 0x0ffff;
 
-	desc->base1 = (info->base_addr & 0x00ff0000) >> 16;
-	desc->type = (info->read_exec_only ^ 1) << 1;
-	desc->type |= info->contents << 2;
-	desc->s = 1;
-	desc->dpl = 0x3;
-	desc->p = info->seg_not_present ^ 1;
-	desc->limit = (info->limit & 0xf0000) >> 16;
-	desc->avl = info->useable;
-	desc->d = info->seg_32bit;
-	desc->g = info->limit_in_pages;
-	desc->base2 = (info->base_addr & 0xff000000) >> 24;
+	desc->base0		= (info->base_addr & 0x0000ffff);
+	desc->base1		= (info->base_addr & 0x00ff0000) >> 16;
+
+	desc->type		= (info->read_exec_only ^ 1) << 1;
+	desc->type	       |= info->contents << 2;
+
+	desc->s			= 1;
+	desc->dpl		= 0x3;
+	desc->p			= info->seg_not_present ^ 1;
+	desc->limit		= (info->limit & 0xf0000) >> 16;
+	desc->avl		= info->useable;
+	desc->d			= info->seg_32bit;
+	desc->g			= info->limit_in_pages;
+
+	desc->base2		= (info->base_addr & 0xff000000) >> 24;
 	/*
 	 * Don't allow setting of the lm bit. It is useless anyway
 	 * because 64bit system calls require __USER_CS:
 	 */
-	desc->l = 0;
+	desc->l			= 0;
 }
 
 extern struct desc_ptr idt_descr;
@@ -36,6 +39,7 @@
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
 } __attribute__((aligned(PAGE_SIZE)));
+
 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
 
 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
@@ -48,16 +52,16 @@
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
-	gate->offset_low = PTR_LOW(func);
-	gate->segment = __KERNEL_CS;
-	gate->ist = ist;
-	gate->p = 1;
-	gate->dpl = dpl;
-	gate->zero0 = 0;
-	gate->zero1 = 0;
-	gate->type = type;
-	gate->offset_middle = PTR_MIDDLE(func);
-	gate->offset_high = PTR_HIGH(func);
+	gate->offset_low	= PTR_LOW(func);
+	gate->segment		= __KERNEL_CS;
+	gate->ist		= ist;
+	gate->p			= 1;
+	gate->dpl		= dpl;
+	gate->zero0		= 0;
+	gate->zero1		= 0;
+	gate->type		= type;
+	gate->offset_middle	= PTR_MIDDLE(func);
+	gate->offset_high	= PTR_HIGH(func);
 }
 
 #else
@@ -66,8 +70,7 @@
 			     unsigned short seg)
 {
 	gate->a = (seg << 16) | (base & 0xffff);
-	gate->b = (base & 0xffff0000) |
-		  (((0x80 | type | (dpl << 5)) & 0xff) << 8);
+	gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
 }
 
 #endif
@@ -75,31 +78,29 @@
 static inline int desc_empty(const void *ptr)
 {
 	const u32 *desc = ptr;
+
 	return !(desc[0] | desc[1]);
 }
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
-#define load_TR_desc() native_load_tr_desc()
-#define load_gdt(dtr) native_load_gdt(dtr)
-#define load_idt(dtr) native_load_idt(dtr)
-#define load_tr(tr) asm volatile("ltr %0"::"m" (tr))
-#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
+#define load_TR_desc()				native_load_tr_desc()
+#define load_gdt(dtr)				native_load_gdt(dtr)
+#define load_idt(dtr)				native_load_idt(dtr)
+#define load_tr(tr)				asm volatile("ltr %0"::"m" (tr))
+#define load_ldt(ldt)				asm volatile("lldt %0"::"m" (ldt))
 
-#define store_gdt(dtr) native_store_gdt(dtr)
-#define store_idt(dtr) native_store_idt(dtr)
-#define store_tr(tr) (tr = native_store_tr())
+#define store_gdt(dtr)				native_store_gdt(dtr)
+#define store_idt(dtr)				native_store_idt(dtr)
+#define store_tr(tr)				(tr = native_store_tr())
 
-#define load_TLS(t, cpu) native_load_tls(t, cpu)
-#define set_ldt native_set_ldt
+#define load_TLS(t, cpu)			native_load_tls(t, cpu)
+#define set_ldt					native_set_ldt
 
-#define write_ldt_entry(dt, entry, desc)	\
-	native_write_ldt_entry(dt, entry, desc)
-#define write_gdt_entry(dt, entry, desc, type)		\
-	native_write_gdt_entry(dt, entry, desc, type)
-#define write_idt_entry(dt, entry, g)		\
-	native_write_idt_entry(dt, entry, g)
+#define write_ldt_entry(dt, entry, desc)	native_write_ldt_entry(dt, entry, desc)
+#define write_gdt_entry(dt, entry, desc, type)	native_write_gdt_entry(dt, entry, desc, type)
+#define write_idt_entry(dt, entry, g)		native_write_idt_entry(dt, entry, g)
 
 static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
 {
@@ -112,33 +113,27 @@
 
 #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
 
-static inline void native_write_idt_entry(gate_desc *idt, int entry,
-					  const gate_desc *gate)
+static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
 {
 	memcpy(&idt[entry], gate, sizeof(*gate));
 }
 
-static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry,
-					  const void *desc)
+static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
 {
 	memcpy(&ldt[entry], desc, 8);
 }
 
-static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry,
-					  const void *desc, int type)
+static inline void
+native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type)
 {
 	unsigned int size;
+
 	switch (type) {
-	case DESC_TSS:
-		size = sizeof(tss_desc);
-		break;
-	case DESC_LDT:
-		size = sizeof(ldt_desc);
-		break;
-	default:
-		size = sizeof(struct desc_struct);
-		break;
+	case DESC_TSS:	size = sizeof(tss_desc);	break;
+	case DESC_LDT:	size = sizeof(ldt_desc);	break;
+	default:	size = sizeof(*gdt);		break;
 	}
+
 	memcpy(&gdt[entry], desc, size);
 }
 
@@ -154,20 +149,21 @@
 }
 
 
-static inline void set_tssldt_descriptor(void *d, unsigned long addr,
-					 unsigned type, unsigned size)
+static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
 {
 #ifdef CONFIG_X86_64
 	struct ldttss_desc64 *desc = d;
+
 	memset(desc, 0, sizeof(*desc));
-	desc->limit0 = size & 0xFFFF;
-	desc->base0 = PTR_LOW(addr);
-	desc->base1 = PTR_MIDDLE(addr) & 0xFF;
-	desc->type = type;
-	desc->p = 1;
-	desc->limit1 = (size >> 16) & 0xF;
-	desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
-	desc->base3 = PTR_HIGH(addr);
+
+	desc->limit0		= size & 0xFFFF;
+	desc->base0		= PTR_LOW(addr);
+	desc->base1		= PTR_MIDDLE(addr) & 0xFF;
+	desc->type		= type;
+	desc->p			= 1;
+	desc->limit1		= (size >> 16) & 0xF;
+	desc->base2		= (PTR_MIDDLE(addr) >> 8) & 0xFF;
+	desc->base3		= PTR_HIGH(addr);
 #else
 	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
 #endif
@@ -237,14 +233,16 @@
 static inline unsigned long native_store_tr(void)
 {
 	unsigned long tr;
+
 	asm volatile("str %0":"=r" (tr));
+
 	return tr;
 }
 
 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	unsigned int i;
 	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	unsigned int i;
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
 		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
@@ -313,6 +311,7 @@
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
 	gate_desc s;
+
 	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
 	/*
 	 * does not need to be atomic because it is only done once at
@@ -343,8 +342,9 @@
 		set_bit(vector, used_vectors);
 		if (first_system_vector > vector)
 			first_system_vector = vector;
-	} else
+	} else {
 		BUG();
+	}
 }
 
 static inline void alloc_intr_gate(unsigned int n, void *addr)

diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index 38d8737..f49253d7 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h

@@ -16,6 +16,6 @@
 static inline void exit_idle(void) { }
 #endif /* CONFIG_X86_64 */
 
-void c1e_remove_cpu(int cpu);
+void amd_e400_remove_cpu(int cpu);
 
 #endif /* _ASM_X86_IDLE_H */

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index aeff3e8..5f55e69 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h

@@ -11,14 +11,14 @@
 typedef struct {
 	void *ldt;
 	int size;
-	struct mutex lock;
-	void *vdso;
 
 #ifdef CONFIG_X86_64
 	/* True if mm supports a task running in 32 bit compatibility mode. */
 	unsigned short ia32_compat;
 #endif
 
+	struct mutex lock;
+	void *vdso;
 } mm_context_t;
 
 #ifdef CONFIG_SMP

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4c25ab4..2193715 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h

@@ -754,10 +754,10 @@
 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
 
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
-extern void init_c1e_mask(void);
+extern void init_amd_e400_c1e_mask(void);
 
 extern unsigned long		boot_option_idle_override;
-extern bool			c1e_detected;
+extern bool			amd_e400_c1e_detected;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
 			 IDLE_POLL, IDLE_FORCE_MWAIT};

diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index fb6a625..593485b 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h

@@ -351,10 +351,11 @@
 #define __NR_clock_adjtime	343
 #define __NR_syncfs             344
 #define __NR_sendmmsg		345
+#define __NR_setns		346
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 346
+#define NR_syscalls 347
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR

diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 79f90eb..705bf13 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h

@@ -679,6 +679,8 @@
 __SYSCALL(__NR_syncfs, sys_syncfs)
 #define __NR_sendmmsg				307
 __SYSCALL(__NR_sendmmsg, sys_sendmmsg)
+#define __NR_setns				308
+__SYSCALL(__NR_setns, sys_setns)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR

diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 130f1ee..a291c40 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h

@@ -5,7 +5,7 @@
  *
  * SGI UV Broadcast Assist Unit definitions
  *
- * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_BAU_H
@@ -35,17 +35,20 @@
 
 #define MAX_CPUS_PER_UVHUB		64
 #define MAX_CPUS_PER_SOCKET		32
-#define UV_ADP_SIZE			64 /* hardware-provided max. */
-#define UV_CPUS_PER_ACT_STATUS		32 /* hardware-provided max. */
-#define UV_ITEMS_PER_DESCRIPTOR		8
+#define ADP_SZ				64 /* hardware-provided max. */
+#define UV_CPUS_PER_AS			32 /* hardware-provided max. */
+#define ITEMS_PER_DESC			8
 /* the 'throttle' to prevent the hardware stay-busy bug */
 #define MAX_BAU_CONCURRENT		3
 #define UV_ACT_STATUS_MASK		0x3
 #define UV_ACT_STATUS_SIZE		2
 #define UV_DISTRIBUTION_SIZE		256
 #define UV_SW_ACK_NPENDING		8
-#define UV_NET_ENDPOINT_INTD		0x38
-#define UV_DESC_BASE_PNODE_SHIFT	49
+#define UV1_NET_ENDPOINT_INTD		0x38
+#define UV2_NET_ENDPOINT_INTD		0x28
+#define UV_NET_ENDPOINT_INTD		(is_uv1_hub() ?			\
+			UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD)
+#define UV_DESC_PSHIFT			49
 #define UV_PAYLOADQ_PNODE_SHIFT		49
 #define UV_PTC_BASENAME			"sgi_uv/ptc_statistics"
 #define UV_BAU_BASENAME			"sgi_uv/bau_tunables"
@@ -53,29 +56,64 @@
 #define UV_BAU_TUNABLES_FILE		"bau_tunables"
 #define WHITESPACE			" \t\n"
 #define uv_physnodeaddr(x)		((__pa((unsigned long)(x)) & uv_mmask))
-#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15
-#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16
-#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x0000000009UL
+#define cpubit_isset(cpu, bau_local_cpumask) \
+	test_bit((cpu), (bau_local_cpumask).bits)
+
 /* [19:16] SOFT_ACK timeout period  19: 1 is urgency 7  17:16 1 is multiplier */
-#define BAU_MISC_CONTROL_MULT_MASK 3
+/*
+ * UV2: Bit 19 selects between
+ *  (0): 10 microsecond timebase and
+ *  (1): 80 microseconds
+ *  we're using 655us, similar to UV1: 65 units of 10us
+ */
+#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL)
+#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (65*10UL)
 
-#define UVH_AGING_PRESCALE_SEL 0x000000b000UL
+#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD	(is_uv1_hub() ?			\
+		UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD :			\
+		UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD)
+
+#define BAU_MISC_CONTROL_MULT_MASK	3
+
+#define UVH_AGING_PRESCALE_SEL		0x000000b000UL
 /* [30:28] URGENCY_7  an index into a table of times */
-#define BAU_URGENCY_7_SHIFT 28
-#define BAU_URGENCY_7_MASK 7
+#define BAU_URGENCY_7_SHIFT		28
+#define BAU_URGENCY_7_MASK		7
 
-#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL
+#define UVH_TRANSACTION_TIMEOUT		0x000000b200UL
 /* [45:40] BAU - BAU transaction timeout select - a multiplier */
-#define BAU_TRANS_SHIFT 40
-#define BAU_TRANS_MASK 0x3f
+#define BAU_TRANS_SHIFT			40
+#define BAU_TRANS_MASK			0x3f
+
+/*
+ * shorten some awkward names
+ */
+#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT
+#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT
+#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
+#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD
+#define write_gmmr	uv_write_global_mmr64
+#define write_lmmr	uv_write_local_mmr
+#define read_lmmr	uv_read_local_mmr
+#define read_gmmr	uv_read_global_mmr64
 
 /*
  * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
  */
-#define DESC_STATUS_IDLE		0
-#define DESC_STATUS_ACTIVE		1
-#define DESC_STATUS_DESTINATION_TIMEOUT	2
-#define DESC_STATUS_SOURCE_TIMEOUT	3
+#define DS_IDLE				0
+#define DS_ACTIVE			1
+#define DS_DESTINATION_TIMEOUT		2
+#define DS_SOURCE_TIMEOUT		3
+/*
+ * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2
+ * values 1 and 5 will not occur
+ */
+#define UV2H_DESC_IDLE			0
+#define UV2H_DESC_DEST_TIMEOUT		2
+#define UV2H_DESC_DEST_STRONG_NACK	3
+#define UV2H_DESC_BUSY			4
+#define UV2H_DESC_SOURCE_TIMEOUT	6
+#define UV2H_DESC_DEST_PUT_ERR		7
 
 /*
  * delay for 'plugged' timeout retries, in microseconds
@@ -86,15 +124,24 @@
  * threshholds at which to use IPI to free resources
  */
 /* after this # consecutive 'plugged' timeouts, use IPI to release resources */
-#define PLUGSB4RESET 100
+#define PLUGSB4RESET			100
 /* after this many consecutive timeouts, use IPI to release resources */
-#define TIMEOUTSB4RESET 1
+#define TIMEOUTSB4RESET			1
 /* at this number uses of IPI to release resources, giveup the request */
-#define IPI_RESET_LIMIT 1
+#define IPI_RESET_LIMIT			1
 /* after this # consecutive successes, bump up the throttle if it was lowered */
-#define COMPLETE_THRESHOLD 5
+#define COMPLETE_THRESHOLD		5
 
-#define UV_LB_SUBNODEID 0x10
+#define UV_LB_SUBNODEID			0x10
+
+/* these two are the same for UV1 and UV2: */
+#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
+#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK
+/* 4 bits of software ack period */
+#define UV2_ACK_MASK			0x7UL
+#define UV2_ACK_UNITS_SHFT		3
+#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT
+#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
 
 /*
  * number of entries in the destination side payload queue
@@ -115,9 +162,16 @@
 /*
  * tuning the action when the numalink network is extremely delayed
  */
-#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in microseconds */
-#define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */
-#define CONGESTED_PERIOD 30 /* time for the bau to be disabled, in seconds */
+#define CONGESTED_RESPONSE_US		1000	/* 'long' response time, in
+						   microseconds */
+#define CONGESTED_REPS			10	/* long delays averaged over
+						   this many broadcasts */
+#define CONGESTED_PERIOD		30	/* time for the bau to be
+						   disabled, in seconds */
+/* see msg_type: */
+#define MSG_NOOP			0
+#define MSG_REGULAR			1
+#define MSG_RETRY			2
 
 /*
  * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
@@ -129,8 +183,8 @@
  * 'base_dest_nasid' field of the header corresponds to the
  * destination nodeID associated with that specified bit.
  */
-struct bau_target_uvhubmask {
-	unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
+struct bau_targ_hubmask {
+	unsigned long		bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
 };
 
 /*
@@ -139,7 +193,7 @@
  *  enough bits for max. cpu's per uvhub)
  */
 struct bau_local_cpumask {
-	unsigned long bits;
+	unsigned long		bits;
 };
 
 /*
@@ -160,14 +214,14 @@
  * The payload is software-defined for INTD transactions
  */
 struct bau_msg_payload {
-	unsigned long address;		/* signifies a page or all TLB's
-						of the cpu */
+	unsigned long	address;		/* signifies a page or all
+						   TLB's of the cpu */
 	/* 64 bits */
-	unsigned short sending_cpu;	/* filled in by sender */
+	unsigned short	sending_cpu;		/* filled in by sender */
 	/* 16 bits */
-	unsigned short acknowledge_count;/* filled in by destination */
+	unsigned short	acknowledge_count;	/* filled in by destination */
 	/* 16 bits */
-	unsigned int reserved1:32;	/* not usable */
+	unsigned int	reserved1:32;		/* not usable */
 };
 
 
@@ -176,93 +230,96 @@
  * see table 4.2.3.0.1 in broacast_assist spec.
  */
 struct bau_msg_header {
-	unsigned int dest_subnodeid:6;	/* must be 0x10, for the LB */
+	unsigned int	dest_subnodeid:6;	/* must be 0x10, for the LB */
 	/* bits 5:0 */
-	unsigned int base_dest_nasid:15; /* nasid of the */
-	/* bits 20:6 */			  /* first bit in uvhub map */
-	unsigned int command:8;	/* message type */
+	unsigned int	base_dest_nasid:15;	/* nasid of the first bit */
+	/* bits 20:6 */				/* in uvhub map */
+	unsigned int	command:8;		/* message type */
 	/* bits 28:21 */
-				/* 0x38: SN3net EndPoint Message */
-	unsigned int rsvd_1:3;	/* must be zero */
+	/* 0x38: SN3net EndPoint Message */
+	unsigned int	rsvd_1:3;		/* must be zero */
 	/* bits 31:29 */
-				/* int will align on 32 bits */
-	unsigned int rsvd_2:9;	/* must be zero */
+	/* int will align on 32 bits */
+	unsigned int	rsvd_2:9;		/* must be zero */
 	/* bits 40:32 */
-				/* Suppl_A is 56-41 */
-	unsigned int sequence:16;/* message sequence number */
-	/* bits 56:41 */	/* becomes bytes 16-17 of msg */
-				/* Address field (96:57) is never used as an
-				   address (these are address bits 42:3) */
+	/* Suppl_A is 56-41 */
+	unsigned int	sequence:16;		/* message sequence number */
+	/* bits 56:41 */			/* becomes bytes 16-17 of msg */
+						/* Address field (96:57) is
+						   never used as an address
+						   (these are address bits
+						   42:3) */
 
-	unsigned int rsvd_3:1;	/* must be zero */
+	unsigned int	rsvd_3:1;		/* must be zero */
 	/* bit 57 */
-				/* address bits 27:4 are payload */
+	/* address bits 27:4 are payload */
 	/* these next 24  (58-81) bits become bytes 12-14 of msg */
-
 	/* bits 65:58 land in byte 12 */
-	unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
+	unsigned int	replied_to:1;		/* sent as 0 by the source to
+						   byte 12 */
 	/* bit 58 */
-	unsigned int msg_type:3; /* software type of the message*/
+	unsigned int	msg_type:3;		/* software type of the
+						   message */
 	/* bits 61:59 */
-	unsigned int canceled:1; /* message canceled, resource to be freed*/
+	unsigned int	canceled:1;		/* message canceled, resource
+						   is to be freed*/
 	/* bit 62 */
-	unsigned int payload_1a:1;/* not currently used */
+	unsigned int	payload_1a:1;		/* not currently used */
 	/* bit 63 */
-	unsigned int payload_1b:2;/* not currently used */
+	unsigned int	payload_1b:2;		/* not currently used */
 	/* bits 65:64 */
 
 	/* bits 73:66 land in byte 13 */
-	unsigned int payload_1ca:6;/* not currently used */
+	unsigned int	payload_1ca:6;		/* not currently used */
 	/* bits 71:66 */
-	unsigned int payload_1c:2;/* not currently used */
+	unsigned int	payload_1c:2;		/* not currently used */
 	/* bits 73:72 */
 
 	/* bits 81:74 land in byte 14 */
-	unsigned int payload_1d:6;/* not currently used */
+	unsigned int	payload_1d:6;		/* not currently used */
 	/* bits 79:74 */
-	unsigned int payload_1e:2;/* not currently used */
+	unsigned int	payload_1e:2;		/* not currently used */
 	/* bits 81:80 */
 
-	unsigned int rsvd_4:7;	/* must be zero */
+	unsigned int	rsvd_4:7;		/* must be zero */
 	/* bits 88:82 */
-	unsigned int sw_ack_flag:1;/* software acknowledge flag */
+	unsigned int	swack_flag:1;		/* software acknowledge flag */
 	/* bit 89 */
-				/* INTD trasactions at destination are to
-				   wait for software acknowledge */
-	unsigned int rsvd_5:6;	/* must be zero */
+						/* INTD trasactions at
+						   destination are to wait for
+						   software acknowledge */
+	unsigned int	rsvd_5:6;		/* must be zero */
 	/* bits 95:90 */
-	unsigned int rsvd_6:5;	/* must be zero */
+	unsigned int	rsvd_6:5;		/* must be zero */
 	/* bits 100:96 */
-	unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */
+	unsigned int	int_both:1;		/* if 1, interrupt both sockets
+						   on the uvhub */
 	/* bit 101*/
-	unsigned int fairness:3;/* usually zero */
+	unsigned int	fairness:3;		/* usually zero */
 	/* bits 104:102 */
-	unsigned int multilevel:1;	/* multi-level multicast format */
+	unsigned int	multilevel:1;		/* multi-level multicast
+						   format */
 	/* bit 105 */
-				/* 0 for TLB: endpoint multi-unicast messages */
-	unsigned int chaining:1;/* next descriptor is part of this activation*/
+	/* 0 for TLB: endpoint multi-unicast messages */
+	unsigned int	chaining:1;		/* next descriptor is part of
+						   this activation*/
 	/* bit 106 */
-	unsigned int rsvd_7:21;	/* must be zero */
+	unsigned int	rsvd_7:21;		/* must be zero */
 	/* bits 127:107 */
 };
 
-/* see msg_type: */
-#define MSG_NOOP 0
-#define MSG_REGULAR 1
-#define MSG_RETRY 2
-
 /*
  * The activation descriptor:
  * The format of the message to send, plus all accompanying control
  * Should be 64 bytes
  */
 struct bau_desc {
-	struct bau_target_uvhubmask distribution;
+	struct bau_targ_hubmask	distribution;
 	/*
 	 * message template, consisting of header and payload:
 	 */
-	struct bau_msg_header header;
-	struct bau_msg_payload payload;
+	struct bau_msg_header		header;
+	struct bau_msg_payload		payload;
 };
 /*
  *   -payload--    ---------header------
@@ -281,59 +338,51 @@
  * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
  * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120)
  * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
- *  sw_ack_vector and payload_2)
+ *  swack_vec and payload_2)
  * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
  *  Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
  *  operation."
  */
-struct bau_payload_queue_entry {
-	unsigned long address;		/* signifies a page or all TLB's
-						of the cpu */
+struct bau_pq_entry {
+	unsigned long	address;	/* signifies a page or all TLB's
+					   of the cpu */
 	/* 64 bits, bytes 0-7 */
-
-	unsigned short sending_cpu;	/* cpu that sent the message */
+	unsigned short	sending_cpu;	/* cpu that sent the message */
 	/* 16 bits, bytes 8-9 */
-
-	unsigned short acknowledge_count; /* filled in by destination */
+	unsigned short	acknowledge_count; /* filled in by destination */
 	/* 16 bits, bytes 10-11 */
-
 	/* these next 3 bytes come from bits 58-81 of the message header */
-	unsigned short replied_to:1;    /* sent as 0 by the source */
-	unsigned short msg_type:3;      /* software message type */
-	unsigned short canceled:1;      /* sent as 0 by the source */
-	unsigned short unused1:3;       /* not currently using */
+	unsigned short	replied_to:1;	/* sent as 0 by the source */
+	unsigned short	msg_type:3;	/* software message type */
+	unsigned short	canceled:1;	/* sent as 0 by the source */
+	unsigned short	unused1:3;	/* not currently using */
 	/* byte 12 */
-
-	unsigned char unused2a;		/* not currently using */
+	unsigned char	unused2a;	/* not currently using */
 	/* byte 13 */
-	unsigned char unused2;		/* not currently using */
+	unsigned char	unused2;	/* not currently using */
 	/* byte 14 */
-
-	unsigned char sw_ack_vector;	/* filled in by the hardware */
+	unsigned char	swack_vec;	/* filled in by the hardware */
 	/* byte 15 (bits 127:120) */
-
-	unsigned short sequence;	/* message sequence number */
+	unsigned short	sequence;	/* message sequence number */
 	/* bytes 16-17 */
-	unsigned char unused4[2];	/* not currently using bytes 18-19 */
+	unsigned char	unused4[2];	/* not currently using bytes 18-19 */
 	/* bytes 18-19 */
-
-	int number_of_cpus;		/* filled in at destination */
+	int		number_of_cpus;	/* filled in at destination */
 	/* 32 bits, bytes 20-23 (aligned) */
-
-	unsigned char unused5[8];       /* not using */
+	unsigned char	unused5[8];	/* not using */
 	/* bytes 24-31 */
 };
 
 struct msg_desc {
-	struct bau_payload_queue_entry *msg;
-	int msg_slot;
-	int sw_ack_slot;
-	struct bau_payload_queue_entry *va_queue_first;
-	struct bau_payload_queue_entry *va_queue_last;
+	struct bau_pq_entry	*msg;
+	int			msg_slot;
+	int			swack_slot;
+	struct bau_pq_entry	*queue_first;
+	struct bau_pq_entry	*queue_last;
 };
 
 struct reset_args {
-	int sender;
+	int			sender;
 };
 
 /*
@@ -341,112 +390,226 @@
  */
 struct ptc_stats {
 	/* sender statistics */
-	unsigned long s_giveup; /* number of fall backs to IPI-style flushes */
-	unsigned long s_requestor; /* number of shootdown requests */
-	unsigned long s_stimeout; /* source side timeouts */
-	unsigned long s_dtimeout; /* destination side timeouts */
-	unsigned long s_time; /* time spent in sending side */
-	unsigned long s_retriesok; /* successful retries */
-	unsigned long s_ntargcpu; /* total number of cpu's targeted */
-	unsigned long s_ntargself; /* times the sending cpu was targeted */
-	unsigned long s_ntarglocals; /* targets of cpus on the local blade */
-	unsigned long s_ntargremotes; /* targets of cpus on remote blades */
-	unsigned long s_ntarglocaluvhub; /* targets of the local hub */
-	unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */
-	unsigned long s_ntarguvhub; /* total number of uvhubs targeted */
-	unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/
-	unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */
-	unsigned long s_ntarguvhub4; /* number of times target hubs >= 4 */
-	unsigned long s_ntarguvhub2; /* number of times target hubs >= 2 */
-	unsigned long s_ntarguvhub1; /* number of times target hubs == 1 */
-	unsigned long s_resets_plug; /* ipi-style resets from plug state */
-	unsigned long s_resets_timeout; /* ipi-style resets from timeouts */
-	unsigned long s_busy; /* status stayed busy past s/w timer */
-	unsigned long s_throttles; /* waits in throttle */
-	unsigned long s_retry_messages; /* retry broadcasts */
-	unsigned long s_bau_reenabled; /* for bau enable/disable */
-	unsigned long s_bau_disabled; /* for bau enable/disable */
+	unsigned long	s_giveup;		/* number of fall backs to
+						   IPI-style flushes */
+	unsigned long	s_requestor;		/* number of shootdown
+						   requests */
+	unsigned long	s_stimeout;		/* source side timeouts */
+	unsigned long	s_dtimeout;		/* destination side timeouts */
+	unsigned long	s_time;			/* time spent in sending side */
+	unsigned long	s_retriesok;		/* successful retries */
+	unsigned long	s_ntargcpu;		/* total number of cpu's
+						   targeted */
+	unsigned long	s_ntargself;		/* times the sending cpu was
+						   targeted */
+	unsigned long	s_ntarglocals;		/* targets of cpus on the local
+						   blade */
+	unsigned long	s_ntargremotes;		/* targets of cpus on remote
+						   blades */
+	unsigned long	s_ntarglocaluvhub;	/* targets of the local hub */
+	unsigned long	s_ntargremoteuvhub;	/* remotes hubs targeted */
+	unsigned long	s_ntarguvhub;		/* total number of uvhubs
+						   targeted */
+	unsigned long	s_ntarguvhub16;		/* number of times target
+						   hubs >= 16*/
+	unsigned long	s_ntarguvhub8;		/* number of times target
+						   hubs >= 8 */
+	unsigned long	s_ntarguvhub4;		/* number of times target
+						   hubs >= 4 */
+	unsigned long	s_ntarguvhub2;		/* number of times target
+						   hubs >= 2 */
+	unsigned long	s_ntarguvhub1;		/* number of times target
+						   hubs == 1 */
+	unsigned long	s_resets_plug;		/* ipi-style resets from plug
+						   state */
+	unsigned long	s_resets_timeout;	/* ipi-style resets from
+						   timeouts */
+	unsigned long	s_busy;			/* status stayed busy past
+						   s/w timer */
+	unsigned long	s_throttles;		/* waits in throttle */
+	unsigned long	s_retry_messages;	/* retry broadcasts */
+	unsigned long	s_bau_reenabled;	/* for bau enable/disable */
+	unsigned long	s_bau_disabled;		/* for bau enable/disable */
 	/* destination statistics */
-	unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */
-	unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */
-	unsigned long d_multmsg; /* interrupts with multiple messages */
-	unsigned long d_nomsg; /* interrupts with no message */
-	unsigned long d_time; /* time spent on destination side */
-	unsigned long d_requestee; /* number of messages processed */
-	unsigned long d_retries; /* number of retry messages processed */
-	unsigned long d_canceled; /* number of messages canceled by retries */
-	unsigned long d_nocanceled; /* retries that found nothing to cancel */
-	unsigned long d_resets; /* number of ipi-style requests processed */
-	unsigned long d_rcanceled; /* number of messages canceled by resets */
+	unsigned long	d_alltlb;		/* times all tlb's on this
+						   cpu were flushed */
+	unsigned long	d_onetlb;		/* times just one tlb on this
+						   cpu was flushed */
+	unsigned long	d_multmsg;		/* interrupts with multiple
+						   messages */
+	unsigned long	d_nomsg;		/* interrupts with no message */
+	unsigned long	d_time;			/* time spent on destination
+						   side */
+	unsigned long	d_requestee;		/* number of messages
+						   processed */
+	unsigned long	d_retries;		/* number of retry messages
+						   processed */
+	unsigned long	d_canceled;		/* number of messages canceled
+						   by retries */
+	unsigned long	d_nocanceled;		/* retries that found nothing
+						   to cancel */
+	unsigned long	d_resets;		/* number of ipi-style requests
+						   processed */
+	unsigned long	d_rcanceled;		/* number of messages canceled
+						   by resets */
+};
+
+struct tunables {
+	int			*tunp;
+	int			deflt;
 };
 
 struct hub_and_pnode {
-	short uvhub;
-	short pnode;
+	short			uvhub;
+	short			pnode;
 };
+
+struct socket_desc {
+	short			num_cpus;
+	short			cpu_number[MAX_CPUS_PER_SOCKET];
+};
+
+struct uvhub_desc {
+	unsigned short		socket_mask;
+	short			num_cpus;
+	short			uvhub;
+	short			pnode;
+	struct socket_desc	socket[2];
+};
+
 /*
  * one per-cpu; to locate the software tables
  */
 struct bau_control {
-	struct bau_desc *descriptor_base;
-	struct bau_payload_queue_entry *va_queue_first;
-	struct bau_payload_queue_entry *va_queue_last;
-	struct bau_payload_queue_entry *bau_msg_head;
-	struct bau_control *uvhub_master;
-	struct bau_control *socket_master;
-	struct ptc_stats *statp;
-	unsigned long timeout_interval;
-	unsigned long set_bau_on_time;
-	atomic_t active_descriptor_count;
-	int plugged_tries;
-	int timeout_tries;
-	int ipi_attempts;
-	int conseccompletes;
-	int baudisabled;
-	int set_bau_off;
-	short cpu;
-	short osnode;
-	short uvhub_cpu;
-	short uvhub;
-	short cpus_in_socket;
-	short cpus_in_uvhub;
-	short partition_base_pnode;
-	unsigned short message_number;
-	unsigned short uvhub_quiesce;
-	short socket_acknowledge_count[DEST_Q_SIZE];
-	cycles_t send_message;
-	spinlock_t uvhub_lock;
-	spinlock_t queue_lock;
+	struct bau_desc		*descriptor_base;
+	struct bau_pq_entry	*queue_first;
+	struct bau_pq_entry	*queue_last;
+	struct bau_pq_entry	*bau_msg_head;
+	struct bau_control	*uvhub_master;
+	struct bau_control	*socket_master;
+	struct ptc_stats	*statp;
+	unsigned long		timeout_interval;
+	unsigned long		set_bau_on_time;
+	atomic_t		active_descriptor_count;
+	int			plugged_tries;
+	int			timeout_tries;
+	int			ipi_attempts;
+	int			conseccompletes;
+	int			baudisabled;
+	int			set_bau_off;
+	short			cpu;
+	short			osnode;
+	short			uvhub_cpu;
+	short			uvhub;
+	short			cpus_in_socket;
+	short			cpus_in_uvhub;
+	short			partition_base_pnode;
+	unsigned short		message_number;
+	unsigned short		uvhub_quiesce;
+	short			socket_acknowledge_count[DEST_Q_SIZE];
+	cycles_t		send_message;
+	spinlock_t		uvhub_lock;
+	spinlock_t		queue_lock;
 	/* tunables */
-	int max_bau_concurrent;
-	int max_bau_concurrent_constant;
-	int plugged_delay;
-	int plugsb4reset;
-	int timeoutsb4reset;
-	int ipi_reset_limit;
-	int complete_threshold;
-	int congested_response_us;
-	int congested_reps;
-	int congested_period;
-	cycles_t period_time;
-	long period_requests;
-	struct hub_and_pnode *target_hub_and_pnode;
+	int			max_concurr;
+	int			max_concurr_const;
+	int			plugged_delay;
+	int			plugsb4reset;
+	int			timeoutsb4reset;
+	int			ipi_reset_limit;
+	int			complete_threshold;
+	int			cong_response_us;
+	int			cong_reps;
+	int			cong_period;
+	cycles_t		period_time;
+	long			period_requests;
+	struct hub_and_pnode	*thp;
 };
 
-static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
+static unsigned long read_mmr_uv2_status(void)
+{
+	return read_lmmr(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2);
+}
+
+static void write_mmr_data_broadcast(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image);
+}
+
+static void write_mmr_descriptor_base(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image);
+}
+
+static void write_mmr_activation(unsigned long index)
+{
+	write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
+}
+
+static void write_gmmr_activation(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image);
+}
+
+static void write_mmr_payload_first(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image);
+}
+
+static void write_mmr_payload_tail(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image);
+}
+
+static void write_mmr_payload_last(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image);
+}
+
+static void write_mmr_misc_control(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+}
+
+static unsigned long read_mmr_misc_control(int pnode)
+{
+	return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL);
+}
+
+static void write_mmr_sw_ack(unsigned long mr)
+{
+	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
+}
+
+static unsigned long read_mmr_sw_ack(void)
+{
+	return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
+}
+
+static unsigned long read_gmmr_sw_ack(int pnode)
+{
+	return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
+}
+
+static void write_mmr_data_config(int pnode, unsigned long mr)
+{
+	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr);
+}
+
+static inline int bau_uvhub_isset(int uvhub, struct bau_targ_hubmask *dstp)
 {
 	return constant_test_bit(uvhub, &dstp->bits[0]);
 }
-static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
+static inline void bau_uvhub_set(int pnode, struct bau_targ_hubmask *dstp)
 {
 	__set_bit(pnode, &dstp->bits[0]);
 }
-static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
+static inline void bau_uvhubs_clear(struct bau_targ_hubmask *dstp,
 				    int nbits)
 {
 	bitmap_zero(&dstp->bits[0], nbits);
 }
-static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp)
+static inline int bau_uvhub_weight(struct bau_targ_hubmask *dstp)
 {
 	return bitmap_weight((unsigned long *)&dstp->bits[0],
 				UV_DISTRIBUTION_SIZE);
@@ -457,9 +620,6 @@
 	bitmap_zero(&dstp->bits, nbits);
 }
 
-#define cpubit_isset(cpu, bau_local_cpumask) \
-	test_bit((cpu), (bau_local_cpumask).bits)
-
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
@@ -467,7 +627,7 @@
 	short counter;
 };
 
-/**
+/*
  * atomic_read_short - read a short atomic variable
  * @v: pointer of type atomic_short
  *
@@ -478,14 +638,14 @@
 	return v->counter;
 }
 
-/**
- * atomic_add_short_return - add and return a short int
+/*
+ * atom_asr - add and return a short int
  * @i: short value to add
  * @v: pointer of type atomic_short
  *
  * Atomically adds @i to @v and returns @i + @v
  */
-static inline int atomic_add_short_return(short i, struct atomic_short *v)
+static inline int atom_asr(short i, struct atomic_short *v)
 {
 	short __i = i;
 	asm volatile(LOCK_PREFIX "xaddw %0, %1"
@@ -494,4 +654,26 @@
 	return i + __i;
 }
 
+/*
+ * conditionally add 1 to *v, unless *v is >= u
+ * return 0 if we cannot add 1 to *v because it is >= u
+ * return 1 if we can add 1 to *v because it is < u
+ * the add is atomic
+ *
+ * This is close to atomic_add_unless(), but this allows the 'u' value
+ * to be lowered below the current 'v'.  atomic_add_unless can only stop
+ * on equal.
+ */
+static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
+{
+	spin_lock(lock);
+	if (atomic_read(v) >= u) {
+		spin_unlock(lock);
+		return 0;
+	}
+	atomic_inc(v);
+	spin_unlock(lock);
+	return 1;
+}
+
 #endif /* _ASM_X86_UV_UV_BAU_H */

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 4298002..f26544a 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h

@@ -77,8 +77,9 @@
  *
  *		1111110000000000
  *		5432109876543210
- *		pppppppppplc0cch	Nehalem-EX
- *		ppppppppplcc0cch	Westmere-EX
+ *		pppppppppplc0cch	Nehalem-EX (12 bits in hdw reg)
+ *		ppppppppplcc0cch	Westmere-EX (12 bits in hdw reg)
+ *		pppppppppppcccch	SandyBridge (15 bits in hdw reg)
  *		sssssssssss
  *
  *			p  = pnode bits
@@ -87,7 +88,7 @@
  *			h  = hyperthread
  *			s  = bits that are in the SOCKET_ID CSR
  *
- *	Note: Processor only supports 12 bits in the APICID register. The ACPI
+ *	Note: Processor may support fewer bits in the APICID register. The ACPI
  *	      tables hold all 16 bits. Software needs to be aware of this.
  *
  *	      Unless otherwise specified, all references to APICID refer to
@@ -138,6 +139,8 @@
 	unsigned long		global_mmr_base;
 	unsigned long		gpa_mask;
 	unsigned int		gnode_extra;
+	unsigned char		hub_revision;
+	unsigned char		apic_pnode_shift;
 	unsigned long		gnode_upper;
 	unsigned long		lowmem_remap_top;
 	unsigned long		lowmem_remap_base;
@@ -149,13 +152,31 @@
 	unsigned char		m_val;
 	unsigned char		n_val;
 	struct uv_scir_s	scir;
-	unsigned char		apic_pnode_shift;
 };
 
 DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define uv_hub_info		(&__get_cpu_var(__uv_hub_info))
 #define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
 
+/*
+ * Hub revisions less than UV2_HUB_REVISION_BASE are UV1 hubs. All UV2
+ * hubs have revision numbers greater than or equal to UV2_HUB_REVISION_BASE.
+ * This is a software convention - NOT the hardware revision numbers in
+ * the hub chip.
+ */
+#define UV1_HUB_REVISION_BASE		1
+#define UV2_HUB_REVISION_BASE		3
+
+static inline int is_uv1_hub(void)
+{
+	return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
+}
+
+static inline int is_uv2_hub(void)
+{
+	return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
+}
+
 union uvh_apicid {
     unsigned long       v;
     struct uvh_apicid_s {
@@ -180,11 +201,25 @@
 #define UV_PNODE_TO_GNODE(p)		((p) |uv_hub_info->gnode_extra)
 #define UV_PNODE_TO_NASID(p)		(UV_PNODE_TO_GNODE(p) << 1)
 
-#define UV_LOCAL_MMR_BASE		0xf4000000UL
-#define UV_GLOBAL_MMR32_BASE		0xf8000000UL
+#define UV1_LOCAL_MMR_BASE		0xf4000000UL
+#define UV1_GLOBAL_MMR32_BASE		0xf8000000UL
+#define UV1_LOCAL_MMR_SIZE		(64UL * 1024 * 1024)
+#define UV1_GLOBAL_MMR32_SIZE		(64UL * 1024 * 1024)
+
+#define UV2_LOCAL_MMR_BASE		0xfa000000UL
+#define UV2_GLOBAL_MMR32_BASE		0xfc000000UL
+#define UV2_LOCAL_MMR_SIZE		(32UL * 1024 * 1024)
+#define UV2_GLOBAL_MMR32_SIZE		(32UL * 1024 * 1024)
+
+#define UV_LOCAL_MMR_BASE		(is_uv1_hub() ? UV1_LOCAL_MMR_BASE     \
+						: UV2_LOCAL_MMR_BASE)
+#define UV_GLOBAL_MMR32_BASE		(is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE  \
+						: UV2_GLOBAL_MMR32_BASE)
+#define UV_LOCAL_MMR_SIZE		(is_uv1_hub() ? UV1_LOCAL_MMR_SIZE :   \
+						UV2_LOCAL_MMR_SIZE)
+#define UV_GLOBAL_MMR32_SIZE		(is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\
+						UV2_GLOBAL_MMR32_SIZE)
 #define UV_GLOBAL_MMR64_BASE		(uv_hub_info->global_mmr_base)
-#define UV_LOCAL_MMR_SIZE		(64UL * 1024 * 1024)
-#define UV_GLOBAL_MMR32_SIZE		(64UL * 1024 * 1024)
 
 #define UV_GLOBAL_GRU_MMR_BASE		0x4000000
 
@@ -301,6 +336,17 @@
 }
 
 /*
+ * Convert an apicid to the socket number on the blade
+ */
+static inline int uv_apicid_to_socket(int apicid)
+{
+	if (is_uv1_hub())
+		return (apicid >> (uv_hub_info->apic_pnode_shift - 1)) & 1;
+	else
+		return 0;
+}
+
+/*
  * Access global MMRs using the low memory MMR32 space. This region supports
  * faster MMR access but not all MMRs are accessible in this space.
  */
@@ -519,14 +565,13 @@
 
 /*
  * Get the minimum revision number of the hub chips within the partition.
- *     1 - initial rev 1.0 silicon
- *     2 - rev 2.0 production silicon
+ *     1 - UV1 rev 1.0 initial silicon
+ *     2 - UV1 rev 2.0 production silicon
+ *     3 - UV2 rev 1.0 initial silicon
  */
 static inline int uv_get_min_hub_revision_id(void)
 {
-	extern int uv_min_hub_revision_id;
-
-	return uv_min_hub_revision_id;
+	return uv_hub_info->hub_revision;
 }
 
 #endif /* CONFIG_X86_64 */

diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index f5bb64a..4be52c8 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h

@@ -11,13 +11,64 @@
 #ifndef _ASM_X86_UV_UV_MMRS_H
 #define _ASM_X86_UV_UV_MMRS_H
 
+/*
+ * This file contains MMR definitions for both UV1 & UV2 hubs.
+ *
+ * In general, MMR addresses and structures are identical on both hubs.
+ * These MMRs are identified as:
+ *	#define UVH_xxx		<address>
+ *	union uvh_xxx {
+ *		unsigned long       v;
+ *		struct uvh_int_cmpd_s {
+ *		} s;
+ *	};
+ *
+ * If the MMR exists on both hub type but has different addresses or
+ * contents, the MMR definition is similar to:
+ *	#define UV1H_xxx	<uv1 address>
+ *	#define UV2H_xxx	<uv2address>
+ *	#define UVH_xxx		(is_uv1_hub() ? UV1H_xxx : UV2H_xxx)
+ *	union uvh_xxx {
+ *		unsigned long       v;
+ *		struct uv1h_int_cmpd_s {	 (Common fields only)
+ *		} s;
+ *		struct uv1h_int_cmpd_s {	 (Full UV1 definition)
+ *		} s1;
+ *		struct uv2h_int_cmpd_s {	 (Full UV2 definition)
+ *		} s2;
+ *	};
+ *
+ * Only essential difference are enumerated. For example, if the address is
+ * the same for both UV1 & UV2, only a single #define is generated. Likewise,
+ * if the contents is the same for both hubs, only the "s" structure is
+ * generated.
+ *
+ * If the MMR exists on ONLY 1 type of hub, no generic definition is
+ * generated:
+ *	#define UVnH_xxx	<uvn address>
+ *	union uvnh_xxx {
+ *		unsigned long       v;
+ *		struct uvh_int_cmpd_s {
+ *		} sn;
+ *	};
+ */
+
 #define UV_MMR_ENABLE		(1UL << 63)
 
+#define UV1_HUB_PART_NUMBER	0x88a5
+#define UV2_HUB_PART_NUMBER	0x8eb8
+
+/* Compat: if this #define is present, UV headers support UV2 */
+#define UV2_HUB_IS_SUPPORTED	1
+
+/* KABI compat: if this #define is present, KABI hacks are present */
+#define UV2_HUB_KABI_HACKS	1
+
 /* ========================================================================= */
 /*                          UVH_BAU_DATA_BROADCAST                           */
 /* ========================================================================= */
 #define UVH_BAU_DATA_BROADCAST 0x61688UL
-#define UVH_BAU_DATA_BROADCAST_32 0x0440
+#define UVH_BAU_DATA_BROADCAST_32 0x440
 
 #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0
 #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL
@@ -34,7 +85,7 @@
 /*                           UVH_BAU_DATA_CONFIG                             */
 /* ========================================================================= */
 #define UVH_BAU_DATA_CONFIG 0x61680UL
-#define UVH_BAU_DATA_CONFIG_32 0x0438
+#define UVH_BAU_DATA_CONFIG_32 0x438
 
 #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
 #define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
@@ -73,125 +124,245 @@
 /*                           UVH_EVENT_OCCURRED0                             */
 /* ========================================================================= */
 #define UVH_EVENT_OCCURRED0 0x70000UL
-#define UVH_EVENT_OCCURRED0_32 0x005e8
+#define UVH_EVENT_OCCURRED0_32 0x5e8
 
-#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
-#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
-#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
-#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
-#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
-#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
-#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3
-#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
-#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4
-#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL
-#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5
-#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL
-#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6
-#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL
-#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7
-#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL
-#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
-#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
-#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
-#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
-#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
-#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
-#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
-#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
-#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
-#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
-#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
-#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
-#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
-#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
-#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15
-#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL
-#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16
-#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL
-#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17
-#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL
-#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18
-#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL
-#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19
-#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL
-#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20
-#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL
-#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21
-#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL
-#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22
-#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL
-#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39
-#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL
-#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40
-#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL
-#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41
-#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL
-#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42
-#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL
-#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43
-#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL
-#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44
-#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
-#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45
-#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL
-#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46
-#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL
-#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47
-#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL
-#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48
-#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL
-#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49
-#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL
-#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50
-#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL
-#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51
-#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL
-#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52
-#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL
-#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53
-#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL
-#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54
-#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL
-#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55
-#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
-#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
-#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
+#define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0
+#define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
+#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
+#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
+#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
+#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
+#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3
+#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
+#define UV1H_EVENT_OCCURRED0_RH_HCERR_SHFT 4
+#define UV1H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL
+#define UV1H_EVENT_OCCURRED0_XN_HCERR_SHFT 5
+#define UV1H_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL
+#define UV1H_EVENT_OCCURRED0_SI_HCERR_SHFT 6
+#define UV1H_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL
+#define UV1H_EVENT_OCCURRED0_LB_AOERR0_SHFT 7
+#define UV1H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL
+#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
+#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
+#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
+#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
+#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
+#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
+#define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
+#define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
+#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
+#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
+#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
+#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
+#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
+#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
+#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15
+#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL
+#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16
+#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL
+#define UV1H_EVENT_OCCURRED0_LH_AOERR1_SHFT 17
+#define UV1H_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL
+#define UV1H_EVENT_OCCURRED0_RH_AOERR1_SHFT 18
+#define UV1H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL
+#define UV1H_EVENT_OCCURRED0_XN_AOERR1_SHFT 19
+#define UV1H_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL
+#define UV1H_EVENT_OCCURRED0_SI_AOERR1_SHFT 20
+#define UV1H_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL
+#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21
+#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL
+#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22
+#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38
+#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL
+#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39
+#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL
+#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40
+#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL
+#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41
+#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL
+#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42
+#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL
+#define UV1H_EVENT_OCCURRED0_LTC_INT_SHFT 43
+#define UV1H_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL
+#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44
+#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
+#define UV1H_EVENT_OCCURRED0_IPI_INT_SHFT 45
+#define UV1H_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL
+#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46
+#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL
+#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47
+#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL
+#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48
+#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL
+#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49
+#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL
+#define UV1H_EVENT_OCCURRED0_PROFILE_INT_SHFT 50
+#define UV1H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL
+#define UV1H_EVENT_OCCURRED0_RTC0_SHFT 51
+#define UV1H_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL
+#define UV1H_EVENT_OCCURRED0_RTC1_SHFT 52
+#define UV1H_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL
+#define UV1H_EVENT_OCCURRED0_RTC2_SHFT 53
+#define UV1H_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL
+#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54
+#define UV1H_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL
+#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55
+#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
+#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
+#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
+
+#define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0
+#define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2
+#define UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
+#define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
+#define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
+#define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
+#define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
+#define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
+#define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
+#define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
+#define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
+#define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
+#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53
+#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+
 union uvh_event_occurred0_u {
     unsigned long	v;
-    struct uvh_event_occurred0_s {
+    struct uv1h_event_occurred0_s {
 	unsigned long	lb_hcerr             :  1;  /* RW, W1C */
 	unsigned long	gr0_hcerr            :  1;  /* RW, W1C */
 	unsigned long	gr1_hcerr            :  1;  /* RW, W1C */
@@ -250,14 +421,76 @@
 	unsigned long	bau_data             :  1;  /* RW, W1C */
 	unsigned long	power_management_req :  1;  /* RW, W1C */
 	unsigned long	rsvd_57_63           :  7;  /*    */
-    } s;
+    } s1;
+    struct uv2h_event_occurred0_s {
+	unsigned long	lb_hcerr            :  1;  /* RW */
+	unsigned long	qp_hcerr            :  1;  /* RW */
+	unsigned long	rh_hcerr            :  1;  /* RW */
+	unsigned long	lh0_hcerr           :  1;  /* RW */
+	unsigned long	lh1_hcerr           :  1;  /* RW */
+	unsigned long	gr0_hcerr           :  1;  /* RW */
+	unsigned long	gr1_hcerr           :  1;  /* RW */
+	unsigned long	ni0_hcerr           :  1;  /* RW */
+	unsigned long	ni1_hcerr           :  1;  /* RW */
+	unsigned long	lb_aoerr0           :  1;  /* RW */
+	unsigned long	qp_aoerr0           :  1;  /* RW */
+	unsigned long	rh_aoerr0           :  1;  /* RW */
+	unsigned long	lh0_aoerr0          :  1;  /* RW */
+	unsigned long	lh1_aoerr0          :  1;  /* RW */
+	unsigned long	gr0_aoerr0          :  1;  /* RW */
+	unsigned long	gr1_aoerr0          :  1;  /* RW */
+	unsigned long	xb_aoerr0           :  1;  /* RW */
+	unsigned long	rt_aoerr0           :  1;  /* RW */
+	unsigned long	ni0_aoerr0          :  1;  /* RW */
+	unsigned long	ni1_aoerr0          :  1;  /* RW */
+	unsigned long	lb_aoerr1           :  1;  /* RW */
+	unsigned long	qp_aoerr1           :  1;  /* RW */
+	unsigned long	rh_aoerr1           :  1;  /* RW */
+	unsigned long	lh0_aoerr1          :  1;  /* RW */
+	unsigned long	lh1_aoerr1          :  1;  /* RW */
+	unsigned long	gr0_aoerr1          :  1;  /* RW */
+	unsigned long	gr1_aoerr1          :  1;  /* RW */
+	unsigned long	xb_aoerr1           :  1;  /* RW */
+	unsigned long	rt_aoerr1           :  1;  /* RW */
+	unsigned long	ni0_aoerr1          :  1;  /* RW */
+	unsigned long	ni1_aoerr1          :  1;  /* RW */
+	unsigned long	system_shutdown_int :  1;  /* RW */
+	unsigned long	lb_irq_int_0        :  1;  /* RW */
+	unsigned long	lb_irq_int_1        :  1;  /* RW */
+	unsigned long	lb_irq_int_2        :  1;  /* RW */
+	unsigned long	lb_irq_int_3        :  1;  /* RW */
+	unsigned long	lb_irq_int_4        :  1;  /* RW */
+	unsigned long	lb_irq_int_5        :  1;  /* RW */
+	unsigned long	lb_irq_int_6        :  1;  /* RW */
+	unsigned long	lb_irq_int_7        :  1;  /* RW */
+	unsigned long	lb_irq_int_8        :  1;  /* RW */
+	unsigned long	lb_irq_int_9        :  1;  /* RW */
+	unsigned long	lb_irq_int_10       :  1;  /* RW */
+	unsigned long	lb_irq_int_11       :  1;  /* RW */
+	unsigned long	lb_irq_int_12       :  1;  /* RW */
+	unsigned long	lb_irq_int_13       :  1;  /* RW */
+	unsigned long	lb_irq_int_14       :  1;  /* RW */
+	unsigned long	lb_irq_int_15       :  1;  /* RW */
+	unsigned long	l1_nmi_int          :  1;  /* RW */
+	unsigned long	stop_clock          :  1;  /* RW */
+	unsigned long	asic_to_l1          :  1;  /* RW */
+	unsigned long	l1_to_asic          :  1;  /* RW */
+	unsigned long	la_seq_trigger      :  1;  /* RW */
+	unsigned long	ipi_int             :  1;  /* RW */
+	unsigned long	extio_int0          :  1;  /* RW */
+	unsigned long	extio_int1          :  1;  /* RW */
+	unsigned long	extio_int2          :  1;  /* RW */
+	unsigned long	extio_int3          :  1;  /* RW */
+	unsigned long	profile_int         :  1;  /* RW */
+	unsigned long	rsvd_59_63          :  5;  /*    */
+    } s2;
 };
 
 /* ========================================================================= */
 /*                        UVH_EVENT_OCCURRED0_ALIAS                          */
 /* ========================================================================= */
 #define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL
-#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0
+#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0
 
 /* ========================================================================= */
 /*                         UVH_GR0_TLB_INT0_CONFIG                           */
@@ -432,8 +665,16 @@
 /* ========================================================================= */
 #define UVH_INT_CMPC 0x22100UL
 
-#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0
-#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
+#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT	0
+#define UV2H_INT_CMPC_REAL_TIME_CMPC_SHFT	0
+#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT	(is_uv1_hub() ?		\
+			UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT :	\
+			UV2H_INT_CMPC_REAL_TIME_CMPC_SHFT)
+#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK	0xffffffffffffffUL
+#define UV2H_INT_CMPC_REAL_TIME_CMPC_MASK	0xffffffffffffffUL
+#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK	(is_uv1_hub() ?		\
+			UV1H_INT_CMPC_REAL_TIME_CMPC_MASK :	\
+			UV2H_INT_CMPC_REAL_TIME_CMPC_MASK)
 
 union uvh_int_cmpc_u {
     unsigned long	v;
@@ -448,8 +689,16 @@
 /* ========================================================================= */
 #define UVH_INT_CMPD 0x22180UL
 
-#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0
-#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
+#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT	0
+#define UV2H_INT_CMPD_REAL_TIME_CMPD_SHFT	0
+#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT	(is_uv1_hub() ?		\
+			UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT :	\
+			UV2H_INT_CMPD_REAL_TIME_CMPD_SHFT)
+#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK	0xffffffffffffffUL
+#define UV2H_INT_CMPD_REAL_TIME_CMPD_MASK	0xffffffffffffffUL
+#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK	(is_uv1_hub() ?		\
+			UV1H_INT_CMPD_REAL_TIME_CMPD_MASK :	\
+			UV2H_INT_CMPD_REAL_TIME_CMPD_MASK)
 
 union uvh_int_cmpd_u {
     unsigned long	v;
@@ -463,7 +712,7 @@
 /*                               UVH_IPI_INT                                 */
 /* ========================================================================= */
 #define UVH_IPI_INT 0x60500UL
-#define UVH_IPI_INT_32 0x0348
+#define UVH_IPI_INT_32 0x348
 
 #define UVH_IPI_INT_VECTOR_SHFT 0
 #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL
@@ -493,7 +742,7 @@
 /*                   UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST                     */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009c0
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
 
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -515,7 +764,7 @@
 /*                    UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST                     */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009c8
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
 
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -533,7 +782,7 @@
 /*                    UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL                     */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x009d0
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
 
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
@@ -551,7 +800,7 @@
 /*                   UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE                    */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0a68
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
 
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
@@ -585,6 +834,7 @@
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
 union uvh_lb_bau_intd_software_acknowledge_u {
     unsigned long	v;
     struct uvh_lb_bau_intd_software_acknowledge_s {
@@ -612,13 +862,13 @@
 /*                UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS                 */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
 
 /* ========================================================================= */
 /*                         UVH_LB_BAU_MISC_CONTROL                           */
 /* ========================================================================= */
 #define UVH_LB_BAU_MISC_CONTROL 0x320170UL
-#define UVH_LB_BAU_MISC_CONTROL_32 0x00a10
+#define UVH_LB_BAU_MISC_CONTROL_32 0xa10
 
 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
@@ -628,8 +878,8 @@
 #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
 #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
 #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
 #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
 #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
@@ -650,8 +900,86 @@
 #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+
+#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
+#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
+#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
+#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
+#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
+#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
+#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
+#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
+#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
+#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
+#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
+#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
+#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
+#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
+#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
+#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
+#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
+#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
+#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
+#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
+#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
+#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
+#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
+#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
+#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
+#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
+#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UV1H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
+#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+
+#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
+#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
+#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
+#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
+#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
+#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
+#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
+#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
+#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
+#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
+#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
+#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
+#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
+#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
+#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
+#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
+#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
+#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
+#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
+#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
+#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
+#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
+#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
+#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
+#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
+#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
+#define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
+#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
 
 union uvh_lb_bau_misc_control_u {
     unsigned long	v;
@@ -660,7 +988,25 @@
 	unsigned long	apic_mode                          :  1;  /* RW */
 	unsigned long	force_broadcast                    :  1;  /* RW */
 	unsigned long	force_lock_nop                     :  1;  /* RW */
-	unsigned long	csi_agent_presence_vector          :  3;  /* RW */
+	unsigned long	qpi_agent_presence_vector          :  3;  /* RW */
+	unsigned long	descriptor_fetch_mode              :  1;  /* RW */
+	unsigned long	enable_intd_soft_ack_mode          :  1;  /* RW */
+	unsigned long	intd_soft_ack_timeout_period       :  4;  /* RW */
+	unsigned long	enable_dual_mapping_mode           :  1;  /* RW */
+	unsigned long	vga_io_port_decode_enable          :  1;  /* RW */
+	unsigned long	vga_io_port_16_bit_decode          :  1;  /* RW */
+	unsigned long	suppress_dest_registration         :  1;  /* RW */
+	unsigned long	programmed_initial_priority        :  3;  /* RW */
+	unsigned long	use_incoming_priority              :  1;  /* RW */
+	unsigned long	enable_programmed_initial_priority :  1;  /* RW */
+	unsigned long	rsvd_29_63    : 35;
+    } s;
+    struct uv1h_lb_bau_misc_control_s {
+	unsigned long	rejection_delay                    :  8;  /* RW */
+	unsigned long	apic_mode                          :  1;  /* RW */
+	unsigned long	force_broadcast                    :  1;  /* RW */
+	unsigned long	force_lock_nop                     :  1;  /* RW */
+	unsigned long	qpi_agent_presence_vector          :  3;  /* RW */
 	unsigned long	descriptor_fetch_mode              :  1;  /* RW */
 	unsigned long	enable_intd_soft_ack_mode          :  1;  /* RW */
 	unsigned long	intd_soft_ack_timeout_period       :  4;  /* RW */
@@ -673,14 +1019,40 @@
 	unsigned long	enable_programmed_initial_priority :  1;  /* RW */
 	unsigned long	rsvd_29_47                         : 19;  /*    */
 	unsigned long	fun                                : 16;  /* RW */
-    } s;
+    } s1;
+    struct uv2h_lb_bau_misc_control_s {
+	unsigned long	rejection_delay                      :  8;  /* RW */
+	unsigned long	apic_mode                            :  1;  /* RW */
+	unsigned long	force_broadcast                      :  1;  /* RW */
+	unsigned long	force_lock_nop                       :  1;  /* RW */
+	unsigned long	qpi_agent_presence_vector            :  3;  /* RW */
+	unsigned long	descriptor_fetch_mode                :  1;  /* RW */
+	unsigned long	enable_intd_soft_ack_mode            :  1;  /* RW */
+	unsigned long	intd_soft_ack_timeout_period         :  4;  /* RW */
+	unsigned long	enable_dual_mapping_mode             :  1;  /* RW */
+	unsigned long	vga_io_port_decode_enable            :  1;  /* RW */
+	unsigned long	vga_io_port_16_bit_decode            :  1;  /* RW */
+	unsigned long	suppress_dest_registration           :  1;  /* RW */
+	unsigned long	programmed_initial_priority          :  3;  /* RW */
+	unsigned long	use_incoming_priority                :  1;  /* RW */
+	unsigned long	enable_programmed_initial_priority   :  1;  /* RW */
+	unsigned long	enable_automatic_apic_mode_selection :  1;  /* RW */
+	unsigned long	apic_mode_status                     :  1;  /* RO */
+	unsigned long	suppress_interrupts_to_self          :  1;  /* RW */
+	unsigned long	enable_lock_based_system_flush       :  1;  /* RW */
+	unsigned long	enable_extended_sb_status            :  1;  /* RW */
+	unsigned long	suppress_int_prio_udt_to_self        :  1;  /* RW */
+	unsigned long	use_legacy_descriptor_formats        :  1;  /* RW */
+	unsigned long	rsvd_36_47                           : 12;  /*    */
+	unsigned long	fun                                  : 16;  /* RW */
+    } s2;
 };
 
 /* ========================================================================= */
 /*                     UVH_LB_BAU_SB_ACTIVATION_CONTROL                      */
 /* ========================================================================= */
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009a8
+#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
 
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
@@ -703,7 +1075,7 @@
 /*                    UVH_LB_BAU_SB_ACTIVATION_STATUS_0                      */
 /* ========================================================================= */
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009b0
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
 
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
@@ -719,7 +1091,7 @@
 /*                    UVH_LB_BAU_SB_ACTIVATION_STATUS_1                      */
 /* ========================================================================= */
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009b8
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
 
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
@@ -735,7 +1107,7 @@
 /*                      UVH_LB_BAU_SB_DESCRIPTOR_BASE                        */
 /* ========================================================================= */
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009a0
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
 
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
@@ -754,23 +1126,6 @@
 };
 
 /* ========================================================================= */
-/*                   UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK                     */
-/* ========================================================================= */
-#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL
-#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0
-
-#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0
-#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL
-
-union uvh_lb_target_physical_apic_id_mask_u {
-	unsigned long v;
-	struct uvh_lb_target_physical_apic_id_mask_s {
-		unsigned long bit_enables : 32;  /* RW */
-		unsigned long rsvd_32_63  : 32;  /*    */
-	} s;
-};
-
-/* ========================================================================= */
 /*                               UVH_NODE_ID                                 */
 /* ========================================================================= */
 #define UVH_NODE_ID 0x0UL
@@ -785,10 +1140,36 @@
 #define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
 #define UVH_NODE_ID_NODE_ID_SHFT 32
 #define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48
-#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL
-#define UVH_NODE_ID_NI_PORT_SHFT 56
-#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL
+
+#define UV1H_NODE_ID_FORCE1_SHFT 0
+#define UV1H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UV1H_NODE_ID_MANUFACTURER_SHFT 1
+#define UV1H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UV1H_NODE_ID_PART_NUMBER_SHFT 12
+#define UV1H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UV1H_NODE_ID_REVISION_SHFT 28
+#define UV1H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UV1H_NODE_ID_NODE_ID_SHFT 32
+#define UV1H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UV1H_NODE_ID_NODES_PER_BIT_SHFT 48
+#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL
+#define UV1H_NODE_ID_NI_PORT_SHFT 56
+#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL
+
+#define UV2H_NODE_ID_FORCE1_SHFT 0
+#define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UV2H_NODE_ID_MANUFACTURER_SHFT 1
+#define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UV2H_NODE_ID_PART_NUMBER_SHFT 12
+#define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UV2H_NODE_ID_REVISION_SHFT 28
+#define UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UV2H_NODE_ID_NODE_ID_SHFT 32
+#define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50
+#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
+#define UV2H_NODE_ID_NI_PORT_SHFT 57
+#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
 
 union uvh_node_id_u {
     unsigned long	v;
@@ -798,12 +1179,31 @@
 	unsigned long	part_number   : 16;  /* RO */
 	unsigned long	revision      :  4;  /* RO */
 	unsigned long	node_id       : 15;  /* RW */
+	unsigned long	rsvd_47_63    : 17;
+    } s;
+    struct uv1h_node_id_s {
+	unsigned long	force1        :  1;  /* RO */
+	unsigned long	manufacturer  : 11;  /* RO */
+	unsigned long	part_number   : 16;  /* RO */
+	unsigned long	revision      :  4;  /* RO */
+	unsigned long	node_id       : 15;  /* RW */
 	unsigned long	rsvd_47       :  1;  /*    */
 	unsigned long	nodes_per_bit :  7;  /* RW */
 	unsigned long	rsvd_55       :  1;  /*    */
 	unsigned long	ni_port       :  4;  /* RO */
 	unsigned long	rsvd_60_63    :  4;  /*    */
-    } s;
+    } s1;
+    struct uv2h_node_id_s {
+	unsigned long	force1        :  1;  /* RO */
+	unsigned long	manufacturer  : 11;  /* RO */
+	unsigned long	part_number   : 16;  /* RO */
+	unsigned long	revision      :  4;  /* RO */
+	unsigned long	node_id       : 15;  /* RW */
+	unsigned long	rsvd_47_49    :  3;  /*    */
+	unsigned long	nodes_per_bit :  7;  /* RO */
+	unsigned long	ni_port       :  5;  /* RO */
+	unsigned long	rsvd_62_63    :  2;  /*    */
+    } s2;
 };
 
 /* ========================================================================= */
@@ -954,18 +1354,38 @@
 #define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
 #define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
 #define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12
-#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL
+
+#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
+#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
+#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
+#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12
+#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL
+
+#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
+#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
+#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
+#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
 
 union uvh_rh_gam_config_mmr_u {
     unsigned long	v;
     struct uvh_rh_gam_config_mmr_s {
 	unsigned long	m_skt     :  6;  /* RW */
 	unsigned long	n_skt     :  4;  /* RW */
+	unsigned long	rsvd_10_63    : 54;
+    } s;
+    struct uv1h_rh_gam_config_mmr_s {
+	unsigned long	m_skt     :  6;  /* RW */
+	unsigned long	n_skt     :  4;  /* RW */
 	unsigned long	rsvd_10_11:  2;  /*    */
 	unsigned long	mmiol_cfg :  1;  /* RW */
 	unsigned long	rsvd_13_63: 51;  /*    */
-    } s;
+    } s1;
+    struct uv2h_rh_gam_config_mmr_s {
+	unsigned long	m_skt :  6;  /* RW */
+	unsigned long	n_skt :  4;  /* RW */
+	unsigned long	rsvd_10_63: 54;  /*    */
+    } s2;
 };
 
 /* ========================================================================= */
@@ -975,25 +1395,49 @@
 
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
 
 union uvh_rh_gam_gru_overlay_config_mmr_u {
     unsigned long	v;
     struct uvh_rh_gam_gru_overlay_config_mmr_s {
 	unsigned long	rsvd_0_27: 28;  /*    */
 	unsigned long	base   : 18;  /* RW */
+	unsigned long	rsvd_46_62    : 17;
+	unsigned long	enable :  1;  /* RW */
+    } s;
+    struct uv1h_rh_gam_gru_overlay_config_mmr_s {
+	unsigned long	rsvd_0_27: 28;  /*    */
+	unsigned long	base   : 18;  /* RW */
 	unsigned long	rsvd_46_47:  2;  /*    */
 	unsigned long	gr4    :  1;  /* RW */
 	unsigned long	rsvd_49_51:  3;  /*    */
 	unsigned long	n_gru  :  4;  /* RW */
 	unsigned long	rsvd_56_62:  7;  /*    */
 	unsigned long	enable :  1;  /* RW */
-    } s;
+    } s1;
+    struct uv2h_rh_gam_gru_overlay_config_mmr_s {
+	unsigned long	rsvd_0_27: 28;  /*    */
+	unsigned long	base   : 18;  /* RW */
+	unsigned long	rsvd_46_51:  6;  /*    */
+	unsigned long	n_gru  :  4;  /* RW */
+	unsigned long	rsvd_56_62:  7;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s2;
 };
 
 /* ========================================================================= */
@@ -1001,25 +1445,42 @@
 /* ========================================================================= */
 #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
 
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
 
 union uvh_rh_gam_mmioh_overlay_config_mmr_u {
     unsigned long	v;
-    struct uvh_rh_gam_mmioh_overlay_config_mmr_s {
+    struct uv1h_rh_gam_mmioh_overlay_config_mmr_s {
 	unsigned long	rsvd_0_29: 30;  /*    */
 	unsigned long	base   : 16;  /* RW */
 	unsigned long	m_io   :  6;  /* RW */
 	unsigned long	n_io   :  4;  /* RW */
 	unsigned long	rsvd_56_62:  7;  /*    */
 	unsigned long	enable :  1;  /* RW */
-    } s;
+    } s1;
+    struct uv2h_rh_gam_mmioh_overlay_config_mmr_s {
+	unsigned long	rsvd_0_26: 27;  /*    */
+	unsigned long	base   : 19;  /* RW */
+	unsigned long	m_io   :  6;  /* RW */
+	unsigned long	n_io   :  4;  /* RW */
+	unsigned long	rsvd_56_62:  7;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s2;
 };
 
 /* ========================================================================= */
@@ -1029,20 +1490,40 @@
 
 #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
 #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46
+#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL
+#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
 
 union uvh_rh_gam_mmr_overlay_config_mmr_u {
     unsigned long	v;
     struct uvh_rh_gam_mmr_overlay_config_mmr_s {
 	unsigned long	rsvd_0_25: 26;  /*    */
 	unsigned long	base     : 20;  /* RW */
+	unsigned long	rsvd_46_62    : 17;
+	unsigned long	enable   :  1;  /* RW */
+    } s;
+    struct uv1h_rh_gam_mmr_overlay_config_mmr_s {
+	unsigned long	rsvd_0_25: 26;  /*    */
+	unsigned long	base     : 20;  /* RW */
 	unsigned long	dual_hub :  1;  /* RW */
 	unsigned long	rsvd_47_62: 16;  /*    */
 	unsigned long	enable   :  1;  /* RW */
-    } s;
+    } s1;
+    struct uv2h_rh_gam_mmr_overlay_config_mmr_s {
+	unsigned long	rsvd_0_25: 26;  /*    */
+	unsigned long	base   : 20;  /* RW */
+	unsigned long	rsvd_46_62: 17;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s2;
 };
 
 /* ========================================================================= */
@@ -1103,10 +1584,11 @@
 /*                               UVH_SCRATCH5                                */
 /* ========================================================================= */
 #define UVH_SCRATCH5 0x2d0200UL
-#define UVH_SCRATCH5_32 0x00778
+#define UVH_SCRATCH5_32 0x778
 
 #define UVH_SCRATCH5_SCRATCH5_SHFT 0
 #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+
 union uvh_scratch5_u {
     unsigned long	v;
     struct uvh_scratch5_s {
@@ -1114,4 +1596,154 @@
     } s;
 };
 
+/* ========================================================================= */
+/*                           UV2H_EVENT_OCCURRED2                            */
+/* ========================================================================= */
+#define UV2H_EVENT_OCCURRED2 0x70100UL
+#define UV2H_EVENT_OCCURRED2_32 0xb68
+
+#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+union uv2h_event_occurred2_u {
+    unsigned long	v;
+    struct uv2h_event_occurred2_s {
+	unsigned long	rtc_0  :  1;  /* RW */
+	unsigned long	rtc_1  :  1;  /* RW */
+	unsigned long	rtc_2  :  1;  /* RW */
+	unsigned long	rtc_3  :  1;  /* RW */
+	unsigned long	rtc_4  :  1;  /* RW */
+	unsigned long	rtc_5  :  1;  /* RW */
+	unsigned long	rtc_6  :  1;  /* RW */
+	unsigned long	rtc_7  :  1;  /* RW */
+	unsigned long	rtc_8  :  1;  /* RW */
+	unsigned long	rtc_9  :  1;  /* RW */
+	unsigned long	rtc_10 :  1;  /* RW */
+	unsigned long	rtc_11 :  1;  /* RW */
+	unsigned long	rtc_12 :  1;  /* RW */
+	unsigned long	rtc_13 :  1;  /* RW */
+	unsigned long	rtc_14 :  1;  /* RW */
+	unsigned long	rtc_15 :  1;  /* RW */
+	unsigned long	rtc_16 :  1;  /* RW */
+	unsigned long	rtc_17 :  1;  /* RW */
+	unsigned long	rtc_18 :  1;  /* RW */
+	unsigned long	rtc_19 :  1;  /* RW */
+	unsigned long	rtc_20 :  1;  /* RW */
+	unsigned long	rtc_21 :  1;  /* RW */
+	unsigned long	rtc_22 :  1;  /* RW */
+	unsigned long	rtc_23 :  1;  /* RW */
+	unsigned long	rtc_24 :  1;  /* RW */
+	unsigned long	rtc_25 :  1;  /* RW */
+	unsigned long	rtc_26 :  1;  /* RW */
+	unsigned long	rtc_27 :  1;  /* RW */
+	unsigned long	rtc_28 :  1;  /* RW */
+	unsigned long	rtc_29 :  1;  /* RW */
+	unsigned long	rtc_30 :  1;  /* RW */
+	unsigned long	rtc_31 :  1;  /* RW */
+	unsigned long	rsvd_32_63: 32;  /*    */
+    } s1;
+};
+
+/* ========================================================================= */
+/*                        UV2H_EVENT_OCCURRED2_ALIAS                         */
+/* ========================================================================= */
+#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL
+#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70
+
+/* ========================================================================= */
+/*                    UV2H_LB_BAU_SB_ACTIVATION_STATUS_2                     */
+/* ========================================================================= */
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
+
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+
+union uv2h_lb_bau_sb_activation_status_2_u {
+    unsigned long	v;
+    struct uv2h_lb_bau_sb_activation_status_2_s {
+	unsigned long	aux_error : 64;  /* RW */
+    } s1;
+};
+
+/* ========================================================================= */
+/*                   UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK                    */
+/* ========================================================================= */
+#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL
+#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x9f0
+
+#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0
+#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL
+
+union uv1h_lb_target_physical_apic_id_mask_u {
+    unsigned long	v;
+    struct uv1h_lb_target_physical_apic_id_mask_s {
+	unsigned long	bit_enables : 32;  /* RW */
+	unsigned long	rsvd_32_63  : 32;  /*    */
+    } s1;
+};
+
+
 #endif /* __ASM_UV_MMRS_X86_H__ */

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f5abe3a..90b06d4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile

@@ -8,6 +8,7 @@
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
+CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_pvclock.o = -pg
@@ -28,6 +29,7 @@
 GCOV_PROFILE_vsyscall_64.o	:= n
 GCOV_PROFILE_hpet.o		:= n
 GCOV_PROFILE_tsc.o		:= n
+GCOV_PROFILE_vread_tsc_64.o	:= n
 GCOV_PROFILE_paravirt.o		:= n
 
 # vread_tsc_64 is hot and should be fully optimized:

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f450b68..b511a01 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c

@@ -91,6 +91,10 @@
 	m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
 	uv_min_hub_revision_id = node_id.s.revision;
 
+	if (node_id.s.part_number == UV2_HUB_PART_NUMBER)
+		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+
+	uv_hub_info->hub_revision = uv_min_hub_revision_id;
 	pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
 	return pnode;
 }
@@ -112,17 +116,25 @@
  */
 static void __init uv_set_apicid_hibit(void)
 {
-	union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
+	union uv1h_lb_target_physical_apic_id_mask_u apicid_mask;
 
-	apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
-	uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
+	if (is_uv1_hub()) {
+		apicid_mask.v =
+			uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK);
+		uv_apicid_hibits =
+			apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK;
+	}
 }
 
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	int pnodeid;
+	int pnodeid, is_uv1, is_uv2;
 
-	if (!strcmp(oem_id, "SGI")) {
+	is_uv1 = !strcmp(oem_id, "SGI");
+	is_uv2 = !strcmp(oem_id, "SGI2");
+	if (is_uv1 || is_uv2) {
+		uv_hub_info->hub_revision =
+			is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE;
 		pnodeid = early_get_pnodeid();
 		early_get_apic_pnode_shift();
 		x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
@@ -484,12 +496,19 @@
 static __init void map_mmioh_high(int max_pnode)
 {
 	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
-	int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+	int shift;
 
 	mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
-	if (mmioh.s.enable)
-		map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io,
+	if (is_uv1_hub() && mmioh.s1.enable) {
+		shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+		map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io,
 			max_pnode, map_uc);
+	}
+	if (is_uv2_hub() && mmioh.s2.enable) {
+		shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+		map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io,
+			max_pnode, map_uc);
+	}
 }
 
 static __init void map_low_mmrs(void)
@@ -736,13 +755,14 @@
 	unsigned long mmr_base, present, paddr;
 	unsigned short pnode_mask, pnode_io_mask;
 
+	printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2");
 	map_low_mmrs();
 
 	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
 	m_val = m_n_config.s.m_skt;
 	n_val = m_n_config.s.n_skt;
 	mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
-	n_io = mmioh.s.n_io;
+	n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io;
 	mmr_base =
 	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
 	    ~UV_MMR_ENABLE;
@@ -811,6 +831,8 @@
 		 */
 		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
 		uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
+		uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
+
 		pnode = uv_apicid_to_pnode(apicid);
 		blade = boot_pnode_to_blade(pnode);
 		lcpu = uv_blade_info[blade].nr_possible_cpus;

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 3bfa022..965a766 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c

@@ -361,6 +361,7 @@
  * idle percentage above which bios idle calls are done
  */
 #ifdef CONFIG_APM_CPU_IDLE
+#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
 #define DEFAULT_IDLE_THRESHOLD	95
 #else
 #define DEFAULT_IDLE_THRESHOLD	100
@@ -904,6 +905,7 @@
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
 	unsigned int bucket;
 
+	WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8f5cabb..b13ed39 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c

@@ -612,8 +612,11 @@
 	}
 #endif
 
-	/* As a rule processors have APIC timer running in deep C states */
-	if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+	/*
+	 * Family 0x12 and above processors have APIC timer
+	 * running in deep C states.
+	 */
+	if (c->x86 > 0x11)
 		set_cpu_cap(c, X86_FEATURE_ARAT);
 
 	/*

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c39576c..525514c 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c

@@ -19,6 +19,7 @@
 
 static int __init no_halt(char *s)
 {
+	WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
 	boot_cpu_data.hlt_works_ok = 0;
 	return 1;
 }

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c8b4162..22a073d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c

@@ -477,13 +477,6 @@
 	if (smp_num_siblings <= 1)
 		goto out;
 
-	if (smp_num_siblings > nr_cpu_ids) {
-		pr_warning("CPU: Unsupported number of siblings %d",
-			   smp_num_siblings);
-		smp_num_siblings = 1;
-		return;
-	}
-
 	index_msb = get_count_order(smp_num_siblings);
 	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
 
@@ -909,7 +902,7 @@
 void __init identify_boot_cpu(void)
 {
 	identify_cpu(&boot_cpu_data);
-	init_c1e_mask();
+	init_amd_e400_c1e_mask();
 #ifdef CONFIG_X86_32
 	sysenter_setup();
 	enable_sep_cpu();

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0ba15a6..c9a281f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c

@@ -123,7 +123,7 @@
 static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
 static void *mod_code_ip;		/* holds the IP to write to */
-static void *mod_code_newcode;		/* holds the text to write to the IP */
+static const void *mod_code_newcode;	/* holds the text to write to the IP */
 
 static unsigned nmi_wait_count;
 static atomic_t nmi_update_count = ATOMIC_INIT(0);
@@ -225,7 +225,7 @@
 }
 
 static int
-do_ftrace_mod_code(unsigned long ip, void *new_code)
+do_ftrace_mod_code(unsigned long ip, const void *new_code)
 {
 	/*
 	 * On x86_64, kernel text mappings are mapped read-only with
@@ -266,8 +266,8 @@
 }
 
 static int
-ftrace_modify_code(unsigned long ip, unsigned char *old_code,
-		   unsigned char *new_code)
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+		   unsigned const char *new_code)
 {
 	unsigned char replaced[MCOUNT_INSN_SIZE];
 
@@ -301,7 +301,7 @@
 int ftrace_make_nop(struct module *mod,
 		    struct dyn_ftrace *rec, unsigned long addr)
 {
-	unsigned char *new, *old;
+	unsigned const char *new, *old;
 	unsigned long ip = rec->ip;
 
 	old = ftrace_call_replace(ip, addr);
@@ -312,7 +312,7 @@
 
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
-	unsigned char *new, *old;
+	unsigned const char *new, *old;
 	unsigned long ip = rec->ip;
 
 	old = ftrace_nop_replace();

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 88a90a9..2e4928d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c

@@ -337,7 +337,9 @@
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void);
+#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE)
 EXPORT_SYMBOL(pm_idle);
+#endif
 
 #ifdef CONFIG_X86_32
 /*
@@ -397,7 +399,7 @@
 		cpu_relax();
 	}
 }
-#ifdef CONFIG_APM_MODULE
+#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE)
 EXPORT_SYMBOL(default_idle);
 #endif
 
@@ -535,45 +537,45 @@
 	return (edx & MWAIT_EDX_C1);
 }
 
-bool c1e_detected;
-EXPORT_SYMBOL(c1e_detected);
+bool amd_e400_c1e_detected;
+EXPORT_SYMBOL(amd_e400_c1e_detected);
 
-static cpumask_var_t c1e_mask;
+static cpumask_var_t amd_e400_c1e_mask;
 
-void c1e_remove_cpu(int cpu)
+void amd_e400_remove_cpu(int cpu)
 {
-	if (c1e_mask != NULL)
-		cpumask_clear_cpu(cpu, c1e_mask);
+	if (amd_e400_c1e_mask != NULL)
+		cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
 }
 
 /*
- * C1E aware idle routine. We check for C1E active in the interrupt
+ * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
  * pending message MSR. If we detect C1E, then we handle it the same
  * way as C3 power states (local apic timer and TSC stop)
  */
-static void c1e_idle(void)
+static void amd_e400_idle(void)
 {
 	if (need_resched())
 		return;
 
-	if (!c1e_detected) {
+	if (!amd_e400_c1e_detected) {
 		u32 lo, hi;
 
 		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
 
 		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
-			c1e_detected = true;
+			amd_e400_c1e_detected = true;
 			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 				mark_tsc_unstable("TSC halt in AMD C1E");
 			printk(KERN_INFO "System has AMD C1E enabled\n");
 		}
 	}
 
-	if (c1e_detected) {
+	if (amd_e400_c1e_detected) {
 		int cpu = smp_processor_id();
 
-		if (!cpumask_test_cpu(cpu, c1e_mask)) {
-			cpumask_set_cpu(cpu, c1e_mask);
+		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
+			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
 			/*
 			 * Force broadcast so ACPI can not interfere.
 			 */
@@ -616,17 +618,17 @@
 		pm_idle = mwait_idle;
 	} else if (cpu_has_amd_erratum(amd_erratum_400)) {
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
-		printk(KERN_INFO "using C1E aware idle routine\n");
-		pm_idle = c1e_idle;
+		printk(KERN_INFO "using AMD E400 aware idle routine\n");
+		pm_idle = amd_e400_idle;
 	} else
 		pm_idle = default_idle;
 }
 
-void __init init_c1e_mask(void)
+void __init init_amd_e400_c1e_mask(void)
 {
-	/* If we're using c1e_idle, we need to allocate c1e_mask. */
-	if (pm_idle == c1e_idle)
-		zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
+	/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
+	if (pm_idle == amd_e400_idle)
+		zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
 }
 
 static int __init idle_setup(char *str)
@@ -640,6 +642,7 @@
 		boot_option_idle_override = IDLE_POLL;
 	} else if (!strcmp(str, "mwait")) {
 		boot_option_idle_override = IDLE_FORCE_MWAIT;
+		WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
 	} else if (!strcmp(str, "halt")) {
 		/*
 		 * When the boot option of idle=halt is added, halt is

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a3e5948..afaf384 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c

@@ -910,6 +910,13 @@
 	memblock.current_limit = get_max_mapped();
 	memblock_x86_fill();
 
+	/*
+	 * The EFI specification says that boot service code won't be called
+	 * after ExitBootServices(). This is, in fact, a lie.
+	 */
+	if (efi_enabled)
+		efi_reserve_boot_services();
+
 	/* preallocate 4k for mptable mpc */
 	early_reserve_e820_mpc_new();
 

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a3c430b..33a0c11 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c

@@ -1307,7 +1307,7 @@
 {
 	idle_task_exit();
 	reset_lazy_tlbstate();
-	c1e_remove_cpu(raw_smp_processor_id());
+	amd_e400_remove_cpu(raw_smp_processor_id());
 
 	mb();
 	/* Ack it */
@@ -1332,7 +1332,7 @@
 	void *mwait_ptr;
 	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
 
-	if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
+	if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
 		return;
 	if (!this_cpu_has(X86_FEATURE_CLFLSH))
 		return;

diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 32cbffb..fbb0a04 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S

@@ -345,3 +345,4 @@
 	.long sys_clock_adjtime
 	.long sys_syncfs
 	.long sys_sendmmsg		/* 345 */
+	.long sys_setns

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e191c09..db832fd 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c

@@ -993,6 +993,7 @@
 static void lguest_time_init(void)
 {
 	/* Set up the timer interrupt (0) to go to our simple timer routine */
+	lguest_setup_irq(0);
 	irq_set_handler(0, lguest_time_irq);
 
 	clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f7a2a05..2dbf6bf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c

@@ -823,16 +823,30 @@
 	force_sig_info_fault(SIGBUS, code, address, tsk, fault);
 }
 
-static noinline void
+static noinline int
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	       unsigned long address, unsigned int fault)
 {
+	/*
+	 * Pagefault was interrupted by SIGKILL. We have no reason to
+	 * continue pagefault.
+	 */
+	if (fatal_signal_pending(current)) {
+		if (!(fault & VM_FAULT_RETRY))
+			up_read(&current->mm->mmap_sem);
+		if (!(error_code & PF_USER))
+			no_context(regs, error_code, address);
+		return 1;
+	}
+	if (!(fault & VM_FAULT_ERROR))
+		return 0;
+
 	if (fault & VM_FAULT_OOM) {
 		/* Kernel mode? Handle exceptions or die: */
 		if (!(error_code & PF_USER)) {
 			up_read(&current->mm->mmap_sem);
 			no_context(regs, error_code, address);
-			return;
+			return 1;
 		}
 
 		out_of_memory(regs, error_code, address);
@@ -843,6 +857,7 @@
 		else
 			BUG();
 	}
+	return 1;
 }
 
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
@@ -1133,19 +1148,9 @@
 	 */
 	fault = handle_mm_fault(mm, vma, address, flags);
 
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		mm_fault_error(regs, error_code, address, fault);
-		return;
-	}
-
-	/*
-	 * Pagefault was interrupted by SIGKILL. We have no reason to
-	 * continue pagefault.
-	 */
-	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
-		if (!(error_code & PF_USER))
-			no_context(regs, error_code, address);
-		return;
+	if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
+		if (mm_fault_error(regs, error_code, address, fault))
+			return;
 	}
 
 	/*

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index c3b8e24..9fd8a56 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c

@@ -316,16 +316,23 @@
 		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
 
-static inline int eilvt_is_available(int offset)
+static inline int get_eilvt(int offset)
 {
-	/* check if we may assign a vector */
 	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
 }
 
+static inline int put_eilvt(int offset)
+{
+	return !setup_APIC_eilvt(offset, 0, 0, 1);
+}
+
 static inline int ibs_eilvt_valid(void)
 {
 	int offset;
 	u64 val;
+	int valid = 0;
+
+	preempt_disable();
 
 	rdmsrl(MSR_AMD64_IBSCTL, val);
 	offset = val & IBSCTL_LVT_OFFSET_MASK;
@@ -333,16 +340,20 @@
 	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
 		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
 		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-		return 0;
+		goto out;
 	}
 
-	if (!eilvt_is_available(offset)) {
+	if (!get_eilvt(offset)) {
 		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
 		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-		return 0;
+		goto out;
 	}
 
-	return 1;
+	valid = 1;
+out:
+	preempt_enable();
+
+	return valid;
 }
 
 static inline int get_ibs_offset(void)
@@ -600,67 +611,69 @@
 
 static int force_ibs_eilvt_setup(void)
 {
-	int i;
+	int offset;
 	int ret;
 
-	/* find the next free available EILVT entry */
-	for (i = 1; i < 4; i++) {
-		if (!eilvt_is_available(i))
-			continue;
-		ret = setup_ibs_ctl(i);
-		if (ret)
-			return ret;
-		pr_err(FW_BUG "using offset %d for IBS interrupts\n", i);
-		return 0;
+	/*
+	 * find the next free available EILVT entry, skip offset 0,
+	 * pin search to this cpu
+	 */
+	preempt_disable();
+	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
+		if (get_eilvt(offset))
+			break;
+	}
+	preempt_enable();
+
+	if (offset == APIC_EILVT_NR_MAX) {
+		printk(KERN_DEBUG "No EILVT entry available\n");
+		return -EBUSY;
 	}
 
-	printk(KERN_DEBUG "No EILVT entry available\n");
-
-	return -EBUSY;
-}
-
-static int __init_ibs_nmi(void)
-{
-	int ret;
-
-	if (ibs_eilvt_valid())
-		return 0;
-
-	ret = force_ibs_eilvt_setup();
+	ret = setup_ibs_ctl(offset);
 	if (ret)
-		return ret;
+		goto out;
 
-	if (!ibs_eilvt_valid())
-		return -EFAULT;
+	if (!ibs_eilvt_valid()) {
+		ret = -EFAULT;
+		goto out;
+	}
 
+	pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
 	pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
 
 	return 0;
+out:
+	preempt_disable();
+	put_eilvt(offset);
+	preempt_enable();
+	return ret;
 }
 
 /*
  * check and reserve APIC extended interrupt LVT offset for IBS if
  * available
- *
- * init_ibs() preforms implicitly cpu-local operations, so pin this
- * thread to its current CPU
  */
 
 static void init_ibs(void)
 {
-	preempt_disable();
-
 	ibs_caps = get_ibs_caps();
+
 	if (!ibs_caps)
+		return;
+
+	if (ibs_eilvt_valid())
 		goto out;
 
-	if (__init_ibs_nmi() < 0)
-		ibs_caps = 0;
-	else
-		printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
+	if (!force_ibs_eilvt_setup())
+		goto out;
+
+	/* Failed to setup ibs */
+	ibs_caps = 0;
+	return;
 
 out:
-	preempt_enable();
+	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
 }
 
 static int (*create_arch_files)(struct super_block *sb, struct dentry *root);

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index b30aa26..0d3a4fa 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c

@@ -304,6 +304,40 @@
 }
 #endif  /*  EFI_DEBUG  */
 
+void __init efi_reserve_boot_services(void)
+{
+	void *p;
+
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		efi_memory_desc_t *md = p;
+		unsigned long long start = md->phys_addr;
+		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+
+		if (md->type != EFI_BOOT_SERVICES_CODE &&
+		    md->type != EFI_BOOT_SERVICES_DATA)
+			continue;
+
+		memblock_x86_reserve_range(start, start + size, "EFI Boot");
+	}
+}
+
+static void __init efi_free_boot_services(void)
+{
+	void *p;
+
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		efi_memory_desc_t *md = p;
+		unsigned long long start = md->phys_addr;
+		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+
+		if (md->type != EFI_BOOT_SERVICES_CODE &&
+		    md->type != EFI_BOOT_SERVICES_DATA)
+			continue;
+
+		free_bootmem_late(start, size);
+	}
+}
+
 void __init efi_init(void)
 {
 	efi_config_table_t *config_tables;
@@ -536,7 +570,9 @@
 
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
-		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+		    md->type != EFI_BOOT_SERVICES_CODE &&
+		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
 
 		size = md->num_pages << EFI_PAGE_SHIFT;
@@ -593,6 +629,13 @@
 	}
 
 	/*
+	 * Thankfully, it does seem that no runtime services other than
+	 * SetVirtualAddressMap() will touch boot services code, so we can
+	 * get rid of it all at this point
+	 */
+	efi_free_boot_services();
+
+	/*
 	 * Now that EFI is in virtual mode, update the function
 	 * pointers in the runtime service table to the new virtual addresses.
 	 *

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 2649426..ac3aa54 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c

@@ -49,10 +49,11 @@
 	if (!(__supported_pte_mask & _PAGE_NX))
 		return;
 
-	/* Make EFI runtime service code area executable */
+	/* Make EFI service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
-		if (md->type == EFI_RUNTIME_SERVICES_CODE)
+		if (md->type == EFI_RUNTIME_SERVICES_CODE ||
+		    md->type == EFI_BOOT_SERVICES_CODE)
 			efi_set_executable(md, executable);
 	}
 }

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index c58e0ea..68e467f 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c

@@ -1,7 +1,7 @@
 /*
  *	SGI UltraViolet TLB flush routines.
  *
- *	(c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI.
+ *	(c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
  *
  *	This code is released under the GNU General Public License version 2 or
  *	later.
@@ -35,6 +35,7 @@
 		5242880,
 		167772160
 };
+
 static int timeout_us;
 static int nobau;
 static int baudisabled;
@@ -42,20 +43,70 @@
 static cycles_t congested_cycles;
 
 /* tunables: */
-static int max_bau_concurrent = MAX_BAU_CONCURRENT;
-static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT;
-static int plugged_delay = PLUGGED_DELAY;
-static int plugsb4reset = PLUGSB4RESET;
-static int timeoutsb4reset = TIMEOUTSB4RESET;
-static int ipi_reset_limit = IPI_RESET_LIMIT;
-static int complete_threshold = COMPLETE_THRESHOLD;
-static int congested_response_us = CONGESTED_RESPONSE_US;
-static int congested_reps = CONGESTED_REPS;
-static int congested_period = CONGESTED_PERIOD;
+static int max_concurr		= MAX_BAU_CONCURRENT;
+static int max_concurr_const	= MAX_BAU_CONCURRENT;
+static int plugged_delay	= PLUGGED_DELAY;
+static int plugsb4reset		= PLUGSB4RESET;
+static int timeoutsb4reset	= TIMEOUTSB4RESET;
+static int ipi_reset_limit	= IPI_RESET_LIMIT;
+static int complete_threshold	= COMPLETE_THRESHOLD;
+static int congested_respns_us	= CONGESTED_RESPONSE_US;
+static int congested_reps	= CONGESTED_REPS;
+static int congested_period	= CONGESTED_PERIOD;
+
+static struct tunables tunables[] = {
+	{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
+	{&plugged_delay, PLUGGED_DELAY},
+	{&plugsb4reset, PLUGSB4RESET},
+	{&timeoutsb4reset, TIMEOUTSB4RESET},
+	{&ipi_reset_limit, IPI_RESET_LIMIT},
+	{&complete_threshold, COMPLETE_THRESHOLD},
+	{&congested_respns_us, CONGESTED_RESPONSE_US},
+	{&congested_reps, CONGESTED_REPS},
+	{&congested_period, CONGESTED_PERIOD}
+};
+
 static struct dentry *tunables_dir;
 static struct dentry *tunables_file;
 
-static int __init setup_nobau(char *arg)
+/* these correspond to the statistics printed by ptc_seq_show() */
+static char *stat_description[] = {
+	"sent:     number of shootdown messages sent",
+	"stime:    time spent sending messages",
+	"numuvhubs: number of hubs targeted with shootdown",
+	"numuvhubs16: number times 16 or more hubs targeted",
+	"numuvhubs8: number times 8 or more hubs targeted",
+	"numuvhubs4: number times 4 or more hubs targeted",
+	"numuvhubs2: number times 2 or more hubs targeted",
+	"numuvhubs1: number times 1 hub targeted",
+	"numcpus:  number of cpus targeted with shootdown",
+	"dto:      number of destination timeouts",
+	"retries:  destination timeout retries sent",
+	"rok:   :  destination timeouts successfully retried",
+	"resetp:   ipi-style resource resets for plugs",
+	"resett:   ipi-style resource resets for timeouts",
+	"giveup:   fall-backs to ipi-style shootdowns",
+	"sto:      number of source timeouts",
+	"bz:       number of stay-busy's",
+	"throt:    number times spun in throttle",
+	"swack:   image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
+	"recv:     shootdown messages received",
+	"rtime:    time spent processing messages",
+	"all:      shootdown all-tlb messages",
+	"one:      shootdown one-tlb messages",
+	"mult:     interrupts that found multiple messages",
+	"none:     interrupts that found no messages",
+	"retry:    number of retry messages processed",
+	"canc:     number messages canceled by retries",
+	"nocan:    number retries that found nothing to cancel",
+	"reset:    number of ipi-style reset requests processed",
+	"rcan:     number messages canceled by reset requests",
+	"disable:  number times use of the BAU was disabled",
+	"enable:   number times use of the BAU was re-enabled"
+};
+
+static int __init
+setup_nobau(char *arg)
 {
 	nobau = 1;
 	return 0;
@@ -63,7 +114,7 @@
 early_param("nobau", setup_nobau);
 
 /* base pnode in this partition */
-static int uv_partition_base_pnode __read_mostly;
+static int uv_base_pnode __read_mostly;
 /* position of pnode (which is nasid>>1): */
 static int uv_nshift __read_mostly;
 static unsigned long uv_mmask __read_mostly;
@@ -109,60 +160,52 @@
  * clear of the Timeout bit (as well) will free the resource. No reply will
  * be sent (the hardware will only do one reply per message).
  */
-static inline void uv_reply_to_message(struct msg_desc *mdp,
-				       struct bau_control *bcp)
+static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp)
 {
 	unsigned long dw;
-	struct bau_payload_queue_entry *msg;
+	struct bau_pq_entry *msg;
 
 	msg = mdp->msg;
 	if (!msg->canceled) {
-		dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) |
-						msg->sw_ack_vector;
-		uv_write_local_mmr(
-				UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
+		dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
+		write_mmr_sw_ack(dw);
 	}
 	msg->replied_to = 1;
-	msg->sw_ack_vector = 0;
+	msg->swack_vec = 0;
 }
 
 /*
  * Process the receipt of a RETRY message
  */
-static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
-					    struct bau_control *bcp)
+static void bau_process_retry_msg(struct msg_desc *mdp,
+					struct bau_control *bcp)
 {
 	int i;
 	int cancel_count = 0;
-	int slot2;
 	unsigned long msg_res;
 	unsigned long mmr = 0;
-	struct bau_payload_queue_entry *msg;
-	struct bau_payload_queue_entry *msg2;
-	struct ptc_stats *stat;
+	struct bau_pq_entry *msg = mdp->msg;
+	struct bau_pq_entry *msg2;
+	struct ptc_stats *stat = bcp->statp;
 
-	msg = mdp->msg;
-	stat = bcp->statp;
 	stat->d_retries++;
 	/*
 	 * cancel any message from msg+1 to the retry itself
 	 */
 	for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
-		if (msg2 > mdp->va_queue_last)
-			msg2 = mdp->va_queue_first;
+		if (msg2 > mdp->queue_last)
+			msg2 = mdp->queue_first;
 		if (msg2 == msg)
 			break;
 
-		/* same conditions for cancellation as uv_do_reset */
+		/* same conditions for cancellation as do_reset */
 		if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
-		    (msg2->sw_ack_vector) && ((msg2->sw_ack_vector &
-			msg->sw_ack_vector) == 0) &&
+		    (msg2->swack_vec) && ((msg2->swack_vec &
+			msg->swack_vec) == 0) &&
 		    (msg2->sending_cpu == msg->sending_cpu) &&
 		    (msg2->msg_type != MSG_NOOP)) {
-			slot2 = msg2 - mdp->va_queue_first;
-			mmr = uv_read_local_mmr
-				(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-			msg_res = msg2->sw_ack_vector;
+			mmr = read_mmr_sw_ack();
+			msg_res = msg2->swack_vec;
 			/*
 			 * This is a message retry; clear the resources held
 			 * by the previous message only if they timed out.
@@ -170,6 +213,7 @@
 			 * situation to report.
 			 */
 			if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
+				unsigned long mr;
 				/*
 				 * is the resource timed out?
 				 * make everyone ignore the cancelled message.
@@ -177,10 +221,8 @@
 				msg2->canceled = 1;
 				stat->d_canceled++;
 				cancel_count++;
-				uv_write_local_mmr(
-				    UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
-					(msg_res << UV_SW_ACK_NPENDING) |
-					 msg_res);
+				mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
+				write_mmr_sw_ack(mr);
 			}
 		}
 	}
@@ -192,20 +234,19 @@
  * Do all the things a cpu should do for a TLB shootdown message.
  * Other cpu's may come here at the same time for this message.
  */
-static void uv_bau_process_message(struct msg_desc *mdp,
-				   struct bau_control *bcp)
+static void bau_process_message(struct msg_desc *mdp,
+					struct bau_control *bcp)
 {
-	int msg_ack_count;
 	short socket_ack_count = 0;
-	struct ptc_stats *stat;
-	struct bau_payload_queue_entry *msg;
+	short *sp;
+	struct atomic_short *asp;
+	struct ptc_stats *stat = bcp->statp;
+	struct bau_pq_entry *msg = mdp->msg;
 	struct bau_control *smaster = bcp->socket_master;
 
 	/*
 	 * This must be a normal message, or retry of a normal message
 	 */
-	msg = mdp->msg;
-	stat = bcp->statp;
 	if (msg->address == TLB_FLUSH_ALL) {
 		local_flush_tlb();
 		stat->d_alltlb++;
@@ -222,30 +263,32 @@
 	 * cpu number.
 	 */
 	if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
-		uv_bau_process_retry_msg(mdp, bcp);
+		bau_process_retry_msg(mdp, bcp);
 
 	/*
-	 * This is a sw_ack message, so we have to reply to it.
+	 * This is a swack message, so we have to reply to it.
 	 * Count each responding cpu on the socket. This avoids
 	 * pinging the count's cache line back and forth between
 	 * the sockets.
 	 */
-	socket_ack_count = atomic_add_short_return(1, (struct atomic_short *)
-			&smaster->socket_acknowledge_count[mdp->msg_slot]);
+	sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
+	asp = (struct atomic_short *)sp;
+	socket_ack_count = atom_asr(1, asp);
 	if (socket_ack_count == bcp->cpus_in_socket) {
+		int msg_ack_count;
 		/*
 		 * Both sockets dump their completed count total into
 		 * the message's count.
 		 */
 		smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
-		msg_ack_count = atomic_add_short_return(socket_ack_count,
-				(struct atomic_short *)&msg->acknowledge_count);
+		asp = (struct atomic_short *)&msg->acknowledge_count;
+		msg_ack_count = atom_asr(socket_ack_count, asp);
 
 		if (msg_ack_count == bcp->cpus_in_uvhub) {
 			/*
 			 * All cpus in uvhub saw it; reply
 			 */
-			uv_reply_to_message(mdp, bcp);
+			reply_to_message(mdp, bcp);
 		}
 	}
 
@@ -268,62 +311,51 @@
  * Last resort when we get a large number of destination timeouts is
  * to clear resources held by a given cpu.
  * Do this with IPI so that all messages in the BAU message queue
- * can be identified by their nonzero sw_ack_vector field.
+ * can be identified by their nonzero swack_vec field.
  *
  * This is entered for a single cpu on the uvhub.
  * The sender want's this uvhub to free a specific message's
- * sw_ack resources.
+ * swack resources.
  */
-static void
-uv_do_reset(void *ptr)
+static void do_reset(void *ptr)
 {
 	int i;
-	int slot;
-	int count = 0;
-	unsigned long mmr;
-	unsigned long msg_res;
-	struct bau_control *bcp;
-	struct reset_args *rap;
-	struct bau_payload_queue_entry *msg;
-	struct ptc_stats *stat;
+	struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
+	struct reset_args *rap = (struct reset_args *)ptr;
+	struct bau_pq_entry *msg;
+	struct ptc_stats *stat = bcp->statp;
 
-	bcp = &per_cpu(bau_control, smp_processor_id());
-	rap = (struct reset_args *)ptr;
-	stat = bcp->statp;
 	stat->d_resets++;
-
 	/*
 	 * We're looking for the given sender, and
-	 * will free its sw_ack resource.
+	 * will free its swack resource.
 	 * If all cpu's finally responded after the timeout, its
 	 * message 'replied_to' was set.
 	 */
-	for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
-		/* uv_do_reset: same conditions for cancellation as
-		   uv_bau_process_retry_msg() */
+	for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
+		unsigned long msg_res;
+		/* do_reset: same conditions for cancellation as
+		   bau_process_retry_msg() */
 		if ((msg->replied_to == 0) &&
 		    (msg->canceled == 0) &&
 		    (msg->sending_cpu == rap->sender) &&
-		    (msg->sw_ack_vector) &&
+		    (msg->swack_vec) &&
 		    (msg->msg_type != MSG_NOOP)) {
+			unsigned long mmr;
+			unsigned long mr;
 			/*
 			 * make everyone else ignore this message
 			 */
 			msg->canceled = 1;
-			slot = msg - bcp->va_queue_first;
-			count++;
 			/*
 			 * only reset the resource if it is still pending
 			 */
-			mmr = uv_read_local_mmr
-					(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-			msg_res = msg->sw_ack_vector;
+			mmr = read_mmr_sw_ack();
+			msg_res = msg->swack_vec;
+			mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
 			if (mmr & msg_res) {
 				stat->d_rcanceled++;
-				uv_write_local_mmr(
-				    UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
-					(msg_res << UV_SW_ACK_NPENDING) |
-					 msg_res);
+				write_mmr_sw_ack(mr);
 			}
 		}
 	}
@@ -334,39 +366,38 @@
  * Use IPI to get all target uvhubs to release resources held by
  * a given sending cpu number.
  */
-static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution,
-			      int sender)
+static void reset_with_ipi(struct bau_targ_hubmask *distribution, int sender)
 {
 	int uvhub;
-	int cpu;
+	int maskbits;
 	cpumask_t mask;
 	struct reset_args reset_args;
 
 	reset_args.sender = sender;
-
 	cpus_clear(mask);
 	/* find a single cpu for each uvhub in this distribution mask */
-	for (uvhub = 0;
-		    uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE;
-		    uvhub++) {
+	maskbits = sizeof(struct bau_targ_hubmask) * BITSPERBYTE;
+	for (uvhub = 0; uvhub < maskbits; uvhub++) {
+		int cpu;
 		if (!bau_uvhub_isset(uvhub, distribution))
 			continue;
 		/* find a cpu for this uvhub */
 		cpu = uvhub_to_first_cpu(uvhub);
 		cpu_set(cpu, mask);
 	}
-	/* IPI all cpus; Preemption is already disabled */
-	smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1);
+
+	/* IPI all cpus; preemption is already disabled */
+	smp_call_function_many(&mask, do_reset, (void *)&reset_args, 1);
 	return;
 }
 
-static inline unsigned long
-cycles_2_us(unsigned long long cyc)
+static inline unsigned long cycles_2_us(unsigned long long cyc)
 {
 	unsigned long long ns;
 	unsigned long us;
-	ns =  (cyc * per_cpu(cyc2ns, smp_processor_id()))
-						>> CYC2NS_SCALE_FACTOR;
+	int cpu = smp_processor_id();
+
+	ns =  (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR;
 	us = ns / 1000;
 	return us;
 }
@@ -376,56 +407,56 @@
  * leaves uvhub_quiesce set so that no new broadcasts are started by
  * bau_flush_send_and_wait()
  */
-static inline void
-quiesce_local_uvhub(struct bau_control *hmaster)
+static inline void quiesce_local_uvhub(struct bau_control *hmaster)
 {
-	atomic_add_short_return(1, (struct atomic_short *)
-		 &hmaster->uvhub_quiesce);
+	atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
 }
 
 /*
  * mark this quiet-requestor as done
  */
-static inline void
-end_uvhub_quiesce(struct bau_control *hmaster)
+static inline void end_uvhub_quiesce(struct bau_control *hmaster)
 {
-	atomic_add_short_return(-1, (struct atomic_short *)
-		&hmaster->uvhub_quiesce);
+	atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
+}
+
+static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
+{
+	unsigned long descriptor_status;
+
+	descriptor_status = uv_read_local_mmr(mmr_offset);
+	descriptor_status >>= right_shift;
+	descriptor_status &= UV_ACT_STATUS_MASK;
+	return descriptor_status;
 }
 
 /*
  * Wait for completion of a broadcast software ack message
  * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
  */
-static int uv_wait_completion(struct bau_desc *bau_desc,
-	unsigned long mmr_offset, int right_shift, int this_cpu,
-	struct bau_control *bcp, struct bau_control *smaster, long try)
+static int uv1_wait_completion(struct bau_desc *bau_desc,
+				unsigned long mmr_offset, int right_shift,
+				struct bau_control *bcp, long try)
 {
 	unsigned long descriptor_status;
-	cycles_t ttime;
+	cycles_t ttm;
 	struct ptc_stats *stat = bcp->statp;
-	struct bau_control *hmaster;
 
-	hmaster = bcp->uvhub_master;
-
+	descriptor_status = uv1_read_status(mmr_offset, right_shift);
 	/* spin on the status MMR, waiting for it to go idle */
-	while ((descriptor_status = (((unsigned long)
-		uv_read_local_mmr(mmr_offset) >>
-			right_shift) & UV_ACT_STATUS_MASK)) !=
-			DESC_STATUS_IDLE) {
+	while ((descriptor_status != DS_IDLE)) {
 		/*
-		 * Our software ack messages may be blocked because there are
-		 * no swack resources available.  As long as none of them
-		 * has timed out hardware will NACK our message and its
-		 * state will stay IDLE.
+		 * Our software ack messages may be blocked because
+		 * there are no swack resources available.  As long
+		 * as none of them has timed out hardware will NACK
+		 * our message and its state will stay IDLE.
 		 */
-		if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
+		if (descriptor_status == DS_SOURCE_TIMEOUT) {
 			stat->s_stimeout++;
 			return FLUSH_GIVEUP;
-		} else if (descriptor_status ==
-					DESC_STATUS_DESTINATION_TIMEOUT) {
+		} else if (descriptor_status == DS_DESTINATION_TIMEOUT) {
 			stat->s_dtimeout++;
-			ttime = get_cycles();
+			ttm = get_cycles();
 
 			/*
 			 * Our retries may be blocked by all destination
@@ -433,8 +464,7 @@
 			 * pending.  In that case hardware returns the
 			 * ERROR that looks like a destination timeout.
 			 */
-			if (cycles_2_us(ttime - bcp->send_message) <
-							timeout_us) {
+			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
 				bcp->conseccompletes = 0;
 				return FLUSH_RETRY_PLUGGED;
 			}
@@ -447,13 +477,106 @@
 			 */
 			cpu_relax();
 		}
+		descriptor_status = uv1_read_status(mmr_offset, right_shift);
 	}
 	bcp->conseccompletes++;
 	return FLUSH_COMPLETE;
 }
 
-static inline cycles_t
-sec_2_cycles(unsigned long sec)
+/*
+ * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
+ */
+static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu)
+{
+	unsigned long descriptor_status;
+	unsigned long descriptor_status2;
+
+	descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
+	descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL;
+	descriptor_status = (descriptor_status << 1) | descriptor_status2;
+	return descriptor_status;
+}
+
+static int uv2_wait_completion(struct bau_desc *bau_desc,
+				unsigned long mmr_offset, int right_shift,
+				struct bau_control *bcp, long try)
+{
+	unsigned long descriptor_stat;
+	cycles_t ttm;
+	int cpu = bcp->uvhub_cpu;
+	struct ptc_stats *stat = bcp->statp;
+
+	descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
+
+	/* spin on the status MMR, waiting for it to go idle */
+	while (descriptor_stat != UV2H_DESC_IDLE) {
+		/*
+		 * Our software ack messages may be blocked because
+		 * there are no swack resources available.  As long
+		 * as none of them has timed out hardware will NACK
+		 * our message and its state will stay IDLE.
+		 */
+		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
+		    (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) ||
+		    (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
+			stat->s_stimeout++;
+			return FLUSH_GIVEUP;
+		} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
+			stat->s_dtimeout++;
+			ttm = get_cycles();
+			/*
+			 * Our retries may be blocked by all destination
+			 * swack resources being consumed, and a timeout
+			 * pending.  In that case hardware returns the
+			 * ERROR that looks like a destination timeout.
+			 */
+			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
+				bcp->conseccompletes = 0;
+				return FLUSH_RETRY_PLUGGED;
+			}
+			bcp->conseccompletes = 0;
+			return FLUSH_RETRY_TIMEOUT;
+		} else {
+			/*
+			 * descriptor_stat is still BUSY
+			 */
+			cpu_relax();
+		}
+		descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
+	}
+	bcp->conseccompletes++;
+	return FLUSH_COMPLETE;
+}
+
+/*
+ * There are 2 status registers; each and array[32] of 2 bits. Set up for
+ * which register to read and position in that register based on cpu in
+ * current hub.
+ */
+static int wait_completion(struct bau_desc *bau_desc,
+				struct bau_control *bcp, long try)
+{
+	int right_shift;
+	unsigned long mmr_offset;
+	int cpu = bcp->uvhub_cpu;
+
+	if (cpu < UV_CPUS_PER_AS) {
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+		right_shift = cpu * UV_ACT_STATUS_SIZE;
+	} else {
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+		right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
+	}
+
+	if (is_uv1_hub())
+		return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
+								bcp, try);
+	else
+		return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
+								bcp, try);
+}
+
+static inline cycles_t sec_2_cycles(unsigned long sec)
 {
 	unsigned long ns;
 	cycles_t cyc;
@@ -464,63 +587,50 @@
 }
 
 /*
- * conditionally add 1 to *v, unless *v is >= u
- * return 0 if we cannot add 1 to *v because it is >= u
- * return 1 if we can add 1 to *v because it is < u
- * the add is atomic
- *
- * This is close to atomic_add_unless(), but this allows the 'u' value
- * to be lowered below the current 'v'.  atomic_add_unless can only stop
- * on equal.
- */
-static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
-{
-	spin_lock(lock);
-	if (atomic_read(v) >= u) {
-		spin_unlock(lock);
-		return 0;
-	}
-	atomic_inc(v);
-	spin_unlock(lock);
-	return 1;
-}
-
-/*
- * Our retries are blocked by all destination swack resources being
+ * Our retries are blocked by all destination sw ack resources being
  * in use, and a timeout is pending. In that case hardware immediately
  * returns the ERROR that looks like a destination timeout.
  */
-static void
-destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp,
+static void destination_plugged(struct bau_desc *bau_desc,
+			struct bau_control *bcp,
 			struct bau_control *hmaster, struct ptc_stats *stat)
 {
 	udelay(bcp->plugged_delay);
 	bcp->plugged_tries++;
+
 	if (bcp->plugged_tries >= bcp->plugsb4reset) {
 		bcp->plugged_tries = 0;
+
 		quiesce_local_uvhub(hmaster);
+
 		spin_lock(&hmaster->queue_lock);
-		uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
+		reset_with_ipi(&bau_desc->distribution, bcp->cpu);
 		spin_unlock(&hmaster->queue_lock);
+
 		end_uvhub_quiesce(hmaster);
+
 		bcp->ipi_attempts++;
 		stat->s_resets_plug++;
 	}
 }
 
-static void
-destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp,
-			struct bau_control *hmaster, struct ptc_stats *stat)
+static void destination_timeout(struct bau_desc *bau_desc,
+			struct bau_control *bcp, struct bau_control *hmaster,
+			struct ptc_stats *stat)
 {
-	hmaster->max_bau_concurrent = 1;
+	hmaster->max_concurr = 1;
 	bcp->timeout_tries++;
 	if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
 		bcp->timeout_tries = 0;
+
 		quiesce_local_uvhub(hmaster);
+
 		spin_lock(&hmaster->queue_lock);
-		uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
+		reset_with_ipi(&bau_desc->distribution, bcp->cpu);
 		spin_unlock(&hmaster->queue_lock);
+
 		end_uvhub_quiesce(hmaster);
+
 		bcp->ipi_attempts++;
 		stat->s_resets_timeout++;
 	}
@@ -530,34 +640,104 @@
  * Completions are taking a very long time due to a congested numalink
  * network.
  */
-static void
-disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
+static void disable_for_congestion(struct bau_control *bcp,
+					struct ptc_stats *stat)
 {
-	int tcpu;
-	struct bau_control *tbcp;
-
 	/* let only one cpu do this disabling */
 	spin_lock(&disable_lock);
+
 	if (!baudisabled && bcp->period_requests &&
 	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
+		int tcpu;
+		struct bau_control *tbcp;
 		/* it becomes this cpu's job to turn on the use of the
 		   BAU again */
 		baudisabled = 1;
 		bcp->set_bau_off = 1;
-		bcp->set_bau_on_time = get_cycles() +
-			sec_2_cycles(bcp->congested_period);
+		bcp->set_bau_on_time = get_cycles();
+		bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
 		stat->s_bau_disabled++;
 		for_each_present_cpu(tcpu) {
 			tbcp = &per_cpu(bau_control, tcpu);
-				tbcp->baudisabled = 1;
+			tbcp->baudisabled = 1;
 		}
 	}
+
 	spin_unlock(&disable_lock);
 }
 
-/**
- * uv_flush_send_and_wait
- *
+static void count_max_concurr(int stat, struct bau_control *bcp,
+				struct bau_control *hmaster)
+{
+	bcp->plugged_tries = 0;
+	bcp->timeout_tries = 0;
+	if (stat != FLUSH_COMPLETE)
+		return;
+	if (bcp->conseccompletes <= bcp->complete_threshold)
+		return;
+	if (hmaster->max_concurr >= hmaster->max_concurr_const)
+		return;
+	hmaster->max_concurr++;
+}
+
+static void record_send_stats(cycles_t time1, cycles_t time2,
+		struct bau_control *bcp, struct ptc_stats *stat,
+		int completion_status, int try)
+{
+	cycles_t elapsed;
+
+	if (time2 > time1) {
+		elapsed = time2 - time1;
+		stat->s_time += elapsed;
+
+		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
+			bcp->period_requests++;
+			bcp->period_time += elapsed;
+			if ((elapsed > congested_cycles) &&
+			    (bcp->period_requests > bcp->cong_reps))
+				disable_for_congestion(bcp, stat);
+		}
+	} else
+		stat->s_requestor--;
+
+	if (completion_status == FLUSH_COMPLETE && try > 1)
+		stat->s_retriesok++;
+	else if (completion_status == FLUSH_GIVEUP)
+		stat->s_giveup++;
+}
+
+/*
+ * Because of a uv1 hardware bug only a limited number of concurrent
+ * requests can be made.
+ */
+static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
+{
+	spinlock_t *lock = &hmaster->uvhub_lock;
+	atomic_t *v;
+
+	v = &hmaster->active_descriptor_count;
+	if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) {
+		stat->s_throttles++;
+		do {
+			cpu_relax();
+		} while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr));
+	}
+}
+
+/*
+ * Handle the completion status of a message send.
+ */
+static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
+			struct bau_control *bcp, struct bau_control *hmaster,
+			struct ptc_stats *stat)
+{
+	if (completion_status == FLUSH_RETRY_PLUGGED)
+		destination_plugged(bau_desc, bcp, hmaster, stat);
+	else if (completion_status == FLUSH_RETRY_TIMEOUT)
+		destination_timeout(bau_desc, bcp, hmaster, stat);
+}
+
+/*
  * Send a broadcast and wait for it to complete.
  *
  * The flush_mask contains the cpus the broadcast is to be sent to including
@@ -568,44 +748,23 @@
  * returned to the kernel.
  */
 int uv_flush_send_and_wait(struct bau_desc *bau_desc,
-			   struct cpumask *flush_mask, struct bau_control *bcp)
+			struct cpumask *flush_mask, struct bau_control *bcp)
 {
-	int right_shift;
-	int completion_status = 0;
 	int seq_number = 0;
+	int completion_stat = 0;
 	long try = 0;
-	int cpu = bcp->uvhub_cpu;
-	int this_cpu = bcp->cpu;
-	unsigned long mmr_offset;
 	unsigned long index;
 	cycles_t time1;
 	cycles_t time2;
-	cycles_t elapsed;
 	struct ptc_stats *stat = bcp->statp;
-	struct bau_control *smaster = bcp->socket_master;
 	struct bau_control *hmaster = bcp->uvhub_master;
 
-	if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
-			&hmaster->active_descriptor_count,
-			hmaster->max_bau_concurrent)) {
-		stat->s_throttles++;
-		do {
-			cpu_relax();
-		} while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
-			&hmaster->active_descriptor_count,
-			hmaster->max_bau_concurrent));
-	}
+	if (is_uv1_hub())
+		uv1_throttle(hmaster, stat);
+
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 
-	if (cpu < UV_CPUS_PER_ACT_STATUS) {
-		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
-		right_shift = cpu * UV_ACT_STATUS_SIZE;
-	} else {
-		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-		right_shift =
-		    ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
-	}
 	time1 = get_cycles();
 	do {
 		if (try == 0) {
@@ -615,64 +774,134 @@
 			bau_desc->header.msg_type = MSG_RETRY;
 			stat->s_retry_messages++;
 		}
-		bau_desc->header.sequence = seq_number;
-		index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
-			bcp->uvhub_cpu;
-		bcp->send_message = get_cycles();
-		uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-		try++;
-		completion_status = uv_wait_completion(bau_desc, mmr_offset,
-			right_shift, this_cpu, bcp, smaster, try);
 
-		if (completion_status == FLUSH_RETRY_PLUGGED) {
-			destination_plugged(bau_desc, bcp, hmaster, stat);
-		} else if (completion_status == FLUSH_RETRY_TIMEOUT) {
-			destination_timeout(bau_desc, bcp, hmaster, stat);
-		}
+		bau_desc->header.sequence = seq_number;
+		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
+		bcp->send_message = get_cycles();
+
+		write_mmr_activation(index);
+
+		try++;
+		completion_stat = wait_completion(bau_desc, bcp, try);
+
+		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
+
 		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
 			bcp->ipi_attempts = 0;
-			completion_status = FLUSH_GIVEUP;
+			completion_stat = FLUSH_GIVEUP;
 			break;
 		}
 		cpu_relax();
-	} while ((completion_status == FLUSH_RETRY_PLUGGED) ||
-		 (completion_status == FLUSH_RETRY_TIMEOUT));
+	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
+		 (completion_stat == FLUSH_RETRY_TIMEOUT));
+
 	time2 = get_cycles();
-	bcp->plugged_tries = 0;
-	bcp->timeout_tries = 0;
-	if ((completion_status == FLUSH_COMPLETE) &&
-	    (bcp->conseccompletes > bcp->complete_threshold) &&
-	    (hmaster->max_bau_concurrent <
-					hmaster->max_bau_concurrent_constant))
-			hmaster->max_bau_concurrent++;
+
+	count_max_concurr(completion_stat, bcp, hmaster);
+
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
+
 	atomic_dec(&hmaster->active_descriptor_count);
-	if (time2 > time1) {
-		elapsed = time2 - time1;
-		stat->s_time += elapsed;
-		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
-			bcp->period_requests++;
-			bcp->period_time += elapsed;
-			if ((elapsed > congested_cycles) &&
-			    (bcp->period_requests > bcp->congested_reps)) {
-				disable_for_congestion(bcp, stat);
-			}
-		}
-	} else
-		stat->s_requestor--;
-	if (completion_status == FLUSH_COMPLETE && try > 1)
-		stat->s_retriesok++;
-	else if (completion_status == FLUSH_GIVEUP) {
-		stat->s_giveup++;
+
+	record_send_stats(time1, time2, bcp, stat, completion_stat, try);
+
+	if (completion_stat == FLUSH_GIVEUP)
 		return 1;
-	}
 	return 0;
 }
 
-/**
- * uv_flush_tlb_others - globally purge translation cache of a virtual
- * address or all TLB's
+/*
+ * The BAU is disabled. When the disabled time period has expired, the cpu
+ * that disabled it must re-enable it.
+ * Return 0 if it is re-enabled for all cpus.
+ */
+static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
+{
+	int tcpu;
+	struct bau_control *tbcp;
+
+	if (bcp->set_bau_off) {
+		if (get_cycles() >= bcp->set_bau_on_time) {
+			stat->s_bau_reenabled++;
+			baudisabled = 0;
+			for_each_present_cpu(tcpu) {
+				tbcp = &per_cpu(bau_control, tcpu);
+				tbcp->baudisabled = 0;
+				tbcp->period_requests = 0;
+				tbcp->period_time = 0;
+			}
+			return 0;
+		}
+	}
+	return -1;
+}
+
+static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
+				int remotes, struct bau_desc *bau_desc)
+{
+	stat->s_requestor++;
+	stat->s_ntargcpu += remotes + locals;
+	stat->s_ntargremotes += remotes;
+	stat->s_ntarglocals += locals;
+
+	/* uvhub statistics */
+	hubs = bau_uvhub_weight(&bau_desc->distribution);
+	if (locals) {
+		stat->s_ntarglocaluvhub++;
+		stat->s_ntargremoteuvhub += (hubs - 1);
+	} else
+		stat->s_ntargremoteuvhub += hubs;
+
+	stat->s_ntarguvhub += hubs;
+
+	if (hubs >= 16)
+		stat->s_ntarguvhub16++;
+	else if (hubs >= 8)
+		stat->s_ntarguvhub8++;
+	else if (hubs >= 4)
+		stat->s_ntarguvhub4++;
+	else if (hubs >= 2)
+		stat->s_ntarguvhub2++;
+	else
+		stat->s_ntarguvhub1++;
+}
+
+/*
+ * Translate a cpu mask to the uvhub distribution mask in the BAU
+ * activation descriptor.
+ */
+static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
+			struct bau_desc *bau_desc, int *localsp, int *remotesp)
+{
+	int cpu;
+	int pnode;
+	int cnt = 0;
+	struct hub_and_pnode *hpp;
+
+	for_each_cpu(cpu, flush_mask) {
+		/*
+		 * The distribution vector is a bit map of pnodes, relative
+		 * to the partition base pnode (and the partition base nasid
+		 * in the header).
+		 * Translate cpu to pnode and hub using a local memory array.
+		 */
+		hpp = &bcp->socket_master->thp[cpu];
+		pnode = hpp->pnode - bcp->partition_base_pnode;
+		bau_uvhub_set(pnode, &bau_desc->distribution);
+		cnt++;
+		if (hpp->uvhub == bcp->uvhub)
+			(*localsp)++;
+		else
+			(*remotesp)++;
+	}
+	if (!cnt)
+		return 1;
+	return 0;
+}
+
+/*
+ * globally purge translation cache of a virtual address or all TLB's
  * @cpumask: mask of all cpu's in which the address is to be removed
  * @mm: mm_struct containing virtual address range
  * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
@@ -696,20 +925,16 @@
  * done.  The returned pointer is valid till preemption is re-enabled.
  */
 const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
-					  struct mm_struct *mm,
-					  unsigned long va, unsigned int cpu)
+				struct mm_struct *mm, unsigned long va,
+				unsigned int cpu)
 {
 	int locals = 0;
 	int remotes = 0;
 	int hubs = 0;
-	int tcpu;
-	int tpnode;
 	struct bau_desc *bau_desc;
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
-	struct bau_control *tbcp;
-	struct hub_and_pnode *hpp;
 
 	/* kernel was booted 'nobau' */
 	if (nobau)
@@ -720,20 +945,8 @@
 
 	/* bau was disabled due to slow response */
 	if (bcp->baudisabled) {
-		/* the cpu that disabled it must re-enable it */
-		if (bcp->set_bau_off) {
-			if (get_cycles() >= bcp->set_bau_on_time) {
-				stat->s_bau_reenabled++;
-				baudisabled = 0;
-				for_each_present_cpu(tcpu) {
-					tbcp = &per_cpu(bau_control, tcpu);
-					tbcp->baudisabled = 0;
-					tbcp->period_requests = 0;
-					tbcp->period_time = 0;
-				}
-			}
-		}
-		return cpumask;
+		if (check_enable(bcp, stat))
+			return cpumask;
 	}
 
 	/*
@@ -744,59 +957,20 @@
 	flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
 	/* don't actually do a shootdown of the local cpu */
 	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
+
 	if (cpu_isset(cpu, *cpumask))
 		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
-	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
+	bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu;
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
-
-	for_each_cpu(tcpu, flush_mask) {
-		/*
-		 * The distribution vector is a bit map of pnodes, relative
-		 * to the partition base pnode (and the partition base nasid
-		 * in the header).
-		 * Translate cpu to pnode and hub using an array stored
-		 * in local memory.
-		 */
-		hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
-		tpnode = hpp->pnode - bcp->partition_base_pnode;
-		bau_uvhub_set(tpnode, &bau_desc->distribution);
-		if (hpp->uvhub == bcp->uvhub)
-			locals++;
-		else
-			remotes++;
-	}
-	if ((locals + remotes) == 0)
+	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
 		return NULL;
-	stat->s_requestor++;
-	stat->s_ntargcpu += remotes + locals;
-	stat->s_ntargremotes += remotes;
-	stat->s_ntarglocals += locals;
-	remotes = bau_uvhub_weight(&bau_desc->distribution);
 
-	/* uvhub statistics */
-	hubs = bau_uvhub_weight(&bau_desc->distribution);
-	if (locals) {
-		stat->s_ntarglocaluvhub++;
-		stat->s_ntargremoteuvhub += (hubs - 1);
-	} else
-		stat->s_ntargremoteuvhub += hubs;
-	stat->s_ntarguvhub += hubs;
-	if (hubs >= 16)
-		stat->s_ntarguvhub16++;
-	else if (hubs >= 8)
-		stat->s_ntarguvhub8++;
-	else if (hubs >= 4)
-		stat->s_ntarguvhub4++;
-	else if (hubs >= 2)
-		stat->s_ntarguvhub2++;
-	else
-		stat->s_ntarguvhub1++;
+	record_send_statistics(stat, locals, hubs, remotes, bau_desc);
 
 	bau_desc->payload.address = va;
 	bau_desc->payload.sending_cpu = cpu;
-
 	/*
 	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
 	 * or 1 if it gave up and the original cpumask should be returned.
@@ -825,26 +999,31 @@
 {
 	int count = 0;
 	cycles_t time_start;
-	struct bau_payload_queue_entry *msg;
+	struct bau_pq_entry *msg;
 	struct bau_control *bcp;
 	struct ptc_stats *stat;
 	struct msg_desc msgdesc;
 
 	time_start = get_cycles();
+
 	bcp = &per_cpu(bau_control, smp_processor_id());
 	stat = bcp->statp;
-	msgdesc.va_queue_first = bcp->va_queue_first;
-	msgdesc.va_queue_last = bcp->va_queue_last;
+
+	msgdesc.queue_first = bcp->queue_first;
+	msgdesc.queue_last = bcp->queue_last;
+
 	msg = bcp->bau_msg_head;
-	while (msg->sw_ack_vector) {
+	while (msg->swack_vec) {
 		count++;
-		msgdesc.msg_slot = msg - msgdesc.va_queue_first;
-		msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1;
+
+		msgdesc.msg_slot = msg - msgdesc.queue_first;
+		msgdesc.swack_slot = ffs(msg->swack_vec) - 1;
 		msgdesc.msg = msg;
-		uv_bau_process_message(&msgdesc, bcp);
+		bau_process_message(&msgdesc, bcp);
+
 		msg++;
-		if (msg > msgdesc.va_queue_last)
-			msg = msgdesc.va_queue_first;
+		if (msg > msgdesc.queue_last)
+			msg = msgdesc.queue_first;
 		bcp->bau_msg_head = msg;
 	}
 	stat->d_time += (get_cycles() - time_start);
@@ -852,18 +1031,17 @@
 		stat->d_nomsg++;
 	else if (count > 1)
 		stat->d_multmsg++;
+
 	ack_APIC_irq();
 }
 
 /*
- * uv_enable_timeouts
- *
- * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have
+ * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
  * shootdown message timeouts enabled.  The timeout does not cause
  * an interrupt, but causes an error message to be returned to
  * the sender.
  */
-static void __init uv_enable_timeouts(void)
+static void __init enable_timeouts(void)
 {
 	int uvhub;
 	int nuvhubs;
@@ -877,47 +1055,44 @@
 			continue;
 
 		pnode = uv_blade_to_pnode(uvhub);
-		mmr_image =
-		    uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
+		mmr_image = read_mmr_misc_control(pnode);
 		/*
 		 * Set the timeout period and then lock it in, in three
 		 * steps; captures and locks in the period.
 		 *
 		 * To program the period, the SOFT_ACK_MODE must be off.
 		 */
-		mmr_image &= ~((unsigned long)1 <<
-		    UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
-		uv_write_global_mmr64
-		    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+		mmr_image &= ~(1L << SOFTACK_MSHIFT);
+		write_mmr_misc_control(pnode, mmr_image);
 		/*
 		 * Set the 4-bit period.
 		 */
-		mmr_image &= ~((unsigned long)0xf <<
-		     UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
-		mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
-		     UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT);
-		uv_write_global_mmr64
-		    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+		mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT);
+		mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT);
+		write_mmr_misc_control(pnode, mmr_image);
 		/*
+		 * UV1:
 		 * Subsequent reversals of the timebase bit (3) cause an
 		 * immediate timeout of one or all INTD resources as
 		 * indicated in bits 2:0 (7 causes all of them to timeout).
 		 */
-		mmr_image |= ((unsigned long)1 <<
-		    UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT);
-		uv_write_global_mmr64
-		    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+		mmr_image |= (1L << SOFTACK_MSHIFT);
+		if (is_uv2_hub()) {
+			mmr_image |= (1L << UV2_LEG_SHFT);
+			mmr_image |= (1L << UV2_EXT_SHFT);
+		}
+		write_mmr_misc_control(pnode, mmr_image);
 	}
 }
 
-static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
+static void *ptc_seq_start(struct seq_file *file, loff_t *offset)
 {
 	if (*offset < num_possible_cpus())
 		return offset;
 	return NULL;
 }
 
-static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
+static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
 {
 	(*offset)++;
 	if (*offset < num_possible_cpus())
@@ -925,12 +1100,11 @@
 	return NULL;
 }
 
-static void uv_ptc_seq_stop(struct seq_file *file, void *data)
+static void ptc_seq_stop(struct seq_file *file, void *data)
 {
 }
 
-static inline unsigned long long
-microsec_2_cycles(unsigned long microsec)
+static inline unsigned long long usec_2_cycles(unsigned long microsec)
 {
 	unsigned long ns;
 	unsigned long long cyc;
@@ -941,29 +1115,27 @@
 }
 
 /*
- * Display the statistics thru /proc.
+ * Display the statistics thru /proc/sgi_uv/ptc_statistics
  * 'data' points to the cpu number
+ * Note: see the descriptions in stat_description[].
  */
-static int uv_ptc_seq_show(struct seq_file *file, void *data)
+static int ptc_seq_show(struct seq_file *file, void *data)
 {
 	struct ptc_stats *stat;
 	int cpu;
 
 	cpu = *(loff_t *)data;
-
 	if (!cpu) {
 		seq_printf(file,
 			"# cpu sent stime self locals remotes ncpus localhub ");
 		seq_printf(file,
 			"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
 		seq_printf(file,
-			"numuvhubs4 numuvhubs2 numuvhubs1 dto ");
+			"numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok ");
 		seq_printf(file,
-			"retries rok resetp resett giveup sto bz throt ");
+			"resetp resett giveup sto bz throt swack recv rtime ");
 		seq_printf(file,
-			"sw_ack recv rtime all ");
-		seq_printf(file,
-			"one mult none retry canc nocan reset rcan ");
+			"all one mult none retry canc nocan reset rcan ");
 		seq_printf(file,
 			"disable enable\n");
 	}
@@ -990,8 +1162,7 @@
 		/* destination side statistics */
 		seq_printf(file,
 			   "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
-			   uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
-					UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
+			   read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
 			   stat->d_requestee, cycles_2_us(stat->d_time),
 			   stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
 			   stat->d_nomsg, stat->d_retries, stat->d_canceled,
@@ -1000,7 +1171,6 @@
 		seq_printf(file, "%ld %ld\n",
 			stat->s_bau_disabled, stat->s_bau_reenabled);
 	}
-
 	return 0;
 }
 
@@ -1008,18 +1178,18 @@
  * Display the tunables thru debugfs
  */
 static ssize_t tunables_read(struct file *file, char __user *userbuf,
-						size_t count, loff_t *ppos)
+				size_t count, loff_t *ppos)
 {
 	char *buf;
 	int ret;
 
 	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
-		"max_bau_concurrent plugged_delay plugsb4reset",
+		"max_concur plugged_delay plugsb4reset",
 		"timeoutsb4reset ipi_reset_limit complete_threshold",
 		"congested_response_us congested_reps congested_period",
-		max_bau_concurrent, plugged_delay, plugsb4reset,
+		max_concurr, plugged_delay, plugsb4reset,
 		timeoutsb4reset, ipi_reset_limit, complete_threshold,
-		congested_response_us, congested_reps, congested_period);
+		congested_respns_us, congested_reps, congested_period);
 
 	if (!buf)
 		return -ENOMEM;
@@ -1030,13 +1200,16 @@
 }
 
 /*
- * -1: resetf the statistics
+ * handle a write to /proc/sgi_uv/ptc_statistics
+ * -1: reset the statistics
  *  0: display meaning of the statistics
  */
-static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
-				 size_t count, loff_t *data)
+static ssize_t ptc_proc_write(struct file *file, const char __user *user,
+				size_t count, loff_t *data)
 {
 	int cpu;
+	int i;
+	int elements;
 	long input_arg;
 	char optstr[64];
 	struct ptc_stats *stat;
@@ -1046,79 +1219,18 @@
 	if (copy_from_user(optstr, user, count))
 		return -EFAULT;
 	optstr[count - 1] = '\0';
+
 	if (strict_strtol(optstr, 10, &input_arg) < 0) {
 		printk(KERN_DEBUG "%s is invalid\n", optstr);
 		return -EINVAL;
 	}
 
 	if (input_arg == 0) {
+		elements = sizeof(stat_description)/sizeof(*stat_description);
 		printk(KERN_DEBUG "# cpu:      cpu number\n");
 		printk(KERN_DEBUG "Sender statistics:\n");
-		printk(KERN_DEBUG
-		"sent:     number of shootdown messages sent\n");
-		printk(KERN_DEBUG
-		"stime:    time spent sending messages\n");
-		printk(KERN_DEBUG
-		"numuvhubs: number of hubs targeted with shootdown\n");
-		printk(KERN_DEBUG
-		"numuvhubs16: number times 16 or more hubs targeted\n");
-		printk(KERN_DEBUG
-		"numuvhubs8: number times 8 or more hubs targeted\n");
-		printk(KERN_DEBUG
-		"numuvhubs4: number times 4 or more hubs targeted\n");
-		printk(KERN_DEBUG
-		"numuvhubs2: number times 2 or more hubs targeted\n");
-		printk(KERN_DEBUG
-		"numuvhubs1: number times 1 hub targeted\n");
-		printk(KERN_DEBUG
-		"numcpus:  number of cpus targeted with shootdown\n");
-		printk(KERN_DEBUG
-		"dto:      number of destination timeouts\n");
-		printk(KERN_DEBUG
-		"retries:  destination timeout retries sent\n");
-		printk(KERN_DEBUG
-		"rok:   :  destination timeouts successfully retried\n");
-		printk(KERN_DEBUG
-		"resetp:   ipi-style resource resets for plugs\n");
-		printk(KERN_DEBUG
-		"resett:   ipi-style resource resets for timeouts\n");
-		printk(KERN_DEBUG
-		"giveup:   fall-backs to ipi-style shootdowns\n");
-		printk(KERN_DEBUG
-		"sto:      number of source timeouts\n");
-		printk(KERN_DEBUG
-		"bz:       number of stay-busy's\n");
-		printk(KERN_DEBUG
-		"throt:    number times spun in throttle\n");
-		printk(KERN_DEBUG "Destination side statistics:\n");
-		printk(KERN_DEBUG
-		"sw_ack:   image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
-		printk(KERN_DEBUG
-		"recv:     shootdown messages received\n");
-		printk(KERN_DEBUG
-		"rtime:    time spent processing messages\n");
-		printk(KERN_DEBUG
-		"all:      shootdown all-tlb messages\n");
-		printk(KERN_DEBUG
-		"one:      shootdown one-tlb messages\n");
-		printk(KERN_DEBUG
-		"mult:     interrupts that found multiple messages\n");
-		printk(KERN_DEBUG
-		"none:     interrupts that found no messages\n");
-		printk(KERN_DEBUG
-		"retry:    number of retry messages processed\n");
-		printk(KERN_DEBUG
-		"canc:     number messages canceled by retries\n");
-		printk(KERN_DEBUG
-		"nocan:    number retries that found nothing to cancel\n");
-		printk(KERN_DEBUG
-		"reset:    number of ipi-style reset requests processed\n");
-		printk(KERN_DEBUG
-		"rcan:     number messages canceled by reset requests\n");
-		printk(KERN_DEBUG
-		"disable:  number times use of the BAU was disabled\n");
-		printk(KERN_DEBUG
-		"enable:   number times use of the BAU was re-enabled\n");
+		for (i = 0; i < elements; i++)
+			printk(KERN_DEBUG "%s\n", stat_description[i]);
 	} else if (input_arg == -1) {
 		for_each_present_cpu(cpu) {
 			stat = &per_cpu(ptcstats, cpu);
@@ -1145,27 +1257,18 @@
 }
 
 /*
- * set the tunables
- * 0 values reset them to defaults
+ * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables.
+ * Zero values reset them to defaults.
  */
-static ssize_t tunables_write(struct file *file, const char __user *user,
-				 size_t count, loff_t *data)
+static int parse_tunables_write(struct bau_control *bcp, char *instr,
+				int count)
 {
-	int cpu;
-	int cnt = 0;
-	int val;
 	char *p;
 	char *q;
-	char instr[64];
-	struct bau_control *bcp;
+	int cnt = 0;
+	int val;
+	int e = sizeof(tunables) / sizeof(*tunables);
 
-	if (count == 0 || count > sizeof(instr)-1)
-		return -EINVAL;
-	if (copy_from_user(instr, user, count))
-		return -EFAULT;
-
-	instr[count] = '\0';
-	/* count the fields */
 	p = instr + strspn(instr, WHITESPACE);
 	q = p;
 	for (; *p; p = q + strspn(q, WHITESPACE)) {
@@ -1174,8 +1277,8 @@
 		if (q == p)
 			break;
 	}
-	if (cnt != 9) {
-		printk(KERN_INFO "bau tunable error: should be 9 numbers\n");
+	if (cnt != e) {
+		printk(KERN_INFO "bau tunable error: should be %d values\n", e);
 		return -EINVAL;
 	}
 
@@ -1187,97 +1290,80 @@
 		switch (cnt) {
 		case 0:
 			if (val == 0) {
-				max_bau_concurrent = MAX_BAU_CONCURRENT;
-				max_bau_concurrent_constant =
-							MAX_BAU_CONCURRENT;
+				max_concurr = MAX_BAU_CONCURRENT;
+				max_concurr_const = MAX_BAU_CONCURRENT;
 				continue;
 			}
-			bcp = &per_cpu(bau_control, smp_processor_id());
 			if (val < 1 || val > bcp->cpus_in_uvhub) {
 				printk(KERN_DEBUG
 				"Error: BAU max concurrent %d is invalid\n",
 				val);
 				return -EINVAL;
 			}
-			max_bau_concurrent = val;
-			max_bau_concurrent_constant = val;
+			max_concurr = val;
+			max_concurr_const = val;
 			continue;
-		case 1:
+		default:
 			if (val == 0)
-				plugged_delay = PLUGGED_DELAY;
+				*tunables[cnt].tunp = tunables[cnt].deflt;
 			else
-				plugged_delay = val;
-			continue;
-		case 2:
-			if (val == 0)
-				plugsb4reset = PLUGSB4RESET;
-			else
-				plugsb4reset = val;
-			continue;
-		case 3:
-			if (val == 0)
-				timeoutsb4reset = TIMEOUTSB4RESET;
-			else
-				timeoutsb4reset = val;
-			continue;
-		case 4:
-			if (val == 0)
-				ipi_reset_limit = IPI_RESET_LIMIT;
-			else
-				ipi_reset_limit = val;
-			continue;
-		case 5:
-			if (val == 0)
-				complete_threshold = COMPLETE_THRESHOLD;
-			else
-				complete_threshold = val;
-			continue;
-		case 6:
-			if (val == 0)
-				congested_response_us = CONGESTED_RESPONSE_US;
-			else
-				congested_response_us = val;
-			continue;
-		case 7:
-			if (val == 0)
-				congested_reps = CONGESTED_REPS;
-			else
-				congested_reps = val;
-			continue;
-		case 8:
-			if (val == 0)
-				congested_period = CONGESTED_PERIOD;
-			else
-				congested_period = val;
+				*tunables[cnt].tunp = val;
 			continue;
 		}
 		if (q == p)
 			break;
 	}
+	return 0;
+}
+
+/*
+ * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables)
+ */
+static ssize_t tunables_write(struct file *file, const char __user *user,
+				size_t count, loff_t *data)
+{
+	int cpu;
+	int ret;
+	char instr[100];
+	struct bau_control *bcp;
+
+	if (count == 0 || count > sizeof(instr)-1)
+		return -EINVAL;
+	if (copy_from_user(instr, user, count))
+		return -EFAULT;
+
+	instr[count] = '\0';
+
+	bcp = &per_cpu(bau_control, smp_processor_id());
+
+	ret = parse_tunables_write(bcp, instr, count);
+	if (ret)
+		return ret;
+
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
-		bcp->max_bau_concurrent = max_bau_concurrent;
-		bcp->max_bau_concurrent_constant = max_bau_concurrent;
-		bcp->plugged_delay = plugged_delay;
-		bcp->plugsb4reset = plugsb4reset;
-		bcp->timeoutsb4reset = timeoutsb4reset;
-		bcp->ipi_reset_limit = ipi_reset_limit;
-		bcp->complete_threshold = complete_threshold;
-		bcp->congested_response_us = congested_response_us;
-		bcp->congested_reps = congested_reps;
-		bcp->congested_period = congested_period;
+		bcp->max_concurr =		max_concurr;
+		bcp->max_concurr_const =	max_concurr;
+		bcp->plugged_delay =		plugged_delay;
+		bcp->plugsb4reset =		plugsb4reset;
+		bcp->timeoutsb4reset =		timeoutsb4reset;
+		bcp->ipi_reset_limit =		ipi_reset_limit;
+		bcp->complete_threshold =	complete_threshold;
+		bcp->cong_response_us =		congested_respns_us;
+		bcp->cong_reps =		congested_reps;
+		bcp->cong_period =		congested_period;
 	}
 	return count;
 }
 
 static const struct seq_operations uv_ptc_seq_ops = {
-	.start		= uv_ptc_seq_start,
-	.next		= uv_ptc_seq_next,
-	.stop		= uv_ptc_seq_stop,
-	.show		= uv_ptc_seq_show
+	.start		= ptc_seq_start,
+	.next		= ptc_seq_next,
+	.stop		= ptc_seq_stop,
+	.show		= ptc_seq_show
 };
 
-static int uv_ptc_proc_open(struct inode *inode, struct file *file)
+static int ptc_proc_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &uv_ptc_seq_ops);
 }
@@ -1288,9 +1374,9 @@
 }
 
 static const struct file_operations proc_uv_ptc_operations = {
-	.open		= uv_ptc_proc_open,
+	.open		= ptc_proc_open,
 	.read		= seq_read,
-	.write		= uv_ptc_proc_write,
+	.write		= ptc_proc_write,
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
@@ -1324,7 +1410,7 @@
 		return -EINVAL;
 	}
 	tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
-			tunables_dir, NULL, &tunables_fops);
+					tunables_dir, NULL, &tunables_fops);
 	if (!tunables_file) {
 		printk(KERN_ERR "unable to create debugfs file %s\n",
 		       UV_BAU_TUNABLES_FILE);
@@ -1336,24 +1422,24 @@
 /*
  * Initialize the sending side's sending buffers.
  */
-static void
-uv_activation_descriptor_init(int node, int pnode, int base_pnode)
+static void activation_descriptor_init(int node, int pnode, int base_pnode)
 {
 	int i;
 	int cpu;
 	unsigned long pa;
 	unsigned long m;
 	unsigned long n;
+	size_t dsize;
 	struct bau_desc *bau_desc;
 	struct bau_desc *bd2;
 	struct bau_control *bcp;
 
 	/*
-	 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
-	 * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
+	 * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC)
+	 * per cpu; and one per cpu on the uvhub (ADP_SZ)
 	 */
-	bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
-				* UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
+	dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC;
+	bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
 	BUG_ON(!bau_desc);
 
 	pa = uv_gpa(bau_desc); /* need the real nasid*/
@@ -1361,27 +1447,25 @@
 	m = pa & uv_mmask;
 
 	/* the 14-bit pnode */
-	uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
-			      (n << UV_DESC_BASE_PNODE_SHIFT | m));
+	write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
 	/*
-	 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+	 * Initializing all 8 (ITEMS_PER_DESC) descriptors for each
 	 * cpu even though we only use the first one; one descriptor can
 	 * describe a broadcast to 256 uv hubs.
 	 */
-	for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
-		i++, bd2++) {
+	for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
 		memset(bd2, 0, sizeof(struct bau_desc));
-		bd2->header.sw_ack_flag = 1;
+		bd2->header.swack_flag =	1;
 		/*
 		 * The base_dest_nasid set in the message header is the nasid
 		 * of the first uvhub in the partition. The bit map will
 		 * indicate destination pnode numbers relative to that base.
 		 * They may not be consecutive if nasid striding is being used.
 		 */
-		bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
-		bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
-		bd2->header.command = UV_NET_ENDPOINT_INTD;
-		bd2->header.int_both = 1;
+		bd2->header.base_dest_nasid =	UV_PNODE_TO_NASID(base_pnode);
+		bd2->header.dest_subnodeid =	UV_LB_SUBNODEID;
+		bd2->header.command =		UV_NET_ENDPOINT_INTD;
+		bd2->header.int_both =		1;
 		/*
 		 * all others need to be set to zero:
 		 *   fairness chaining multilevel count replied_to
@@ -1401,57 +1485,55 @@
  * - node is first node (kernel memory notion) on the uvhub
  * - pnode is the uvhub's physical identifier
  */
-static void
-uv_payload_queue_init(int node, int pnode)
+static void pq_init(int node, int pnode)
 {
-	int pn;
 	int cpu;
+	size_t plsize;
 	char *cp;
-	unsigned long pa;
-	struct bau_payload_queue_entry *pqp;
-	struct bau_payload_queue_entry *pqp_malloc;
+	void *vp;
+	unsigned long pn;
+	unsigned long first;
+	unsigned long pn_first;
+	unsigned long last;
+	struct bau_pq_entry *pqp;
 	struct bau_control *bcp;
 
-	pqp = kmalloc_node((DEST_Q_SIZE + 1)
-			   * sizeof(struct bau_payload_queue_entry),
-			   GFP_KERNEL, node);
+	plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
+	vp = kmalloc_node(plsize, GFP_KERNEL, node);
+	pqp = (struct bau_pq_entry *)vp;
 	BUG_ON(!pqp);
-	pqp_malloc = pqp;
 
 	cp = (char *)pqp + 31;
-	pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
+	pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
 
 	for_each_present_cpu(cpu) {
 		if (pnode != uv_cpu_to_pnode(cpu))
 			continue;
 		/* for every cpu on this pnode: */
 		bcp = &per_cpu(bau_control, cpu);
-		bcp->va_queue_first = pqp;
-		bcp->bau_msg_head = pqp;
-		bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
+		bcp->queue_first	= pqp;
+		bcp->bau_msg_head	= pqp;
+		bcp->queue_last		= pqp + (DEST_Q_SIZE - 1);
 	}
 	/*
 	 * need the pnode of where the memory was really allocated
 	 */
-	pa = uv_gpa(pqp);
-	pn = pa >> uv_nshift;
-	uv_write_global_mmr64(pnode,
-			      UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
-			      ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
-			      uv_physnodeaddr(pqp));
-	uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
-			      uv_physnodeaddr(pqp));
-	uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
-			      (unsigned long)
-			      uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)));
+	pn = uv_gpa(pqp) >> uv_nshift;
+	first = uv_physnodeaddr(pqp);
+	pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
+	last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
+	write_mmr_payload_first(pnode, pn_first);
+	write_mmr_payload_tail(pnode, first);
+	write_mmr_payload_last(pnode, last);
+
 	/* in effect, all msg_type's are set to MSG_NOOP */
-	memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
+	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
 }
 
 /*
  * Initialization of each UV hub's structures
  */
-static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
+static void __init init_uvhub(int uvhub, int vector, int base_pnode)
 {
 	int node;
 	int pnode;
@@ -1459,24 +1541,24 @@
 
 	node = uvhub_to_first_node(uvhub);
 	pnode = uv_blade_to_pnode(uvhub);
-	uv_activation_descriptor_init(node, pnode, base_pnode);
-	uv_payload_queue_init(node, pnode);
+
+	activation_descriptor_init(node, pnode, base_pnode);
+
+	pq_init(node, pnode);
 	/*
 	 * The below initialization can't be in firmware because the
 	 * messaging IRQ will be determined by the OS.
 	 */
 	apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
-	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
-				      ((apicid << 32) | vector));
+	write_mmr_data_config(pnode, ((apicid << 32) | vector));
 }
 
 /*
  * We will set BAU_MISC_CONTROL with a timeout period.
  * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
- * So the destination timeout period has be be calculated from them.
+ * So the destination timeout period has to be calculated from them.
  */
-static int
-calculate_destination_timeout(void)
+static int calculate_destination_timeout(void)
 {
 	unsigned long mmr_image;
 	int mult1;
@@ -1486,73 +1568,92 @@
 	int ret;
 	unsigned long ts_ns;
 
-	mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
-	mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
-	index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
-	mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
-	mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
-	base = timeout_base_ns[index];
-	ts_ns = base * mult1 * mult2;
-	ret = ts_ns / 1000;
+	if (is_uv1_hub()) {
+		mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
+		mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
+		index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
+		mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
+		mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
+		base = timeout_base_ns[index];
+		ts_ns = base * mult1 * mult2;
+		ret = ts_ns / 1000;
+	} else {
+		/* 4 bits  0/1 for 10/80us, 3 bits of multiplier */
+		mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
+		mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
+		if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
+			mult1 = 80;
+		else
+			mult1 = 10;
+		base = mmr_image & UV2_ACK_MASK;
+		ret = mult1 * base;
+	}
 	return ret;
 }
 
-/*
- * initialize the bau_control structure for each cpu
- */
-static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
+static void __init init_per_cpu_tunables(void)
 {
-	int i;
 	int cpu;
-	int tcpu;
+	struct bau_control *bcp;
+
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->baudisabled		= 0;
+		bcp->statp			= &per_cpu(ptcstats, cpu);
+		/* time interval to catch a hardware stay-busy bug */
+		bcp->timeout_interval		= usec_2_cycles(2*timeout_us);
+		bcp->max_concurr		= max_concurr;
+		bcp->max_concurr_const		= max_concurr;
+		bcp->plugged_delay		= plugged_delay;
+		bcp->plugsb4reset		= plugsb4reset;
+		bcp->timeoutsb4reset		= timeoutsb4reset;
+		bcp->ipi_reset_limit		= ipi_reset_limit;
+		bcp->complete_threshold		= complete_threshold;
+		bcp->cong_response_us		= congested_respns_us;
+		bcp->cong_reps			= congested_reps;
+		bcp->cong_period		= congested_period;
+	}
+}
+
+/*
+ * Scan all cpus to collect blade and socket summaries.
+ */
+static int __init get_cpu_topology(int base_pnode,
+					struct uvhub_desc *uvhub_descs,
+					unsigned char *uvhub_mask)
+{
+	int cpu;
 	int pnode;
 	int uvhub;
-	int have_hmaster;
-	short socket = 0;
-	unsigned short socket_mask;
-	unsigned char *uvhub_mask;
+	int socket;
 	struct bau_control *bcp;
 	struct uvhub_desc *bdp;
 	struct socket_desc *sdp;
-	struct bau_control *hmaster = NULL;
-	struct bau_control *smaster = NULL;
-	struct socket_desc {
-		short num_cpus;
-		short cpu_number[MAX_CPUS_PER_SOCKET];
-	};
-	struct uvhub_desc {
-		unsigned short socket_mask;
-		short num_cpus;
-		short uvhub;
-		short pnode;
-		struct socket_desc socket[2];
-	};
-	struct uvhub_desc *uvhub_descs;
 
-	timeout_us = calculate_destination_timeout();
-
-	uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
-	memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
-	uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
+
 		memset(bcp, 0, sizeof(struct bau_control));
+
 		pnode = uv_cpu_hub_info(cpu)->pnode;
-		if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
+		if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
 			printk(KERN_EMERG
 				"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
-				cpu, pnode, base_part_pnode,
-				UV_DISTRIBUTION_SIZE);
+				cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
 			return 1;
 		}
+
 		bcp->osnode = cpu_to_node(cpu);
-		bcp->partition_base_pnode = uv_partition_base_pnode;
+		bcp->partition_base_pnode = base_pnode;
+
 		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
 		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
 		bdp = &uvhub_descs[uvhub];
+
 		bdp->num_cpus++;
 		bdp->uvhub = uvhub;
 		bdp->pnode = pnode;
+
 		/* kludge: 'assuming' one node per socket, and assuming that
 		   disabling a socket just leaves a gap in node numbers */
 		socket = bcp->osnode & 1;
@@ -1561,84 +1662,129 @@
 		sdp->cpu_number[sdp->num_cpus] = cpu;
 		sdp->num_cpus++;
 		if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
-			printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
+			printk(KERN_EMERG "%d cpus per socket invalid\n",
+				sdp->num_cpus);
 			return 1;
 		}
 	}
+	return 0;
+}
+
+/*
+ * Each socket is to get a local array of pnodes/hubs.
+ */
+static void make_per_cpu_thp(struct bau_control *smaster)
+{
+	int cpu;
+	size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
+
+	smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
+	memset(smaster->thp, 0, hpsz);
+	for_each_present_cpu(cpu) {
+		smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
+		smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
+	}
+}
+
+/*
+ * Initialize all the per_cpu information for the cpu's on a given socket,
+ * given what has been gathered into the socket_desc struct.
+ * And reports the chosen hub and socket masters back to the caller.
+ */
+static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
+			struct bau_control **smasterp,
+			struct bau_control **hmasterp)
+{
+	int i;
+	int cpu;
+	struct bau_control *bcp;
+
+	for (i = 0; i < sdp->num_cpus; i++) {
+		cpu = sdp->cpu_number[i];
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->cpu = cpu;
+		if (i == 0) {
+			*smasterp = bcp;
+			if (!(*hmasterp))
+				*hmasterp = bcp;
+		}
+		bcp->cpus_in_uvhub = bdp->num_cpus;
+		bcp->cpus_in_socket = sdp->num_cpus;
+		bcp->socket_master = *smasterp;
+		bcp->uvhub = bdp->uvhub;
+		bcp->uvhub_master = *hmasterp;
+		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
+			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
+				bcp->uvhub_cpu);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Summarize the blade and socket topology into the per_cpu structures.
+ */
+static int __init summarize_uvhub_sockets(int nuvhubs,
+			struct uvhub_desc *uvhub_descs,
+			unsigned char *uvhub_mask)
+{
+	int socket;
+	int uvhub;
+	unsigned short socket_mask;
+
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
+		struct uvhub_desc *bdp;
+		struct bau_control *smaster = NULL;
+		struct bau_control *hmaster = NULL;
+
 		if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
 			continue;
-		have_hmaster = 0;
+
 		bdp = &uvhub_descs[uvhub];
 		socket_mask = bdp->socket_mask;
 		socket = 0;
 		while (socket_mask) {
-			if (!(socket_mask & 1))
-				goto nextsocket;
-			sdp = &bdp->socket[socket];
-			for (i = 0; i < sdp->num_cpus; i++) {
-				cpu = sdp->cpu_number[i];
-				bcp = &per_cpu(bau_control, cpu);
-				bcp->cpu = cpu;
-				if (i == 0) {
-					smaster = bcp;
-					if (!have_hmaster) {
-						have_hmaster++;
-						hmaster = bcp;
-					}
-				}
-				bcp->cpus_in_uvhub = bdp->num_cpus;
-				bcp->cpus_in_socket = sdp->num_cpus;
-				bcp->socket_master = smaster;
-				bcp->uvhub = bdp->uvhub;
-				bcp->uvhub_master = hmaster;
-				bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
-						blade_processor_id;
-				if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
-					printk(KERN_EMERG
-						"%d cpus per uvhub invalid\n",
-						bcp->uvhub_cpu);
+			struct socket_desc *sdp;
+			if ((socket_mask & 1)) {
+				sdp = &bdp->socket[socket];
+				if (scan_sock(sdp, bdp, &smaster, &hmaster))
 					return 1;
-				}
 			}
-nextsocket:
 			socket++;
 			socket_mask = (socket_mask >> 1);
-			/* each socket gets a local array of pnodes/hubs */
-			bcp = smaster;
-			bcp->target_hub_and_pnode = kmalloc_node(
-				sizeof(struct hub_and_pnode) *
-				num_possible_cpus(), GFP_KERNEL, bcp->osnode);
-			memset(bcp->target_hub_and_pnode, 0,
-				sizeof(struct hub_and_pnode) *
-				num_possible_cpus());
-			for_each_present_cpu(tcpu) {
-				bcp->target_hub_and_pnode[tcpu].pnode =
-					uv_cpu_hub_info(tcpu)->pnode;
-				bcp->target_hub_and_pnode[tcpu].uvhub =
-					uv_cpu_hub_info(tcpu)->numa_blade_id;
-			}
+			make_per_cpu_thp(smaster);
 		}
 	}
+	return 0;
+}
+
+/*
+ * initialize the bau_control structure for each cpu
+ */
+static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
+{
+	unsigned char *uvhub_mask;
+	void *vp;
+	struct uvhub_desc *uvhub_descs;
+
+	timeout_us = calculate_destination_timeout();
+
+	vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
+	uvhub_descs = (struct uvhub_desc *)vp;
+	memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
+	uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
+
+	if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
+		return 1;
+
+	if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))
+		return 1;
+
 	kfree(uvhub_descs);
 	kfree(uvhub_mask);
-	for_each_present_cpu(cpu) {
-		bcp = &per_cpu(bau_control, cpu);
-		bcp->baudisabled = 0;
-		bcp->statp = &per_cpu(ptcstats, cpu);
-		/* time interval to catch a hardware stay-busy bug */
-		bcp->timeout_interval = microsec_2_cycles(2*timeout_us);
-		bcp->max_bau_concurrent = max_bau_concurrent;
-		bcp->max_bau_concurrent_constant = max_bau_concurrent;
-		bcp->plugged_delay = plugged_delay;
-		bcp->plugsb4reset = plugsb4reset;
-		bcp->timeoutsb4reset = timeoutsb4reset;
-		bcp->ipi_reset_limit = ipi_reset_limit;
-		bcp->complete_threshold = complete_threshold;
-		bcp->congested_response_us = congested_response_us;
-		bcp->congested_reps = congested_reps;
-		bcp->congested_period = congested_period;
-	}
+	init_per_cpu_tunables();
 	return 0;
 }
 
@@ -1651,8 +1797,9 @@
 	int pnode;
 	int nuvhubs;
 	int cur_cpu;
+	int cpus;
 	int vector;
-	unsigned long mmr;
+	cpumask_var_t *mask;
 
 	if (!is_uv_system())
 		return 0;
@@ -1660,24 +1807,25 @@
 	if (nobau)
 		return 0;
 
-	for_each_possible_cpu(cur_cpu)
-		zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
-				       GFP_KERNEL, cpu_to_node(cur_cpu));
+	for_each_possible_cpu(cur_cpu) {
+		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
+		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
+	}
 
 	uv_nshift = uv_hub_info->m_val;
 	uv_mmask = (1UL << uv_hub_info->m_val) - 1;
 	nuvhubs = uv_num_possible_blades();
 	spin_lock_init(&disable_lock);
-	congested_cycles = microsec_2_cycles(congested_response_us);
+	congested_cycles = usec_2_cycles(congested_respns_us);
 
-	uv_partition_base_pnode = 0x7fffffff;
+	uv_base_pnode = 0x7fffffff;
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
-		if (uv_blade_nr_possible_cpus(uvhub) &&
-			(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
-			uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
+		cpus = uv_blade_nr_possible_cpus(uvhub);
+		if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
+			uv_base_pnode = uv_blade_to_pnode(uvhub);
 	}
 
-	if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
+	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
 		nobau = 1;
 		return 0;
 	}
@@ -1685,21 +1833,21 @@
 	vector = UV_BAU_MESSAGE;
 	for_each_possible_blade(uvhub)
 		if (uv_blade_nr_possible_cpus(uvhub))
-			uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
+			init_uvhub(uvhub, vector, uv_base_pnode);
 
-	uv_enable_timeouts();
+	enable_timeouts();
 	alloc_intr_gate(vector, uv_bau_message_intr1);
 
 	for_each_possible_blade(uvhub) {
 		if (uv_blade_nr_possible_cpus(uvhub)) {
+			unsigned long val;
+			unsigned long mmr;
 			pnode = uv_blade_to_pnode(uvhub);
 			/* INIT the bau */
-			uv_write_global_mmr64(pnode,
-					UVH_LB_BAU_SB_ACTIVATION_CONTROL,
-					((unsigned long)1 << 63));
+			val = 1L << 63;
+			write_gmmr_activation(pnode, val);
 			mmr = 1; /* should be 1 to broadcast to both sockets */
-			uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST,
-						mmr);
+			write_mmr_data_broadcast(pnode, mmr);
 		}
 	}
 

diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 0eb9018..9f29a01 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c

@@ -99,8 +99,12 @@
 /* Check for an RTC interrupt pending */
 static int uv_intr_pending(int pnode)
 {
-	return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
-		UVH_EVENT_OCCURRED0_RTC1_MASK;
+	if (is_uv1_hub())
+		return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
+			UV1H_EVENT_OCCURRED0_RTC1_MASK;
+	else
+		return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) &
+			UV2H_EVENT_OCCURRED2_RTC_1_MASK;
 }
 
 /* Setup interrupt and return non-zero if early expiration occurred. */
@@ -114,8 +118,12 @@
 		UVH_RTC1_INT_CONFIG_M_MASK);
 	uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
 
-	uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
-		UVH_EVENT_OCCURRED0_RTC1_MASK);
+	if (is_uv1_hub())
+		uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
+				UV1H_EVENT_OCCURRED0_RTC1_MASK);
+	else
+		uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS,
+				UV2H_EVENT_OCCURRED2_RTC_1_MASK);
 
 	val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
 		((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);

diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 528042c..a6f934f 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h

@@ -683,8 +683,10 @@
 __SYSCALL(306, sys_eventfd, 1)
 #define __NR_recvmmsg				307
 __SYSCALL(307, sys_recvmmsg, 5)
+#define __NR_setns				308
+__SYSCALL(308, sys_setns, 2)
 
-#define __NR_syscall_count			308
+#define __NR_syscall_count			309
 
 /*
  * sysxtensa syscall handler

diff --git a/drivers/Makefile b/drivers/Makefile
index 6b17f58..09f3232 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile

@@ -17,6 +17,9 @@
 # was used and do nothing if so
 obj-$(CONFIG_PNP)		+= pnp/
 obj-$(CONFIG_ARM_AMBA)		+= amba/
+# Many drivers will want to use DMA so this has to be made available
+# really early.
+obj-$(CONFIG_DMA_ENGINE)	+= dma/
 
 obj-$(CONFIG_VIRTIO)		+= virtio/
 obj-$(CONFIG_XEN)		+= xen/
@@ -92,7 +95,6 @@
 obj-y				+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
-obj-$(CONFIG_DMA_ENGINE)	+= dma/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_MEMSTICK)		+= memstick/
 obj-y				+= leds/

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index bc2218d..de0e3df 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig

@@ -369,6 +369,21 @@
 	  which is used to report some hardware errors notified via
 	  SCI, mainly the corrected errors.
 
+config ACPI_CUSTOM_METHOD
+	tristate "Allow ACPI methods to be inserted/replaced at run time"
+	depends on DEBUG_FS
+	default n
+	help
+	  This debug facility allows ACPI AML methods to me inserted and/or
+	  replaced without rebooting the system. For details refer to:
+	  Documentation/acpi/method-customizing.txt.
+
+	  NOTE: This option is security sensitive, because it allows arbitrary
+	  kernel memory to be written to by root (uid=0) users, allowing them
+	  to bypass certain security measures (e.g. if root is not allowed to
+	  load additional kernel modules after boot, this feature may be used
+	  to override that restriction).
+
 source "drivers/acpi/apei/Kconfig"
 
 endif	# ACPI

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index b66fbb2..ecb26b4 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile

@@ -61,6 +61,7 @@
 obj-$(CONFIG_ACPI_SBS)		+= sbs.o
 obj-$(CONFIG_ACPI_HED)		+= hed.o
 obj-$(CONFIG_ACPI_EC_DEBUGFS)	+= ec_sys.o
+obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o
 
 # processor has its own "processor." module_param namespace
 processor-y			:= processor_driver.o processor_throttling.o

diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index a122471..301bd2d 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile

@@ -14,7 +14,7 @@
 
 acpi-y += evevent.o  evregion.o  evsci.o    evxfevnt.o \
 	 evmisc.o   evrgnini.o  evxface.o  evxfregn.o \
-	 evgpe.o    evgpeblk.o evgpeinit.o  evgpeutil.o evxfgpe.o
+	 evgpe.o    evgpeblk.o evgpeinit.o  evgpeutil.o evxfgpe.o evglock.o
 
 acpi-y += exconfig.o  exfield.o  exnames.o   exoparg6.o  exresolv.o  exstorob.o\
 	 exconvrt.o  exfldio.o  exoparg1.o  exprep.o    exresop.o   exsystem.o\

diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h
index ab87396..bc533dd 100644
--- a/drivers/acpi/acpica/acconfig.h
+++ b/drivers/acpi/acpica/acconfig.h

@@ -187,7 +187,6 @@
 
 /* Operation regions */
 
-#define ACPI_NUM_PREDEFINED_REGIONS     9
 #define ACPI_USER_REGION_BEGIN          0x80
 
 /* Maximum space_ids for Operation Regions */

diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index 41d247d..bea3b48 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h

@@ -58,12 +58,6 @@
  */
 u8 acpi_ev_is_notify_object(struct acpi_namespace_node *node);
 
-acpi_status acpi_ev_acquire_global_lock(u16 timeout);
-
-acpi_status acpi_ev_release_global_lock(void);
-
-acpi_status acpi_ev_init_global_lock_handler(void);
-
 u32 acpi_ev_get_gpe_number_index(u32 gpe_number);
 
 acpi_status
@@ -71,6 +65,17 @@
 			     u32 notify_value);
 
 /*
+ * evglock - Global Lock support
+ */
+acpi_status acpi_ev_init_global_lock_handler(void);
+
+acpi_status acpi_ev_acquire_global_lock(u16 timeout);
+
+acpi_status acpi_ev_release_global_lock(void);
+
+acpi_status acpi_ev_remove_global_lock_handler(void);
+
+/*
  * evgpe - Low-level GPE support
  */
 u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info *gpe_xrupt_list);

diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index d69750b..73863d8 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h

@@ -214,24 +214,23 @@
 
 /*
  * Global lock mutex is an actual AML mutex object
- * Global lock semaphore works in conjunction with the HW global lock
+ * Global lock semaphore works in conjunction with the actual global lock
+ * Global lock spinlock is used for "pending" handshake
  */
 ACPI_EXTERN union acpi_operand_object *acpi_gbl_global_lock_mutex;
 ACPI_EXTERN acpi_semaphore acpi_gbl_global_lock_semaphore;
+ACPI_EXTERN acpi_spinlock acpi_gbl_global_lock_pending_lock;
 ACPI_EXTERN u16 acpi_gbl_global_lock_handle;
 ACPI_EXTERN u8 acpi_gbl_global_lock_acquired;
 ACPI_EXTERN u8 acpi_gbl_global_lock_present;
+ACPI_EXTERN u8 acpi_gbl_global_lock_pending;
 
 /*
  * Spinlocks are used for interfaces that can be possibly called at
  * interrupt level
  */
-ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock;	/* For GPE data structs and registers */
-ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock;	/* For ACPI H/W except GPE registers */
-ACPI_EXTERN spinlock_t _acpi_ev_global_lock_pending_lock; /* For global lock */
-#define acpi_gbl_gpe_lock	&_acpi_gbl_gpe_lock
-#define acpi_gbl_hardware_lock	&_acpi_gbl_hardware_lock
-#define acpi_ev_global_lock_pending_lock &_acpi_ev_global_lock_pending_lock
+ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock;	/* For GPE data structs and registers */
+ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock;	/* For ACPI H/W except GPE registers */
 
 /*****************************************************************************
  *

diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index f4f0998..1077f17 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h

@@ -394,21 +394,6 @@
 #define AML_CLASS_METHOD_CALL       0x09
 #define AML_CLASS_UNKNOWN           0x0A
 
-/* Predefined Operation Region space_iDs */
-
-typedef enum {
-	REGION_MEMORY = 0,
-	REGION_IO,
-	REGION_PCI_CONFIG,
-	REGION_EC,
-	REGION_SMBUS,
-	REGION_CMOS,
-	REGION_PCI_BAR,
-	REGION_IPMI,
-	REGION_DATA_TABLE,	/* Internal use only */
-	REGION_FIXED_HW = 0x7F
-} AML_REGION_TYPES;
-
 /* Comparison operation codes for match_op operator */
 
 typedef enum {

diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
index 23a3b1a..324acec 100644
--- a/drivers/acpi/acpica/dswload.c
+++ b/drivers/acpi/acpica/dswload.c

@@ -450,7 +450,7 @@
 			status =
 			    acpi_ex_create_region(op->named.data,
 						  op->named.length,
-						  REGION_DATA_TABLE,
+						  ACPI_ADR_SPACE_DATA_TABLE,
 						  walk_state);
 			if (ACPI_FAILURE(status)) {
 				return_ACPI_STATUS(status);

diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 4be4e92..9763181 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c

@@ -562,7 +562,7 @@
 				    ((op->common.value.arg)->common.value.
 				     integer);
 			} else {
-				region_space = REGION_DATA_TABLE;
+				region_space = ACPI_ADR_SPACE_DATA_TABLE;
 			}
 
 			/*

diff --git a/drivers/acpi/acpica/evglock.c b/drivers/acpi/acpica/evglock.c
new file mode 100644
index 0000000..56a562a
--- /dev/null
+++ b/drivers/acpi/acpica/evglock.c

@@ -0,0 +1,335 @@
+/******************************************************************************
+ *
+ * Module Name: evglock - Global Lock support
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2011, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acevents.h"
+#include "acinterp.h"
+
+#define _COMPONENT          ACPI_EVENTS
+ACPI_MODULE_NAME("evglock")
+
+/* Local prototypes */
+static u32 acpi_ev_global_lock_handler(void *context);
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ev_init_global_lock_handler
+ *
+ * PARAMETERS:  None
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Install a handler for the global lock release event
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ev_init_global_lock_handler(void)
+{
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE(ev_init_global_lock_handler);
+
+	/* Attempt installation of the global lock handler */
+
+	status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL,
+						  acpi_ev_global_lock_handler,
+						  NULL);
+
+	/*
+	 * If the global lock does not exist on this platform, the attempt to
+	 * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick).
+	 * Map to AE_OK, but mark global lock as not present. Any attempt to
+	 * actually use the global lock will be flagged with an error.
+	 */
+	acpi_gbl_global_lock_present = FALSE;
+	if (status == AE_NO_HARDWARE_RESPONSE) {
+		ACPI_ERROR((AE_INFO,
+			    "No response from Global Lock hardware, disabling lock"));
+
+		return_ACPI_STATUS(AE_OK);
+	}
+
+	status = acpi_os_create_lock(&acpi_gbl_global_lock_pending_lock);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	acpi_gbl_global_lock_pending = FALSE;
+	acpi_gbl_global_lock_present = TRUE;
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ev_remove_global_lock_handler
+ *
+ * PARAMETERS:  None
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Remove the handler for the Global Lock
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ev_remove_global_lock_handler(void)
+{
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE(ev_remove_global_lock_handler);
+
+	acpi_gbl_global_lock_present = FALSE;
+	status = acpi_remove_fixed_event_handler(ACPI_EVENT_GLOBAL,
+						 acpi_ev_global_lock_handler);
+
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ev_global_lock_handler
+ *
+ * PARAMETERS:  Context         - From thread interface, not used
+ *
+ * RETURN:      ACPI_INTERRUPT_HANDLED
+ *
+ * DESCRIPTION: Invoked directly from the SCI handler when a global lock
+ *              release interrupt occurs. If there is actually a pending
+ *              request for the lock, signal the waiting thread.
+ *
+ ******************************************************************************/
+
+static u32 acpi_ev_global_lock_handler(void *context)
+{
+	acpi_status status;
+	acpi_cpu_flags flags;
+
+	flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock);
+
+	/*
+	 * If a request for the global lock is not actually pending,
+	 * we are done. This handles "spurious" global lock interrupts
+	 * which are possible (and have been seen) with bad BIOSs.
+	 */
+	if (!acpi_gbl_global_lock_pending) {
+		goto cleanup_and_exit;
+	}
+
+	/*
+	 * Send a unit to the global lock semaphore. The actual acquisition
+	 * of the global lock will be performed by the waiting thread.
+	 */
+	status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1);
+	if (ACPI_FAILURE(status)) {
+		ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore"));
+	}
+
+	acpi_gbl_global_lock_pending = FALSE;
+
+      cleanup_and_exit:
+
+	acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags);
+	return (ACPI_INTERRUPT_HANDLED);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION:    acpi_ev_acquire_global_lock
+ *
+ * PARAMETERS:  Timeout         - Max time to wait for the lock, in millisec.
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Attempt to gain ownership of the Global Lock.
+ *
+ * MUTEX:       Interpreter must be locked
+ *
+ * Note: The original implementation allowed multiple threads to "acquire" the
+ * Global Lock, and the OS would hold the lock until the last thread had
+ * released it. However, this could potentially starve the BIOS out of the
+ * lock, especially in the case where there is a tight handshake between the
+ * Embedded Controller driver and the BIOS. Therefore, this implementation
+ * allows only one thread to acquire the HW Global Lock at a time, and makes
+ * the global lock appear as a standard mutex on the OS side.
+ *
+ *****************************************************************************/
+
+acpi_status acpi_ev_acquire_global_lock(u16 timeout)
+{
+	acpi_cpu_flags flags;
+	acpi_status status;
+	u8 acquired = FALSE;
+
+	ACPI_FUNCTION_TRACE(ev_acquire_global_lock);
+
+	/*
+	 * Only one thread can acquire the GL at a time, the global_lock_mutex
+	 * enforces this. This interface releases the interpreter if we must wait.
+	 */
+	status =
+	    acpi_ex_system_wait_mutex(acpi_gbl_global_lock_mutex->mutex.
+				      os_mutex, timeout);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/*
+	 * Update the global lock handle and check for wraparound. The handle is
+	 * only used for the external global lock interfaces, but it is updated
+	 * here to properly handle the case where a single thread may acquire the
+	 * lock via both the AML and the acpi_acquire_global_lock interfaces. The
+	 * handle is therefore updated on the first acquire from a given thread
+	 * regardless of where the acquisition request originated.
+	 */
+	acpi_gbl_global_lock_handle++;
+	if (acpi_gbl_global_lock_handle == 0) {
+		acpi_gbl_global_lock_handle = 1;
+	}
+
+	/*
+	 * Make sure that a global lock actually exists. If not, just
+	 * treat the lock as a standard mutex.
+	 */
+	if (!acpi_gbl_global_lock_present) {
+		acpi_gbl_global_lock_acquired = TRUE;
+		return_ACPI_STATUS(AE_OK);
+	}
+
+	flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock);
+
+	do {
+
+		/* Attempt to acquire the actual hardware lock */
+
+		ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired);
+		if (acquired) {
+			acpi_gbl_global_lock_acquired = TRUE;
+			ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+					  "Acquired hardware Global Lock\n"));
+			break;
+		}
+
+		/*
+		 * Did not get the lock. The pending bit was set above, and
+		 * we must now wait until we receive the global lock
+		 * released interrupt.
+		 */
+		acpi_gbl_global_lock_pending = TRUE;
+		acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags);
+
+		ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+				  "Waiting for hardware Global Lock\n"));
+
+		/*
+		 * Wait for handshake with the global lock interrupt handler.
+		 * This interface releases the interpreter if we must wait.
+		 */
+		status =
+		    acpi_ex_system_wait_semaphore
+		    (acpi_gbl_global_lock_semaphore, ACPI_WAIT_FOREVER);
+
+		flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock);
+
+	} while (ACPI_SUCCESS(status));
+
+	acpi_gbl_global_lock_pending = FALSE;
+	acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags);
+
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ev_release_global_lock
+ *
+ * PARAMETERS:  None
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Releases ownership of the Global Lock.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ev_release_global_lock(void)
+{
+	u8 pending = FALSE;
+	acpi_status status = AE_OK;
+
+	ACPI_FUNCTION_TRACE(ev_release_global_lock);
+
+	/* Lock must be already acquired */
+
+	if (!acpi_gbl_global_lock_acquired) {
+		ACPI_WARNING((AE_INFO,
+			      "Cannot release the ACPI Global Lock, it has not been acquired"));
+		return_ACPI_STATUS(AE_NOT_ACQUIRED);
+	}
+
+	if (acpi_gbl_global_lock_present) {
+
+		/* Allow any thread to release the lock */
+
+		ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending);
+
+		/*
+		 * If the pending bit was set, we must write GBL_RLS to the control
+		 * register
+		 */
+		if (pending) {
+			status =
+			    acpi_write_bit_register
+			    (ACPI_BITREG_GLOBAL_LOCK_RELEASE,
+			     ACPI_ENABLE_EVENT);
+		}
+
+		ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+				  "Released hardware Global Lock\n"));
+	}
+
+	acpi_gbl_global_lock_acquired = FALSE;
+
+	/* Release the local GL mutex */
+
+	acpi_os_release_mutex(acpi_gbl_global_lock_mutex->mutex.os_mutex);
+	return_ACPI_STATUS(status);
+}

diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c
index 7dc8094..d0b3318 100644
--- a/drivers/acpi/acpica/evmisc.c
+++ b/drivers/acpi/acpica/evmisc.c

@@ -45,7 +45,6 @@
 #include "accommon.h"
 #include "acevents.h"
 #include "acnamesp.h"
-#include "acinterp.h"
 
 #define _COMPONENT          ACPI_EVENTS
 ACPI_MODULE_NAME("evmisc")
@@ -53,10 +52,6 @@
 /* Local prototypes */
 static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context);
 
-static u32 acpi_ev_global_lock_handler(void *context);
-
-static acpi_status acpi_ev_remove_global_lock_handler(void);
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ev_is_notify_object
@@ -275,304 +270,6 @@
 	acpi_ut_delete_generic_state(notify_info);
 }
 
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ev_global_lock_handler
- *
- * PARAMETERS:  Context         - From thread interface, not used
- *
- * RETURN:      ACPI_INTERRUPT_HANDLED
- *
- * DESCRIPTION: Invoked directly from the SCI handler when a global lock
- *              release interrupt occurs.  If there's a thread waiting for
- *              the global lock, signal it.
- *
- * NOTE: Assumes that the semaphore can be signaled from interrupt level. If
- * this is not possible for some reason, a separate thread will have to be
- * scheduled to do this.
- *
- ******************************************************************************/
-static u8 acpi_ev_global_lock_pending;
-
-static u32 acpi_ev_global_lock_handler(void *context)
-{
-	acpi_status status;
-	acpi_cpu_flags flags;
-
-	flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock);
-
-	if (!acpi_ev_global_lock_pending) {
-		goto out;
-	}
-
-	/* Send a unit to the semaphore */
-
-	status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1);
-	if (ACPI_FAILURE(status)) {
-		ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore"));
-	}
-
-	acpi_ev_global_lock_pending = FALSE;
-
- out:
-	acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags);
-
-	return (ACPI_INTERRUPT_HANDLED);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ev_init_global_lock_handler
- *
- * PARAMETERS:  None
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Install a handler for the global lock release event
- *
- ******************************************************************************/
-
-acpi_status acpi_ev_init_global_lock_handler(void)
-{
-	acpi_status status;
-
-	ACPI_FUNCTION_TRACE(ev_init_global_lock_handler);
-
-	/* Attempt installation of the global lock handler */
-
-	status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL,
-						  acpi_ev_global_lock_handler,
-						  NULL);
-
-	/*
-	 * If the global lock does not exist on this platform, the attempt to
-	 * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick).
-	 * Map to AE_OK, but mark global lock as not present. Any attempt to
-	 * actually use the global lock will be flagged with an error.
-	 */
-	if (status == AE_NO_HARDWARE_RESPONSE) {
-		ACPI_ERROR((AE_INFO,
-			    "No response from Global Lock hardware, disabling lock"));
-
-		acpi_gbl_global_lock_present = FALSE;
-		return_ACPI_STATUS(AE_OK);
-	}
-
-	acpi_gbl_global_lock_present = TRUE;
-	return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ev_remove_global_lock_handler
- *
- * PARAMETERS:  None
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Remove the handler for the Global Lock
- *
- ******************************************************************************/
-
-static acpi_status acpi_ev_remove_global_lock_handler(void)
-{
-	acpi_status status;
-
-	ACPI_FUNCTION_TRACE(ev_remove_global_lock_handler);
-
-	acpi_gbl_global_lock_present = FALSE;
-	status = acpi_remove_fixed_event_handler(ACPI_EVENT_GLOBAL,
-						 acpi_ev_global_lock_handler);
-
-	return_ACPI_STATUS(status);
-}
-
-/******************************************************************************
- *
- * FUNCTION:    acpi_ev_acquire_global_lock
- *
- * PARAMETERS:  Timeout         - Max time to wait for the lock, in millisec.
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Attempt to gain ownership of the Global Lock.
- *
- * MUTEX:       Interpreter must be locked
- *
- * Note: The original implementation allowed multiple threads to "acquire" the
- * Global Lock, and the OS would hold the lock until the last thread had
- * released it. However, this could potentially starve the BIOS out of the
- * lock, especially in the case where there is a tight handshake between the
- * Embedded Controller driver and the BIOS. Therefore, this implementation
- * allows only one thread to acquire the HW Global Lock at a time, and makes
- * the global lock appear as a standard mutex on the OS side.
- *
- *****************************************************************************/
-static acpi_thread_id acpi_ev_global_lock_thread_id;
-static int acpi_ev_global_lock_acquired;
-
-acpi_status acpi_ev_acquire_global_lock(u16 timeout)
-{
-	acpi_cpu_flags flags;
-	acpi_status status = AE_OK;
-	u8 acquired = FALSE;
-
-	ACPI_FUNCTION_TRACE(ev_acquire_global_lock);
-
-	/*
-	 * Only one thread can acquire the GL at a time, the global_lock_mutex
-	 * enforces this. This interface releases the interpreter if we must wait.
-	 */
-	status = acpi_ex_system_wait_mutex(
-			acpi_gbl_global_lock_mutex->mutex.os_mutex, 0);
-	if (status == AE_TIME) {
-		if (acpi_ev_global_lock_thread_id == acpi_os_get_thread_id()) {
-			acpi_ev_global_lock_acquired++;
-			return AE_OK;
-		}
-	}
-
-	if (ACPI_FAILURE(status)) {
-		status = acpi_ex_system_wait_mutex(
-				acpi_gbl_global_lock_mutex->mutex.os_mutex,
-				timeout);
-	}
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
-	acpi_ev_global_lock_thread_id = acpi_os_get_thread_id();
-	acpi_ev_global_lock_acquired++;
-
-	/*
-	 * Update the global lock handle and check for wraparound. The handle is
-	 * only used for the external global lock interfaces, but it is updated
-	 * here to properly handle the case where a single thread may acquire the
-	 * lock via both the AML and the acpi_acquire_global_lock interfaces. The
-	 * handle is therefore updated on the first acquire from a given thread
-	 * regardless of where the acquisition request originated.
-	 */
-	acpi_gbl_global_lock_handle++;
-	if (acpi_gbl_global_lock_handle == 0) {
-		acpi_gbl_global_lock_handle = 1;
-	}
-
-	/*
-	 * Make sure that a global lock actually exists. If not, just treat the
-	 * lock as a standard mutex.
-	 */
-	if (!acpi_gbl_global_lock_present) {
-		acpi_gbl_global_lock_acquired = TRUE;
-		return_ACPI_STATUS(AE_OK);
-	}
-
-	flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock);
-
-	do {
-
-		/* Attempt to acquire the actual hardware lock */
-
-		ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired);
-		if (acquired) {
-			acpi_gbl_global_lock_acquired = TRUE;
-
-			ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
-					  "Acquired hardware Global Lock\n"));
-			break;
-		}
-
-		acpi_ev_global_lock_pending = TRUE;
-
-		acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags);
-
-		/*
-		 * Did not get the lock. The pending bit was set above, and we
-		 * must wait until we get the global lock released interrupt.
-		 */
-		ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
-				  "Waiting for hardware Global Lock\n"));
-
-		/*
-		 * Wait for handshake with the global lock interrupt handler.
-		 * This interface releases the interpreter if we must wait.
-		 */
-		status = acpi_ex_system_wait_semaphore(
-						acpi_gbl_global_lock_semaphore,
-						ACPI_WAIT_FOREVER);
-
-		flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock);
-
-	} while (ACPI_SUCCESS(status));
-
-	acpi_ev_global_lock_pending = FALSE;
-
-	acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags);
-
-	return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ev_release_global_lock
- *
- * PARAMETERS:  None
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Releases ownership of the Global Lock.
- *
- ******************************************************************************/
-
-acpi_status acpi_ev_release_global_lock(void)
-{
-	u8 pending = FALSE;
-	acpi_status status = AE_OK;
-
-	ACPI_FUNCTION_TRACE(ev_release_global_lock);
-
-	/* Lock must be already acquired */
-
-	if (!acpi_gbl_global_lock_acquired) {
-		ACPI_WARNING((AE_INFO,
-			      "Cannot release the ACPI Global Lock, it has not been acquired"));
-		return_ACPI_STATUS(AE_NOT_ACQUIRED);
-	}
-
-	acpi_ev_global_lock_acquired--;
-	if (acpi_ev_global_lock_acquired > 0) {
-		return AE_OK;
-	}
-
-	if (acpi_gbl_global_lock_present) {
-
-		/* Allow any thread to release the lock */
-
-		ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending);
-
-		/*
-		 * If the pending bit was set, we must write GBL_RLS to the control
-		 * register
-		 */
-		if (pending) {
-			status =
-			    acpi_write_bit_register
-			    (ACPI_BITREG_GLOBAL_LOCK_RELEASE,
-			     ACPI_ENABLE_EVENT);
-		}
-
-		ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
-				  "Released hardware Global Lock\n"));
-	}
-
-	acpi_gbl_global_lock_acquired = FALSE;
-
-	/* Release the local GL mutex */
-	acpi_ev_global_lock_thread_id = 0;
-	acpi_ev_global_lock_acquired = 0;
-	acpi_os_release_mutex(acpi_gbl_global_lock_mutex->mutex.os_mutex);
-	return_ACPI_STATUS(status);
-}
-
 /******************************************************************************
  *
  * FUNCTION:    acpi_ev_terminate

diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index bea7223..f0edf5c 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c

@@ -55,6 +55,8 @@
 acpi_ev_has_default_handler(struct acpi_namespace_node *node,
 			    acpi_adr_space_type space_id);
 
+static void acpi_ev_orphan_ec_reg_method(void);
+
 static acpi_status
 acpi_ev_reg_run(acpi_handle obj_handle,
 		u32 level, void *context, void **return_value);
@@ -561,7 +563,9 @@
 
 			/* Now stop region accesses by executing the _REG method */
 
-			status = acpi_ev_execute_reg_method(region_obj, 0);
+			status =
+			    acpi_ev_execute_reg_method(region_obj,
+						       ACPI_REG_DISCONNECT);
 			if (ACPI_FAILURE(status)) {
 				ACPI_EXCEPTION((AE_INFO, status,
 						"from region _REG, [%s]",
@@ -1062,6 +1066,12 @@
 					ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run,
 					NULL, &space_id, NULL);
 
+	/* Special case for EC: handle "orphan" _REG methods with no region */
+
+	if (space_id == ACPI_ADR_SPACE_EC) {
+		acpi_ev_orphan_ec_reg_method();
+	}
+
 	return_ACPI_STATUS(status);
 }
 
@@ -1120,6 +1130,113 @@
 		return (AE_OK);
 	}
 
-	status = acpi_ev_execute_reg_method(obj_desc, 1);
+	status = acpi_ev_execute_reg_method(obj_desc, ACPI_REG_CONNECT);
 	return (status);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ev_orphan_ec_reg_method
+ *
+ * PARAMETERS:  None
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Execute an "orphan" _REG method that appears under the EC
+ *              device. This is a _REG method that has no corresponding region
+ *              within the EC device scope. The orphan _REG method appears to
+ *              have been enabled by the description of the ECDT in the ACPI
+ *              specification: "The availability of the region space can be
+ *              detected by providing a _REG method object underneath the
+ *              Embedded Controller device."
+ *
+ *              To quickly access the EC device, we use the EC_ID that appears
+ *              within the ECDT. Otherwise, we would need to perform a time-
+ *              consuming namespace walk, executing _HID methods to find the
+ *              EC device.
+ *
+ ******************************************************************************/
+
+static void acpi_ev_orphan_ec_reg_method(void)
+{
+	struct acpi_table_ecdt *table;
+	acpi_status status;
+	struct acpi_object_list args;
+	union acpi_object objects[2];
+	struct acpi_namespace_node *ec_device_node;
+	struct acpi_namespace_node *reg_method;
+	struct acpi_namespace_node *next_node;
+
+	ACPI_FUNCTION_TRACE(ev_orphan_ec_reg_method);
+
+	/* Get the ECDT (if present in system) */
+
+	status = acpi_get_table(ACPI_SIG_ECDT, 0,
+				ACPI_CAST_INDIRECT_PTR(struct acpi_table_header,
+						       &table));
+	if (ACPI_FAILURE(status)) {
+		return_VOID;
+	}
+
+	/* We need a valid EC_ID string */
+
+	if (!(*table->id)) {
+		return_VOID;
+	}
+
+	/* Namespace is currently locked, must release */
+
+	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
+
+	/* Get a handle to the EC device referenced in the ECDT */
+
+	status = acpi_get_handle(NULL,
+				 ACPI_CAST_PTR(char, table->id),
+				 ACPI_CAST_PTR(acpi_handle, &ec_device_node));
+	if (ACPI_FAILURE(status)) {
+		goto exit;
+	}
+
+	/* Get a handle to a _REG method immediately under the EC device */
+
+	status = acpi_get_handle(ec_device_node,
+				 METHOD_NAME__REG, ACPI_CAST_PTR(acpi_handle,
+								 &reg_method));
+	if (ACPI_FAILURE(status)) {
+		goto exit;
+	}
+
+	/*
+	 * Execute the _REG method only if there is no Operation Region in
+	 * this scope with the Embedded Controller space ID. Otherwise, it
+	 * will already have been executed. Note, this allows for Regions
+	 * with other space IDs to be present; but the code below will then
+	 * execute the _REG method with the EC space ID argument.
+	 */
+	next_node = acpi_ns_get_next_node(ec_device_node, NULL);
+	while (next_node) {
+		if ((next_node->type == ACPI_TYPE_REGION) &&
+		    (next_node->object) &&
+		    (next_node->object->region.space_id == ACPI_ADR_SPACE_EC)) {
+			goto exit;	/* Do not execute _REG */
+		}
+		next_node = acpi_ns_get_next_node(ec_device_node, next_node);
+	}
+
+	/* Evaluate the _REG(EC,Connect) method */
+
+	args.count = 2;
+	args.pointer = objects;
+	objects[0].type = ACPI_TYPE_INTEGER;
+	objects[0].integer.value = ACPI_ADR_SPACE_EC;
+	objects[1].type = ACPI_TYPE_INTEGER;
+	objects[1].integer.value = ACPI_REG_CONNECT;
+
+	status = acpi_evaluate_object(reg_method, NULL, &args, NULL);
+
+      exit:
+	/* We ignore all errors from above, don't care */
+
+	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
+	return_VOID;
+}

diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index 9659cee..55a5d35 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c

@@ -637,7 +637,7 @@
 
 					status =
 					    acpi_ev_execute_reg_method
-					    (region_obj, 1);
+					    (region_obj, ACPI_REG_CONNECT);
 
 					if (acpi_ns_locked) {
 						status =

diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c
index c85c8c4..00cd956 100644
--- a/drivers/acpi/acpica/evxfregn.c
+++ b/drivers/acpi/acpica/evxfregn.c

@@ -130,20 +130,21 @@
 	case ACPI_ADR_SPACE_PCI_CONFIG:
 	case ACPI_ADR_SPACE_DATA_TABLE:
 
-		if (acpi_gbl_reg_methods_executed) {
+		if (!acpi_gbl_reg_methods_executed) {
 
-			/* Run all _REG methods for this address space */
-
-			status = acpi_ev_execute_reg_methods(node, space_id);
+			/* We will defer execution of the _REG methods for this space */
+			goto unlock_and_exit;
 		}
 		break;
 
 	default:
-
-		status = acpi_ev_execute_reg_methods(node, space_id);
 		break;
 	}
 
+	/* Run all _REG methods for this address space */
+
+	status = acpi_ev_execute_reg_methods(node, space_id);
+
       unlock_and_exit:
 	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
 	return_ACPI_STATUS(status);

diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c
index e7b372d..110711a 100644
--- a/drivers/acpi/acpica/excreate.c
+++ b/drivers/acpi/acpica/excreate.c

@@ -305,7 +305,8 @@
 	 * range
 	 */
 	if ((region_space >= ACPI_NUM_PREDEFINED_REGIONS) &&
-	    (region_space < ACPI_USER_REGION_BEGIN)) {
+	    (region_space < ACPI_USER_REGION_BEGIN) &&
+	    (region_space != ACPI_ADR_SPACE_DATA_TABLE)) {
 		ACPI_ERROR((AE_INFO, "Invalid AddressSpace type 0x%X",
 			    region_space));
 		return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID);

diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index 1d76ac8..ac7b854 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c

@@ -74,7 +74,6 @@
  *
  * Additional possible repairs:
  *
- * Optional/unnecessary NULL package elements removed
  * Required package elements that are NULL replaced by Integer/String/Buffer
  * Incorrect standalone package wrapped with required outer package
  *
@@ -623,16 +622,12 @@
 	ACPI_FUNCTION_NAME(ns_remove_null_elements);
 
 	/*
-	 * PTYPE1 packages contain no subpackages.
-	 * PTYPE2 packages contain a variable number of sub-packages. We can
-	 * safely remove all NULL elements from the PTYPE2 packages.
+	 * We can safely remove all NULL elements from these package types:
+	 * PTYPE1_VAR packages contain a variable number of simple data types.
+	 * PTYPE2 packages contain a variable number of sub-packages.
 	 */
 	switch (package_type) {
-	case ACPI_PTYPE1_FIXED:
 	case ACPI_PTYPE1_VAR:
-	case ACPI_PTYPE1_OPTION:
-		return;
-
 	case ACPI_PTYPE2:
 	case ACPI_PTYPE2_COUNT:
 	case ACPI_PTYPE2_PKG_COUNT:
@@ -642,6 +637,8 @@
 		break;
 
 	default:
+	case ACPI_PTYPE1_FIXED:
+	case ACPI_PTYPE1_OPTION:
 		return;
 	}
 

diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index 136a814..97cb36f 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c

@@ -170,8 +170,7 @@
 	"SMBus",
 	"SystemCMOS",
 	"PCIBARTarget",
-	"IPMI",
-	"DataTable"
+	"IPMI"
 };
 
 char *acpi_ut_get_region_name(u8 space_id)
@@ -179,6 +178,8 @@
 
 	if (space_id >= ACPI_USER_REGION_BEGIN) {
 		return ("UserDefinedRegion");
+	} else if (space_id == ACPI_ADR_SPACE_DATA_TABLE) {
+		return ("DataTable");
 	} else if (space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
 		return ("FunctionalFixedHW");
 	} else if (space_id >= ACPI_NUM_PREDEFINED_REGIONS) {

diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c
index a946c68..7d797e2 100644
--- a/drivers/acpi/acpica/utmutex.c
+++ b/drivers/acpi/acpica/utmutex.c

@@ -83,9 +83,15 @@
 
 	/* Create the spinlocks for use at interrupt level */
 
-	spin_lock_init(acpi_gbl_gpe_lock);
-	spin_lock_init(acpi_gbl_hardware_lock);
-	spin_lock_init(acpi_ev_global_lock_pending_lock);
+	status = acpi_os_create_lock (&acpi_gbl_gpe_lock);
+	if (ACPI_FAILURE (status)) {
+		return_ACPI_STATUS (status);
+	}
+
+	status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
+	if (ACPI_FAILURE (status)) {
+		return_ACPI_STATUS (status);
+	}
 
 	/* Mutex for _OSI support */
 	status = acpi_os_create_mutex(&acpi_gbl_osi_mutex);

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 9749980..d1e06c1 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c

@@ -227,7 +227,7 @@
 	acpi_status status = AE_OK;
 	char object_name[5] = { '_', 'P', 'S', '0' + state, '\0' };
 
-	if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3))
+	if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD))
 		return -EINVAL;
 
 	/* Make sure this is a valid target state */

diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c
new file mode 100644
index 0000000..5d42c24
--- /dev/null
+++ b/drivers/acpi/custom_method.c

@@ -0,0 +1,100 @@
+/*
+ * debugfs.c - ACPI debugfs interface to userspace.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <acpi/acpi_drivers.h>
+
+#include "internal.h"
+
+#define _COMPONENT		ACPI_SYSTEM_COMPONENT
+ACPI_MODULE_NAME("custom_method");
+MODULE_LICENSE("GPL");
+
+static struct dentry *cm_dentry;
+
+/* /sys/kernel/debug/acpi/custom_method */
+
+static ssize_t cm_write(struct file *file, const char __user * user_buf,
+			size_t count, loff_t *ppos)
+{
+	static char *buf;
+	static u32 max_size;
+	static u32 uncopied_bytes;
+
+	struct acpi_table_header table;
+	acpi_status status;
+
+	if (!(*ppos)) {
+		/* parse the table header to get the table length */
+		if (count <= sizeof(struct acpi_table_header))
+			return -EINVAL;
+		if (copy_from_user(&table, user_buf,
+				   sizeof(struct acpi_table_header)))
+			return -EFAULT;
+		uncopied_bytes = max_size = table.length;
+		buf = kzalloc(max_size, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	}
+
+	if (buf == NULL)
+		return -EINVAL;
+
+	if ((*ppos > max_size) ||
+	    (*ppos + count > max_size) ||
+	    (*ppos + count < count) ||
+	    (count > uncopied_bytes))
+		return -EINVAL;
+
+	if (copy_from_user(buf + (*ppos), user_buf, count)) {
+		kfree(buf);
+		buf = NULL;
+		return -EFAULT;
+	}
+
+	uncopied_bytes -= count;
+	*ppos += count;
+
+	if (!uncopied_bytes) {
+		status = acpi_install_method(buf);
+		kfree(buf);
+		buf = NULL;
+		if (ACPI_FAILURE(status))
+			return -EINVAL;
+		add_taint(TAINT_OVERRIDDEN_ACPI_TABLE);
+	}
+
+	return count;
+}
+
+static const struct file_operations cm_fops = {
+	.write = cm_write,
+	.llseek = default_llseek,
+};
+
+static int __init acpi_custom_method_init(void)
+{
+	if (acpi_debugfs_dir == NULL)
+		return -ENOENT;
+
+	cm_dentry = debugfs_create_file("custom_method", S_IWUSR,
+					acpi_debugfs_dir, NULL, &cm_fops);
+	if (cm_dentry == NULL)
+		return -ENODEV;
+
+	return 0;
+}
+
+static void __exit acpi_custom_method_exit(void)
+{
+	if (cm_dentry)
+		debugfs_remove(cm_dentry);
+ }
+
+module_init(acpi_custom_method_init);
+module_exit(acpi_custom_method_exit);

diff --git a/drivers/acpi/debugfs.c b/drivers/acpi/debugfs.c
index 384f7ab..182a9fc 100644
--- a/drivers/acpi/debugfs.c
+++ b/drivers/acpi/debugfs.c

@@ -3,100 +3,16 @@
  */
 
 #include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/uaccess.h>
 #include <linux/debugfs.h>
 #include <acpi/acpi_drivers.h>
 
 #define _COMPONENT		ACPI_SYSTEM_COMPONENT
 ACPI_MODULE_NAME("debugfs");
 
+struct dentry *acpi_debugfs_dir;
+EXPORT_SYMBOL_GPL(acpi_debugfs_dir);
 
-/* /sys/modules/acpi/parameters/aml_debug_output */
-
-module_param_named(aml_debug_output, acpi_gbl_enable_aml_debug_object,
-		   bool, 0644);
-MODULE_PARM_DESC(aml_debug_output,
-		 "To enable/disable the ACPI Debug Object output.");
-
-/* /sys/kernel/debug/acpi/custom_method */
-
-static ssize_t cm_write(struct file *file, const char __user * user_buf,
-			size_t count, loff_t *ppos)
+void __init acpi_debugfs_init(void)
 {
-	static char *buf;
-	static u32 max_size;
-	static u32 uncopied_bytes;
-
-	struct acpi_table_header table;
-	acpi_status status;
-
-	if (!(*ppos)) {
-		/* parse the table header to get the table length */
-		if (count <= sizeof(struct acpi_table_header))
-			return -EINVAL;
-		if (copy_from_user(&table, user_buf,
-				   sizeof(struct acpi_table_header)))
-			return -EFAULT;
-		uncopied_bytes = max_size = table.length;
-		buf = kzalloc(max_size, GFP_KERNEL);
-		if (!buf)
-			return -ENOMEM;
-	}
-
-	if (buf == NULL)
-		return -EINVAL;
-
-	if ((*ppos > max_size) ||
-	    (*ppos + count > max_size) ||
-	    (*ppos + count < count) ||
-	    (count > uncopied_bytes))
-		return -EINVAL;
-
-	if (copy_from_user(buf + (*ppos), user_buf, count)) {
-		kfree(buf);
-		buf = NULL;
-		return -EFAULT;
-	}
-
-	uncopied_bytes -= count;
-	*ppos += count;
-
-	if (!uncopied_bytes) {
-		status = acpi_install_method(buf);
-		kfree(buf);
-		buf = NULL;
-		if (ACPI_FAILURE(status))
-			return -EINVAL;
-		add_taint(TAINT_OVERRIDDEN_ACPI_TABLE);
-	}
-
-	return count;
-}
-
-static const struct file_operations cm_fops = {
-	.write = cm_write,
-	.llseek = default_llseek,
-};
-
-int __init acpi_debugfs_init(void)
-{
-	struct dentry *acpi_dir, *cm_dentry;
-
-	acpi_dir = debugfs_create_dir("acpi", NULL);
-	if (!acpi_dir)
-		goto err;
-
-	cm_dentry = debugfs_create_file("custom_method", S_IWUSR,
-					acpi_dir, NULL, &cm_fops);
-	if (!cm_dentry)
-		goto err;
-
-	return 0;
-
-err:
-	if (acpi_dir)
-		debugfs_remove(acpi_dir);
-	return -EINVAL;
+	acpi_debugfs_dir = debugfs_create_dir("acpi", NULL);
 }

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index fa848c4..b19a18d 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c

@@ -69,7 +69,6 @@
 
 #define ACPI_EC_DELAY		500	/* Wait 500ms max. during EC ops */
 #define ACPI_EC_UDELAY_GLK	1000	/* Wait 1ms max. to get global lock */
-#define ACPI_EC_CDELAY		10	/* Wait 10us before polling EC */
 #define ACPI_EC_MSI_UDELAY	550	/* Wait 550us for MSI EC */
 
 #define ACPI_EC_STORM_THRESHOLD 8	/* number of false interrupts
@@ -433,8 +432,7 @@
 
 int ec_transaction(u8 command,
 		   const u8 * wdata, unsigned wdata_len,
-		   u8 * rdata, unsigned rdata_len,
-		   int force_poll)
+		   u8 * rdata, unsigned rdata_len)
 {
 	struct transaction t = {.command = command,
 				.wdata = wdata, .rdata = rdata,
@@ -592,8 +590,6 @@
 	mutex_unlock(&ec->lock);
 }
 
-static void acpi_ec_gpe_query(void *ec_cxt);
-
 static int ec_check_sci(struct acpi_ec *ec, u8 state)
 {
 	if (state & ACPI_EC_FLAG_SCI) {
@@ -808,8 +804,6 @@
 			return -EINVAL;
 	}
 
-	ec->handle = device->handle;
-
 	/* Find and register all query methods */
 	acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1,
 			    acpi_ec_register_query_methods, NULL, ec, NULL);
@@ -938,8 +932,19 @@
 	ec_flag_msi, "MSI hardware", {
 	DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR")}, NULL},
 	{
+	ec_flag_msi, "Quanta hardware", {
+	DMI_MATCH(DMI_SYS_VENDOR, "Quanta"),
+	DMI_MATCH(DMI_PRODUCT_NAME, "TW8/SW8/DW8"),}, NULL},
+	{
+	ec_flag_msi, "Quanta hardware", {
+	DMI_MATCH(DMI_SYS_VENDOR, "Quanta"),
+	DMI_MATCH(DMI_PRODUCT_NAME, "TW9/SW9"),}, NULL},
+	{
 	ec_validate_ecdt, "ASUS hardware", {
 	DMI_MATCH(DMI_BIOS_VENDOR, "ASUS") }, NULL},
+	{
+	ec_validate_ecdt, "ASUS hardware", {
+	DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc.") }, NULL},
 	{},
 };
 

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 4bfb759..ca75b9c 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h

@@ -28,9 +28,10 @@
 int acpi_sysfs_init(void);
 
 #ifdef CONFIG_DEBUG_FS
+extern struct dentry *acpi_debugfs_dir;
 int acpi_debugfs_init(void);
 #else
-static inline int acpi_debugfs_init(void) { return 0; }
+static inline void acpi_debugfs_init(void) { return; }
 #endif
 
 /* --------------------------------------------------------------------------

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 45ad4ff..52ca964 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c

@@ -902,14 +902,6 @@
 
 EXPORT_SYMBOL(acpi_os_wait_events_complete);
 
-/*
- * Deallocate the memory for a spinlock.
- */
-void acpi_os_delete_lock(acpi_spinlock handle)
-{
-	return;
-}
-
 acpi_status
 acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle)
 {
@@ -1341,6 +1333,31 @@
 EXPORT_SYMBOL(acpi_resources_are_enforced);
 
 /*
+ * Create and initialize a spinlock.
+ */
+acpi_status
+acpi_os_create_lock(acpi_spinlock *out_handle)
+{
+	spinlock_t *lock;
+
+	lock = ACPI_ALLOCATE(sizeof(spinlock_t));
+	if (!lock)
+		return AE_NO_MEMORY;
+	spin_lock_init(lock);
+	*out_handle = lock;
+
+	return AE_OK;
+}
+
+/*
+ * Deallocate the memory for a spinlock.
+ */
+void acpi_os_delete_lock(acpi_spinlock handle)
+{
+	ACPI_FREE(handle);
+}
+
+/*
  * Acquire a spinlock.
  *
  * handle is a pointer to the spinlock_t.

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 25bf17d..02d2a4c 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c

@@ -37,7 +37,6 @@
 	{},
 };
 
-#ifdef CONFIG_SMP
 static int map_lapic_id(struct acpi_subtable_header *entry,
 		 u32 acpi_id, int *apic_id)
 {
@@ -165,7 +164,9 @@
 
 int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
 {
+#ifdef CONFIG_SMP
 	int i;
+#endif
 	int apic_id = -1;
 
 	apic_id = map_mat_entry(handle, type, acpi_id);
@@ -174,14 +175,19 @@
 	if (apic_id == -1)
 		return apic_id;
 
+#ifdef CONFIG_SMP
 	for_each_possible_cpu(i) {
 		if (cpu_physical_id(i) == apic_id)
 			return i;
 	}
+#else
+	/* In UP kernel, only processor 0 is valid */
+	if (apic_id == 0)
+		return apic_id;
+#endif
 	return -1;
 }
 EXPORT_SYMBOL_GPL(acpi_get_cpuid);
-#endif
 
 static bool __init processor_physically_present(acpi_handle handle)
 {
@@ -217,7 +223,7 @@
 	type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
 	cpuid = acpi_get_cpuid(handle, type, acpi_id);
 
-	if ((cpuid == -1) && (num_possible_cpus() > 1))
+	if (cpuid == -1)
 		return false;
 
 	return true;

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index d615b7d..431ab11 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c

@@ -161,7 +161,7 @@
 	if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
 		return;
 
-	if (c1e_detected)
+	if (amd_e400_c1e_detected)
 		type = ACPI_STATE_C1;
 
 	/*

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 61891e7..77255f2 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c

@@ -220,6 +220,14 @@
 		  NULL, 0644);
 #endif /* CONFIG_ACPI_DEBUG */
 
+
+/* /sys/modules/acpi/parameters/aml_debug_output */
+
+module_param_named(aml_debug_output, acpi_gbl_enable_aml_debug_object,
+		   bool, 0644);
+MODULE_PARM_DESC(aml_debug_output,
+		 "To enable/disable the ACPI Debug Object output.");
+
 /* /sys/module/acpi/parameters/acpica_version */
 static int param_get_acpica_version(char *buffer, struct kernel_param *kp)
 {

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index db8f885..98de8f4 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c

@@ -1038,6 +1038,7 @@
 {
 	unsigned long flags;
 
+	WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012");
 	spin_lock_irqsave(&floppy_hlt_lock, flags);
 	if (!hlt_disabled) {
 		hlt_disabled = 1;

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6ecf89c..079c088 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c

@@ -6,10 +6,13 @@
 #include <linux/virtio.h>
 #include <linux/virtio_blk.h>
 #include <linux/scatterlist.h>
+#include <linux/string_helpers.h>
+#include <scsi/scsi_cmnd.h>
 
 #define PART_BITS 4
 
 static int major, index;
+struct workqueue_struct *virtblk_wq;
 
 struct virtio_blk
 {
@@ -26,6 +29,9 @@
 
 	mempool_t *pool;
 
+	/* Process context for config space updates */
+	struct work_struct config_work;
+
 	/* What host tells us, plus 2 for header & tailer. */
 	unsigned int sg_elems;
 
@@ -141,7 +147,7 @@
 	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
 
 	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
-		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
+		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
 		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
 			   sizeof(vbr->in_hdr));
 	}
@@ -291,6 +297,46 @@
 }
 DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
 
+static void virtblk_config_changed_work(struct work_struct *work)
+{
+	struct virtio_blk *vblk =
+		container_of(work, struct virtio_blk, config_work);
+	struct virtio_device *vdev = vblk->vdev;
+	struct request_queue *q = vblk->disk->queue;
+	char cap_str_2[10], cap_str_10[10];
+	u64 capacity, size;
+
+	/* Host must always specify the capacity. */
+	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
+			  &capacity, sizeof(capacity));
+
+	/* If capacity is too big, truncate with warning. */
+	if ((sector_t)capacity != capacity) {
+		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
+			 (unsigned long long)capacity);
+		capacity = (sector_t)-1;
+	}
+
+	size = capacity * queue_logical_block_size(q);
+	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
+	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
+
+	dev_notice(&vdev->dev,
+		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
+		  (unsigned long long)capacity,
+		  queue_logical_block_size(q),
+		  cap_str_10, cap_str_2);
+
+	set_capacity(vblk->disk, capacity);
+}
+
+static void virtblk_config_changed(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+
+	queue_work(virtblk_wq, &vblk->config_work);
+}
+
 static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
@@ -327,6 +373,7 @@
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
 	sg_init_table(vblk->sg, vblk->sg_elems);
+	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
 
 	/* We expect one virtqueue, for output. */
 	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
@@ -477,6 +524,8 @@
 {
 	struct virtio_blk *vblk = vdev->priv;
 
+	flush_work(&vblk->config_work);
+
 	/* Nothing should be pending. */
 	BUG_ON(!list_empty(&vblk->reqs));
 
@@ -508,27 +557,47 @@
  * Use __refdata to avoid this warning.
  */
 static struct virtio_driver __refdata virtio_blk = {
-	.feature_table = features,
-	.feature_table_size = ARRAY_SIZE(features),
-	.driver.name =	KBUILD_MODNAME,
-	.driver.owner =	THIS_MODULE,
-	.id_table =	id_table,
-	.probe =	virtblk_probe,
-	.remove =	__devexit_p(virtblk_remove),
+	.feature_table		= features,
+	.feature_table_size	= ARRAY_SIZE(features),
+	.driver.name		= KBUILD_MODNAME,
+	.driver.owner		= THIS_MODULE,
+	.id_table		= id_table,
+	.probe			= virtblk_probe,
+	.remove			= __devexit_p(virtblk_remove),
+	.config_changed		= virtblk_config_changed,
 };
 
 static int __init init(void)
 {
+	int error;
+
+	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
+	if (!virtblk_wq)
+		return -ENOMEM;
+
 	major = register_blkdev(0, "virtblk");
-	if (major < 0)
-		return major;
-	return register_virtio_driver(&virtio_blk);
+	if (major < 0) {
+		error = major;
+		goto out_destroy_workqueue;
+	}
+
+	error = register_virtio_driver(&virtio_blk);
+	if (error)
+		goto out_unregister_blkdev;
+	return 0;
+
+out_unregister_blkdev:
+	unregister_blkdev(major, "virtblk");
+out_destroy_workqueue:
+	destroy_workqueue(virtblk_wq);
+	return error;
 }
 
 static void __exit fini(void)
 {
 	unregister_blkdev(major, "virtblk");
 	unregister_virtio_driver(&virtio_blk);
+	destroy_workqueue(virtblk_wq);
 }
 module_init(init);
 module_exit(fini);

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 838568a..fb68b12 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c

@@ -1677,17 +1677,12 @@
 	portdev->config.max_nr_ports = 1;
 	if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) {
 		multiport = true;
-		vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT;
-
 		vdev->config->get(vdev, offsetof(struct virtio_console_config,
 						 max_nr_ports),
 				  &portdev->config.max_nr_ports,
 				  sizeof(portdev->config.max_nr_ports));
 	}
 
-	/* Let the Host know we support multiple ports.*/
-	vdev->config->finalize_features(vdev);
-
 	err = init_vqs(portdev);
 	if (err < 0) {
 		dev_err(&vdev->dev, "Error %d initializing vqs\n", err);

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index f508690..c47f3d0 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c

@@ -237,6 +237,7 @@
 	unsigned int power_usage = -1;
 	int i;
 	int multiplier;
+	struct timespec t;
 
 	if (data->needs_update) {
 		menu_update(dev);
@@ -251,8 +252,9 @@
 		return 0;
 
 	/* determine the expected residency time, round up */
+	t = ktime_to_timespec(tick_nohz_get_sleep_length());
 	data->expected_us =
-	    DIV_ROUND_UP((u32)ktime_to_ns(tick_nohz_get_sleep_length()), 1000);
+		t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC;
 
 
 	data->bucket = which_bucket(data->expected_us);

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index a572600..25cf327 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig

@@ -200,16 +200,18 @@
 	  platform_data for a dma-pl330 device.
 
 config PCH_DMA
-	tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7213 IOH DMA support"
+	tristate "Intel EG20T PCH / OKI Semi IOH(ML7213/ML7223) DMA support"
 	depends on PCI && X86
 	select DMA_ENGINE
 	help
 	  Enable support for Intel EG20T PCH DMA engine.
 
-	  This driver also can be used for OKI SEMICONDUCTOR ML7213 IOH(Input/
-	  Output Hub) which is for IVI(In-Vehicle Infotainment) use.
-	  ML7213 is companion chip for Intel Atom E6xx series.
-	  ML7213 is completely compatible for Intel EG20T PCH.
+	  This driver also can be used for OKI SEMICONDUCTOR IOH(Input/
+	  Output Hub), ML7213 and ML7223.
+	  ML7213 IOH is for IVI(In-Vehicle Infotainment) use and ML7223 IOH is
+	  for MP(Media Phone) use.
+	  ML7213/ML7223 is companion chip for Intel Atom E6xx series.
+	  ML7213/ML7223 is completely compatible for Intel EG20T PCH.
 
 config IMX_SDMA
 	tristate "i.MX SDMA support"

diff --git a/drivers/dma/TODO b/drivers/dma/TODO
new file mode 100644
index 0000000..a4af858
--- /dev/null
+++ b/drivers/dma/TODO

@@ -0,0 +1,14 @@
+TODO for slave dma
+
+1. Move remaining drivers to use new slave interface
+2. Remove old slave pointer machansim
+3. Make issue_pending to start the transaction in below drivers
+	- mpc512x_dma
+	- imx-dma
+	- imx-sdma
+	- mxs-dma.c
+	- dw_dmac
+	- intel_mid_dma
+	- ste_dma40
+4. Check other subsystems for dma drivers and merge/move to dmaengine
+5. Remove dma_slave_config's dma direction.

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 235f53b..36144f8 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c

@@ -37,8 +37,8 @@
 
 #define	ATC_DEFAULT_CFG		(ATC_FIFOCFG_HALFFIFO)
 #define	ATC_DEFAULT_CTRLA	(0)
-#define	ATC_DEFAULT_CTRLB	(ATC_SIF(0)	\
-				|ATC_DIF(1))
+#define	ATC_DEFAULT_CTRLB	(ATC_SIF(AT_DMA_MEM_IF) \
+				|ATC_DIF(AT_DMA_MEM_IF))
 
 /*
  * Initial number of descriptors to allocate for each channel. This could
@@ -165,6 +165,29 @@
 }
 
 /**
+ * atc_desc_chain - build chain adding a descripor
+ * @first: address of first descripor of the chain
+ * @prev: address of previous descripor of the chain
+ * @desc: descriptor to queue
+ *
+ * Called from prep_* functions
+ */
+static void atc_desc_chain(struct at_desc **first, struct at_desc **prev,
+			   struct at_desc *desc)
+{
+	if (!(*first)) {
+		*first = desc;
+	} else {
+		/* inform the HW lli about chaining */
+		(*prev)->lli.dscr = desc->txd.phys;
+		/* insert the link descriptor to the LD ring */
+		list_add_tail(&desc->desc_node,
+				&(*first)->tx_list);
+	}
+	*prev = desc;
+}
+
+/**
  * atc_assign_cookie - compute and assign new cookie
  * @atchan: channel we work on
  * @desc: descriptor to assign cookie for
@@ -237,16 +260,12 @@
 static void
 atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
 {
-	dma_async_tx_callback		callback;
-	void				*param;
 	struct dma_async_tx_descriptor	*txd = &desc->txd;
 
 	dev_vdbg(chan2dev(&atchan->chan_common),
 		"descriptor %u complete\n", txd->cookie);
 
 	atchan->completed_cookie = txd->cookie;
-	callback = txd->callback;
-	param = txd->callback_param;
 
 	/* move children to free_list */
 	list_splice_init(&desc->tx_list, &atchan->free_list);
@@ -278,12 +297,19 @@
 		}
 	}
 
-	/*
-	 * The API requires that no submissions are done from a
-	 * callback, so we don't need to drop the lock here
-	 */
-	if (callback)
-		callback(param);
+	/* for cyclic transfers,
+	 * no need to replay callback function while stopping */
+	if (!test_bit(ATC_IS_CYCLIC, &atchan->status)) {
+		dma_async_tx_callback	callback = txd->callback;
+		void			*param = txd->callback_param;
+
+		/*
+		 * The API requires that no submissions are done from a
+		 * callback, so we don't need to drop the lock here
+		 */
+		if (callback)
+			callback(param);
+	}
 
 	dma_run_dependencies(txd);
 }
@@ -419,6 +445,26 @@
 	atc_chain_complete(atchan, bad_desc);
 }
 
+/**
+ * atc_handle_cyclic - at the end of a period, run callback function
+ * @atchan: channel used for cyclic operations
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_handle_cyclic(struct at_dma_chan *atchan)
+{
+	struct at_desc			*first = atc_first_active(atchan);
+	struct dma_async_tx_descriptor	*txd = &first->txd;
+	dma_async_tx_callback		callback = txd->callback;
+	void				*param = txd->callback_param;
+
+	dev_vdbg(chan2dev(&atchan->chan_common),
+			"new cyclic period llp 0x%08x\n",
+			channel_readl(atchan, DSCR));
+
+	if (callback)
+		callback(param);
+}
 
 /*--  IRQ & Tasklet  ---------------------------------------------------*/
 
@@ -426,16 +472,11 @@
 {
 	struct at_dma_chan *atchan = (struct at_dma_chan *)data;
 
-	/* Channel cannot be enabled here */
-	if (atc_chan_is_enabled(atchan)) {
-		dev_err(chan2dev(&atchan->chan_common),
-			"BUG: channel enabled in tasklet\n");
-		return;
-	}
-
 	spin_lock(&atchan->lock);
-	if (test_and_clear_bit(0, &atchan->error_status))
+	if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status))
 		atc_handle_error(atchan);
+	else if (test_bit(ATC_IS_CYCLIC, &atchan->status))
+		atc_handle_cyclic(atchan);
 	else
 		atc_advance_work(atchan);
 
@@ -464,12 +505,13 @@
 
 		for (i = 0; i < atdma->dma_common.chancnt; i++) {
 			atchan = &atdma->chan[i];
-			if (pending & (AT_DMA_CBTC(i) | AT_DMA_ERR(i))) {
+			if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) {
 				if (pending & AT_DMA_ERR(i)) {
 					/* Disable channel on AHB error */
-					dma_writel(atdma, CHDR, atchan->mask);
+					dma_writel(atdma, CHDR,
+						AT_DMA_RES(i) | atchan->mask);
 					/* Give information to tasklet */
-					set_bit(0, &atchan->error_status);
+					set_bit(ATC_IS_ERROR, &atchan->status);
 				}
 				tasklet_schedule(&atchan->tasklet);
 				ret = IRQ_HANDLED;
@@ -549,7 +591,7 @@
 	}
 
 	ctrla =   ATC_DEFAULT_CTRLA;
-	ctrlb =   ATC_DEFAULT_CTRLB
+	ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
 		| ATC_SRC_ADDR_MODE_INCR
 		| ATC_DST_ADDR_MODE_INCR
 		| ATC_FC_MEM2MEM;
@@ -584,16 +626,7 @@
 
 		desc->txd.cookie = 0;
 
-		if (!first) {
-			first = desc;
-		} else {
-			/* inform the HW lli about chaining */
-			prev->lli.dscr = desc->txd.phys;
-			/* insert the link descriptor to the LD ring */
-			list_add_tail(&desc->desc_node,
-					&first->tx_list);
-		}
-		prev = desc;
+		atc_desc_chain(&first, &prev, desc);
 	}
 
 	/* First descriptor of the chain embedds additional information */
@@ -639,7 +672,8 @@
 	struct scatterlist	*sg;
 	size_t			total_len = 0;
 
-	dev_vdbg(chan2dev(chan), "prep_slave_sg: %s f0x%lx\n",
+	dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n",
+			sg_len,
 			direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
 			flags);
 
@@ -651,14 +685,15 @@
 	reg_width = atslave->reg_width;
 
 	ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
-	ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
+	ctrlb = ATC_IEN;
 
 	switch (direction) {
 	case DMA_TO_DEVICE:
 		ctrla |=  ATC_DST_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
 			| ATC_SRC_ADDR_MODE_INCR
-			| ATC_FC_MEM2PER;
+			| ATC_FC_MEM2PER
+			| ATC_SIF(AT_DMA_MEM_IF) | ATC_DIF(AT_DMA_PER_IF);
 		reg = atslave->tx_reg;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct at_desc	*desc;
@@ -682,16 +717,7 @@
 					| len >> mem_width;
 			desc->lli.ctrlb = ctrlb;
 
-			if (!first) {
-				first = desc;
-			} else {
-				/* inform the HW lli about chaining */
-				prev->lli.dscr = desc->txd.phys;
-				/* insert the link descriptor to the LD ring */
-				list_add_tail(&desc->desc_node,
-						&first->tx_list);
-			}
-			prev = desc;
+			atc_desc_chain(&first, &prev, desc);
 			total_len += len;
 		}
 		break;
@@ -699,7 +725,8 @@
 		ctrla |=  ATC_SRC_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_INCR
 			| ATC_SRC_ADDR_MODE_FIXED
-			| ATC_FC_PER2MEM;
+			| ATC_FC_PER2MEM
+			| ATC_SIF(AT_DMA_PER_IF) | ATC_DIF(AT_DMA_MEM_IF);
 
 		reg = atslave->rx_reg;
 		for_each_sg(sgl, sg, sg_len, i) {
@@ -724,16 +751,7 @@
 					| len >> reg_width;
 			desc->lli.ctrlb = ctrlb;
 
-			if (!first) {
-				first = desc;
-			} else {
-				/* inform the HW lli about chaining */
-				prev->lli.dscr = desc->txd.phys;
-				/* insert the link descriptor to the LD ring */
-				list_add_tail(&desc->desc_node,
-						&first->tx_list);
-			}
-			prev = desc;
+			atc_desc_chain(&first, &prev, desc);
 			total_len += len;
 		}
 		break;
@@ -759,41 +777,211 @@
 	return NULL;
 }
 
+/**
+ * atc_dma_cyclic_check_values
+ * Check for too big/unaligned periods and unaligned DMA buffer
+ */
+static int
+atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
+		size_t period_len, enum dma_data_direction direction)
+{
+	if (period_len > (ATC_BTSIZE_MAX << reg_width))
+		goto err_out;
+	if (unlikely(period_len & ((1 << reg_width) - 1)))
+		goto err_out;
+	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
+		goto err_out;
+	if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+		goto err_out;
+
+	return 0;
+
+err_out:
+	return -EINVAL;
+}
+
+/**
+ * atc_dma_cyclic_fill_desc - Fill one period decriptor
+ */
+static int
+atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
+		unsigned int period_index, dma_addr_t buf_addr,
+		size_t period_len, enum dma_data_direction direction)
+{
+	u32		ctrla;
+	unsigned int	reg_width = atslave->reg_width;
+
+	/* prepare common CRTLA value */
+	ctrla =   ATC_DEFAULT_CTRLA | atslave->ctrla
+		| ATC_DST_WIDTH(reg_width)
+		| ATC_SRC_WIDTH(reg_width)
+		| period_len >> reg_width;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		desc->lli.saddr = buf_addr + (period_len * period_index);
+		desc->lli.daddr = atslave->tx_reg;
+		desc->lli.ctrla = ctrla;
+		desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED
+				| ATC_SRC_ADDR_MODE_INCR
+				| ATC_FC_MEM2PER
+				| ATC_SIF(AT_DMA_MEM_IF)
+				| ATC_DIF(AT_DMA_PER_IF);
+		break;
+
+	case DMA_FROM_DEVICE:
+		desc->lli.saddr = atslave->rx_reg;
+		desc->lli.daddr = buf_addr + (period_len * period_index);
+		desc->lli.ctrla = ctrla;
+		desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR
+				| ATC_SRC_ADDR_MODE_FIXED
+				| ATC_FC_PER2MEM
+				| ATC_SIF(AT_DMA_PER_IF)
+				| ATC_DIF(AT_DMA_MEM_IF);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * atc_prep_dma_cyclic - prepare the cyclic DMA transfer
+ * @chan: the DMA channel to prepare
+ * @buf_addr: physical DMA address where the buffer starts
+ * @buf_len: total number of bytes for the entire buffer
+ * @period_len: number of bytes for each period
+ * @direction: transfer direction, to or from device
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma_slave	*atslave = chan->private;
+	struct at_desc		*first = NULL;
+	struct at_desc		*prev = NULL;
+	unsigned long		was_cyclic;
+	unsigned int		periods = buf_len / period_len;
+	unsigned int		i;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n",
+			direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+			buf_addr,
+			periods, buf_len, period_len);
+
+	if (unlikely(!atslave || !buf_len || !period_len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_cyclic: length is zero!\n");
+		return NULL;
+	}
+
+	was_cyclic = test_and_set_bit(ATC_IS_CYCLIC, &atchan->status);
+	if (was_cyclic) {
+		dev_dbg(chan2dev(chan), "prep_dma_cyclic: channel in use!\n");
+		return NULL;
+	}
+
+	/* Check for too big/unaligned periods and unaligned DMA buffer */
+	if (atc_dma_cyclic_check_values(atslave->reg_width, buf_addr,
+					period_len, direction))
+		goto err_out;
+
+	/* build cyclic linked list */
+	for (i = 0; i < periods; i++) {
+		struct at_desc	*desc;
+
+		desc = atc_desc_get(atchan);
+		if (!desc)
+			goto err_desc_get;
+
+		if (atc_dma_cyclic_fill_desc(atslave, desc, i, buf_addr,
+						period_len, direction))
+			goto err_desc_get;
+
+		atc_desc_chain(&first, &prev, desc);
+	}
+
+	/* lets make a cyclic list */
+	prev->lli.dscr = first->txd.phys;
+
+	/* First descriptor of the chain embedds additional information */
+	first->txd.cookie = -EBUSY;
+	first->len = buf_len;
+
+	return &first->txd;
+
+err_desc_get:
+	dev_err(chan2dev(chan), "not enough descriptors available\n");
+	atc_desc_put(atchan, first);
+err_out:
+	clear_bit(ATC_IS_CYCLIC, &atchan->status);
+	return NULL;
+}
+
+
 static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		       unsigned long arg)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	struct at_desc		*desc, *_desc;
+	int			chan_id = atchan->chan_common.chan_id;
+
 	LIST_HEAD(list);
 
-	/* Only supports DMA_TERMINATE_ALL */
-	if (cmd != DMA_TERMINATE_ALL)
+	dev_vdbg(chan2dev(chan), "atc_control (%d)\n", cmd);
+
+	if (cmd == DMA_PAUSE) {
+		spin_lock_bh(&atchan->lock);
+
+		dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id));
+		set_bit(ATC_IS_PAUSED, &atchan->status);
+
+		spin_unlock_bh(&atchan->lock);
+	} else if (cmd == DMA_RESUME) {
+		if (!test_bit(ATC_IS_PAUSED, &atchan->status))
+			return 0;
+
+		spin_lock_bh(&atchan->lock);
+
+		dma_writel(atdma, CHDR, AT_DMA_RES(chan_id));
+		clear_bit(ATC_IS_PAUSED, &atchan->status);
+
+		spin_unlock_bh(&atchan->lock);
+	} else if (cmd == DMA_TERMINATE_ALL) {
+		struct at_desc	*desc, *_desc;
+		/*
+		 * This is only called when something went wrong elsewhere, so
+		 * we don't really care about the data. Just disable the
+		 * channel. We still have to poll the channel enable bit due
+		 * to AHB/HSB limitations.
+		 */
+		spin_lock_bh(&atchan->lock);
+
+		/* disabling channel: must also remove suspend state */
+		dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask);
+
+		/* confirm that this channel is disabled */
+		while (dma_readl(atdma, CHSR) & atchan->mask)
+			cpu_relax();
+
+		/* active_list entries will end up before queued entries */
+		list_splice_init(&atchan->queue, &list);
+		list_splice_init(&atchan->active_list, &list);
+
+		/* Flush all pending and queued descriptors */
+		list_for_each_entry_safe(desc, _desc, &list, desc_node)
+			atc_chain_complete(atchan, desc);
+
+		clear_bit(ATC_IS_PAUSED, &atchan->status);
+		/* if channel dedicated to cyclic operations, free it */
+		clear_bit(ATC_IS_CYCLIC, &atchan->status);
+
+		spin_unlock_bh(&atchan->lock);
+	} else {
 		return -ENXIO;
-
-	/*
-	 * This is only called when something went wrong elsewhere, so
-	 * we don't really care about the data. Just disable the
-	 * channel. We still have to poll the channel enable bit due
-	 * to AHB/HSB limitations.
-	 */
-	spin_lock_bh(&atchan->lock);
-
-	dma_writel(atdma, CHDR, atchan->mask);
-
-	/* confirm that this channel is disabled */
-	while (dma_readl(atdma, CHSR) & atchan->mask)
-		cpu_relax();
-
-	/* active_list entries will end up before queued entries */
-	list_splice_init(&atchan->queue, &list);
-	list_splice_init(&atchan->active_list, &list);
-
-	/* Flush all pending and queued descriptors */
-	list_for_each_entry_safe(desc, _desc, &list, desc_node)
-		atc_chain_complete(atchan, desc);
-
-	spin_unlock_bh(&atchan->lock);
+	}
 
 	return 0;
 }
@@ -835,9 +1023,17 @@
 
 	spin_unlock_bh(&atchan->lock);
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	dev_vdbg(chan2dev(chan), "tx_status: %d (d%d, u%d)\n",
-		 cookie, last_complete ? last_complete : 0,
+	if (ret != DMA_SUCCESS)
+		dma_set_tx_state(txstate, last_complete, last_used,
+			atc_first_active(atchan)->len);
+	else
+		dma_set_tx_state(txstate, last_complete, last_used, 0);
+
+	if (test_bit(ATC_IS_PAUSED, &atchan->status))
+		ret = DMA_PAUSED;
+
+	dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d (d%d, u%d)\n",
+		 ret, cookie, last_complete ? last_complete : 0,
 		 last_used ? last_used : 0);
 
 	return ret;
@@ -853,6 +1049,10 @@
 
 	dev_vdbg(chan2dev(chan), "issue_pending\n");
 
+	/* Not needed for cyclic transfers */
+	if (test_bit(ATC_IS_CYCLIC, &atchan->status))
+		return;
+
 	spin_lock_bh(&atchan->lock);
 	if (!atc_chan_is_enabled(atchan)) {
 		atc_advance_work(atchan);
@@ -959,6 +1159,7 @@
 	}
 	list_splice_init(&atchan->free_list, &list);
 	atchan->descs_allocated = 0;
+	atchan->status = 0;
 
 	dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
 }
@@ -1092,10 +1293,15 @@
 	if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask))
 		atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy;
 
-	if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) {
+	if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask))
 		atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg;
+
+	if (dma_has_cap(DMA_CYCLIC, atdma->dma_common.cap_mask))
+		atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic;
+
+	if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ||
+	    dma_has_cap(DMA_CYCLIC, atdma->dma_common.cap_mask))
 		atdma->dma_common.device_control = atc_control;
-	}
 
 	dma_writel(atdma, EN, AT_DMA_ENABLE);
 

diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index 495457e..087dbf1 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h

@@ -103,6 +103,10 @@
 /* Bitfields in CTRLB */
 #define	ATC_SIF(i)		(0x3 & (i))	/* Src tx done via AHB-Lite Interface i */
 #define	ATC_DIF(i)		((0x3 & (i)) <<  4)	/* Dst tx done via AHB-Lite Interface i */
+				  /* Specify AHB interfaces */
+#define AT_DMA_MEM_IF		0 /* interface 0 as memory interface */
+#define AT_DMA_PER_IF		1 /* interface 1 as peripheral interface */
+
 #define	ATC_SRC_PIP		(0x1 <<  8)	/* Source Picture-in-Picture enabled */
 #define	ATC_DST_PIP		(0x1 << 12)	/* Destination Picture-in-Picture enabled */
 #define	ATC_SRC_DSCR_DIS	(0x1 << 16)	/* Src Descriptor fetch disable */
@@ -181,12 +185,23 @@
 /*--  Channels  --------------------------------------------------------*/
 
 /**
+ * atc_status - information bits stored in channel status flag
+ *
+ * Manipulated with atomic operations.
+ */
+enum atc_status {
+	ATC_IS_ERROR = 0,
+	ATC_IS_PAUSED = 1,
+	ATC_IS_CYCLIC = 24,
+};
+
+/**
  * struct at_dma_chan - internal representation of an Atmel HDMAC channel
  * @chan_common: common dmaengine channel object members
  * @device: parent device
  * @ch_regs: memory mapped register base
  * @mask: channel index in a mask
- * @error_status: transmit error status information from irq handler
+ * @status: transmit status information from irq/prep* functions
  *                to tasklet (use atomic operations)
  * @tasklet: bottom half to finish transaction work
  * @lock: serializes enqueue/dequeue operations to descriptors lists
@@ -201,7 +216,7 @@
 	struct at_dma		*device;
 	void __iomem		*ch_regs;
 	u8			mask;
-	unsigned long		error_status;
+	unsigned long		status;
 	struct tasklet_struct	tasklet;
 
 	spinlock_t		lock;
@@ -309,8 +324,8 @@
 	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
 	u32		ebci;
 
-	/* enable interrupts on buffer chain completion & error */
-	ebci =    AT_DMA_CBTC(atchan->chan_common.chan_id)
+	/* enable interrupts on buffer transfer completion & error */
+	ebci =    AT_DMA_BTC(atchan->chan_common.chan_id)
 		| AT_DMA_ERR(atchan->chan_common.chan_id);
 	if (on)
 		dma_writel(atdma, EBCIER, ebci);
@@ -347,7 +362,12 @@
  */
 static void set_desc_eol(struct at_desc *desc)
 {
-	desc->lli.ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
+	u32 ctrlb = desc->lli.ctrlb;
+
+	ctrlb &= ~ATC_IEN;
+	ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
+
+	desc->lli.ctrlb = ctrlb;
 	desc->lli.dscr = 0;
 }
 

diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index f48e540..af8c0b5 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c

@@ -1610,7 +1610,7 @@
 {
 	return platform_driver_probe(&coh901318_driver, coh901318_probe);
 }
-arch_initcall(coh901318_init);
+subsys_initcall(coh901318_init);
 
 void __exit coh901318_exit(void)
 {

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 2a2e2fa..4d180ca 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c

@@ -3,6 +3,7 @@
  * AVR32 systems.)
  *
  * Copyright (C) 2007-2008 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -93,8 +94,9 @@
 	struct dw_desc *desc, *_desc;
 	struct dw_desc *ret = NULL;
 	unsigned int i = 0;
+	unsigned long flags;
 
-	spin_lock_bh(&dwc->lock);
+	spin_lock_irqsave(&dwc->lock, flags);
 	list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
 		if (async_tx_test_ack(&desc->txd)) {
 			list_del(&desc->desc_node);
@@ -104,7 +106,7 @@
 		dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
 		i++;
 	}
-	spin_unlock_bh(&dwc->lock);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
 
@@ -130,12 +132,14 @@
  */
 static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 {
+	unsigned long flags;
+
 	if (desc) {
 		struct dw_desc *child;
 
 		dwc_sync_desc_for_cpu(dwc, desc);
 
-		spin_lock_bh(&dwc->lock);
+		spin_lock_irqsave(&dwc->lock, flags);
 		list_for_each_entry(child, &desc->tx_list, desc_node)
 			dev_vdbg(chan2dev(&dwc->chan),
 					"moving child desc %p to freelist\n",
@@ -143,7 +147,7 @@
 		list_splice_init(&desc->tx_list, &dwc->free_list);
 		dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
 		list_add(&desc->desc_node, &dwc->free_list);
-		spin_unlock_bh(&dwc->lock);
+		spin_unlock_irqrestore(&dwc->lock, flags);
 	}
 }
 
@@ -195,18 +199,23 @@
 /*----------------------------------------------------------------------*/
 
 static void
-dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
+dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
+		bool callback_required)
 {
-	dma_async_tx_callback		callback;
-	void				*param;
+	dma_async_tx_callback		callback = NULL;
+	void				*param = NULL;
 	struct dma_async_tx_descriptor	*txd = &desc->txd;
 	struct dw_desc			*child;
+	unsigned long			flags;
 
 	dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
 
+	spin_lock_irqsave(&dwc->lock, flags);
 	dwc->completed = txd->cookie;
-	callback = txd->callback;
-	param = txd->callback_param;
+	if (callback_required) {
+		callback = txd->callback;
+		param = txd->callback_param;
+	}
 
 	dwc_sync_desc_for_cpu(dwc, desc);
 
@@ -238,11 +247,9 @@
 		}
 	}
 
-	/*
-	 * The API requires that no submissions are done from a
-	 * callback, so we don't need to drop the lock here
-	 */
-	if (callback)
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	if (callback_required && callback)
 		callback(param);
 }
 
@@ -250,7 +257,9 @@
 {
 	struct dw_desc *desc, *_desc;
 	LIST_HEAD(list);
+	unsigned long flags;
 
+	spin_lock_irqsave(&dwc->lock, flags);
 	if (dma_readl(dw, CH_EN) & dwc->mask) {
 		dev_err(chan2dev(&dwc->chan),
 			"BUG: XFER bit set, but channel not idle!\n");
@@ -271,8 +280,10 @@
 		dwc_dostart(dwc, dwc_first_active(dwc));
 	}
 
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
 	list_for_each_entry_safe(desc, _desc, &list, desc_node)
-		dwc_descriptor_complete(dwc, desc);
+		dwc_descriptor_complete(dwc, desc, true);
 }
 
 static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
@@ -281,7 +292,9 @@
 	struct dw_desc *desc, *_desc;
 	struct dw_desc *child;
 	u32 status_xfer;
+	unsigned long flags;
 
+	spin_lock_irqsave(&dwc->lock, flags);
 	/*
 	 * Clear block interrupt flag before scanning so that we don't
 	 * miss any, and read LLP before RAW_XFER to ensure it is
@@ -294,30 +307,47 @@
 	if (status_xfer & dwc->mask) {
 		/* Everything we've submitted is done */
 		dma_writel(dw, CLEAR.XFER, dwc->mask);
+		spin_unlock_irqrestore(&dwc->lock, flags);
+
 		dwc_complete_all(dw, dwc);
 		return;
 	}
 
-	if (list_empty(&dwc->active_list))
+	if (list_empty(&dwc->active_list)) {
+		spin_unlock_irqrestore(&dwc->lock, flags);
 		return;
+	}
 
 	dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp);
 
 	list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
-		if (desc->lli.llp == llp)
-			/* This one is currently in progress */
+		/* check first descriptors addr */
+		if (desc->txd.phys == llp) {
+			spin_unlock_irqrestore(&dwc->lock, flags);
 			return;
+		}
+
+		/* check first descriptors llp */
+		if (desc->lli.llp == llp) {
+			/* This one is currently in progress */
+			spin_unlock_irqrestore(&dwc->lock, flags);
+			return;
+		}
 
 		list_for_each_entry(child, &desc->tx_list, desc_node)
-			if (child->lli.llp == llp)
+			if (child->lli.llp == llp) {
 				/* Currently in progress */
+				spin_unlock_irqrestore(&dwc->lock, flags);
 				return;
+			}
 
 		/*
 		 * No descriptors so far seem to be in progress, i.e.
 		 * this one must be done.
 		 */
-		dwc_descriptor_complete(dwc, desc);
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		dwc_descriptor_complete(dwc, desc, true);
+		spin_lock_irqsave(&dwc->lock, flags);
 	}
 
 	dev_err(chan2dev(&dwc->chan),
@@ -332,6 +362,7 @@
 		list_move(dwc->queue.next, &dwc->active_list);
 		dwc_dostart(dwc, dwc_first_active(dwc));
 	}
+	spin_unlock_irqrestore(&dwc->lock, flags);
 }
 
 static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
@@ -346,9 +377,12 @@
 {
 	struct dw_desc *bad_desc;
 	struct dw_desc *child;
+	unsigned long flags;
 
 	dwc_scan_descriptors(dw, dwc);
 
+	spin_lock_irqsave(&dwc->lock, flags);
+
 	/*
 	 * The descriptor currently at the head of the active list is
 	 * borked. Since we don't have any way to report errors, we'll
@@ -378,8 +412,10 @@
 	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
 		dwc_dump_lli(dwc, &child->lli);
 
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
 	/* Pretend the descriptor completed successfully */
-	dwc_descriptor_complete(dwc, bad_desc);
+	dwc_descriptor_complete(dwc, bad_desc, true);
 }
 
 /* --------------------- Cyclic DMA API extensions -------------------- */
@@ -402,6 +438,8 @@
 static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
 		u32 status_block, u32 status_err, u32 status_xfer)
 {
+	unsigned long flags;
+
 	if (status_block & dwc->mask) {
 		void (*callback)(void *param);
 		void *callback_param;
@@ -412,11 +450,9 @@
 
 		callback = dwc->cdesc->period_callback;
 		callback_param = dwc->cdesc->period_callback_param;
-		if (callback) {
-			spin_unlock(&dwc->lock);
+
+		if (callback)
 			callback(callback_param);
-			spin_lock(&dwc->lock);
-		}
 	}
 
 	/*
@@ -430,6 +466,9 @@
 		dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s "
 				"interrupt, stopping DMA transfer\n",
 				status_xfer ? "xfer" : "error");
+
+		spin_lock_irqsave(&dwc->lock, flags);
+
 		dev_err(chan2dev(&dwc->chan),
 			"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
 			channel_readl(dwc, SAR),
@@ -453,6 +492,8 @@
 
 		for (i = 0; i < dwc->cdesc->periods; i++)
 			dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
+
+		spin_unlock_irqrestore(&dwc->lock, flags);
 	}
 }
 
@@ -476,7 +517,6 @@
 
 	for (i = 0; i < dw->dma.chancnt; i++) {
 		dwc = &dw->chan[i];
-		spin_lock(&dwc->lock);
 		if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
 			dwc_handle_cyclic(dw, dwc, status_block, status_err,
 					status_xfer);
@@ -484,7 +524,6 @@
 			dwc_handle_error(dw, dwc);
 		else if ((status_block | status_xfer) & (1 << i))
 			dwc_scan_descriptors(dw, dwc);
-		spin_unlock(&dwc->lock);
 	}
 
 	/*
@@ -539,8 +578,9 @@
 	struct dw_desc		*desc = txd_to_dw_desc(tx);
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(tx->chan);
 	dma_cookie_t		cookie;
+	unsigned long		flags;
 
-	spin_lock_bh(&dwc->lock);
+	spin_lock_irqsave(&dwc->lock, flags);
 	cookie = dwc_assign_cookie(dwc, desc);
 
 	/*
@@ -560,7 +600,7 @@
 		list_add_tail(&desc->desc_node, &dwc->queue);
 	}
 
-	spin_unlock_bh(&dwc->lock);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return cookie;
 }
@@ -689,15 +729,7 @@
 		reg = dws->tx_reg;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
-			u32		len;
-			u32		mem;
-
-			desc = dwc_desc_get(dwc);
-			if (!desc) {
-				dev_err(chan2dev(chan),
-					"not enough descriptors available\n");
-				goto err_desc_get;
-			}
+			u32		len, dlen, mem;
 
 			mem = sg_phys(sg);
 			len = sg_dma_len(sg);
@@ -705,10 +737,27 @@
 			if (unlikely(mem & 3 || len & 3))
 				mem_width = 0;
 
+slave_sg_todev_fill_desc:
+			desc = dwc_desc_get(dwc);
+			if (!desc) {
+				dev_err(chan2dev(chan),
+					"not enough descriptors available\n");
+				goto err_desc_get;
+			}
+
 			desc->lli.sar = mem;
 			desc->lli.dar = reg;
 			desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
-			desc->lli.ctlhi = len >> mem_width;
+			if ((len >> mem_width) > DWC_MAX_COUNT) {
+				dlen = DWC_MAX_COUNT << mem_width;
+				mem += dlen;
+				len -= dlen;
+			} else {
+				dlen = len;
+				len = 0;
+			}
+
+			desc->lli.ctlhi = dlen >> mem_width;
 
 			if (!first) {
 				first = desc;
@@ -722,7 +771,10 @@
 						&first->tx_list);
 			}
 			prev = desc;
-			total_len += len;
+			total_len += dlen;
+
+			if (len)
+				goto slave_sg_todev_fill_desc;
 		}
 		break;
 	case DMA_FROM_DEVICE:
@@ -735,15 +787,7 @@
 		reg = dws->rx_reg;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
-			u32		len;
-			u32		mem;
-
-			desc = dwc_desc_get(dwc);
-			if (!desc) {
-				dev_err(chan2dev(chan),
-					"not enough descriptors available\n");
-				goto err_desc_get;
-			}
+			u32		len, dlen, mem;
 
 			mem = sg_phys(sg);
 			len = sg_dma_len(sg);
@@ -751,10 +795,26 @@
 			if (unlikely(mem & 3 || len & 3))
 				mem_width = 0;
 
+slave_sg_fromdev_fill_desc:
+			desc = dwc_desc_get(dwc);
+			if (!desc) {
+				dev_err(chan2dev(chan),
+						"not enough descriptors available\n");
+				goto err_desc_get;
+			}
+
 			desc->lli.sar = reg;
 			desc->lli.dar = mem;
 			desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
-			desc->lli.ctlhi = len >> reg_width;
+			if ((len >> reg_width) > DWC_MAX_COUNT) {
+				dlen = DWC_MAX_COUNT << reg_width;
+				mem += dlen;
+				len -= dlen;
+			} else {
+				dlen = len;
+				len = 0;
+			}
+			desc->lli.ctlhi = dlen >> reg_width;
 
 			if (!first) {
 				first = desc;
@@ -768,7 +828,10 @@
 						&first->tx_list);
 			}
 			prev = desc;
-			total_len += len;
+			total_len += dlen;
+
+			if (len)
+				goto slave_sg_fromdev_fill_desc;
 		}
 		break;
 	default:
@@ -799,35 +862,52 @@
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma		*dw = to_dw_dma(chan->device);
 	struct dw_desc		*desc, *_desc;
+	unsigned long		flags;
+	u32			cfglo;
 	LIST_HEAD(list);
 
-	/* Only supports DMA_TERMINATE_ALL */
-	if (cmd != DMA_TERMINATE_ALL)
+	if (cmd == DMA_PAUSE) {
+		spin_lock_irqsave(&dwc->lock, flags);
+
+		cfglo = channel_readl(dwc, CFG_LO);
+		channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+		while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY))
+			cpu_relax();
+
+		dwc->paused = true;
+		spin_unlock_irqrestore(&dwc->lock, flags);
+	} else if (cmd == DMA_RESUME) {
+		if (!dwc->paused)
+			return 0;
+
+		spin_lock_irqsave(&dwc->lock, flags);
+
+		cfglo = channel_readl(dwc, CFG_LO);
+		channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+		dwc->paused = false;
+
+		spin_unlock_irqrestore(&dwc->lock, flags);
+	} else if (cmd == DMA_TERMINATE_ALL) {
+		spin_lock_irqsave(&dwc->lock, flags);
+
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+		while (dma_readl(dw, CH_EN) & dwc->mask)
+			cpu_relax();
+
+		dwc->paused = false;
+
+		/* active_list entries will end up before queued entries */
+		list_splice_init(&dwc->queue, &list);
+		list_splice_init(&dwc->active_list, &list);
+
+		spin_unlock_irqrestore(&dwc->lock, flags);
+
+		/* Flush all pending and queued descriptors */
+		list_for_each_entry_safe(desc, _desc, &list, desc_node)
+			dwc_descriptor_complete(dwc, desc, false);
+	} else
 		return -ENXIO;
 
-	/*
-	 * This is only called when something went wrong elsewhere, so
-	 * we don't really care about the data. Just disable the
-	 * channel. We still have to poll the channel enable bit due
-	 * to AHB/HSB limitations.
-	 */
-	spin_lock_bh(&dwc->lock);
-
-	channel_clear_bit(dw, CH_EN, dwc->mask);
-
-	while (dma_readl(dw, CH_EN) & dwc->mask)
-		cpu_relax();
-
-	/* active_list entries will end up before queued entries */
-	list_splice_init(&dwc->queue, &list);
-	list_splice_init(&dwc->active_list, &list);
-
-	spin_unlock_bh(&dwc->lock);
-
-	/* Flush all pending and queued descriptors */
-	list_for_each_entry_safe(desc, _desc, &list, desc_node)
-		dwc_descriptor_complete(dwc, desc);
-
 	return 0;
 }
 
@@ -846,9 +926,7 @@
 
 	ret = dma_async_is_complete(cookie, last_complete, last_used);
 	if (ret != DMA_SUCCESS) {
-		spin_lock_bh(&dwc->lock);
 		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
-		spin_unlock_bh(&dwc->lock);
 
 		last_complete = dwc->completed;
 		last_used = chan->cookie;
@@ -856,7 +934,14 @@
 		ret = dma_async_is_complete(cookie, last_complete, last_used);
 	}
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
+	if (ret != DMA_SUCCESS)
+		dma_set_tx_state(txstate, last_complete, last_used,
+				dwc_first_active(dwc)->len);
+	else
+		dma_set_tx_state(txstate, last_complete, last_used, 0);
+
+	if (dwc->paused)
+		return DMA_PAUSED;
 
 	return ret;
 }
@@ -865,10 +950,8 @@
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 
-	spin_lock_bh(&dwc->lock);
 	if (!list_empty(&dwc->queue))
 		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
-	spin_unlock_bh(&dwc->lock);
 }
 
 static int dwc_alloc_chan_resources(struct dma_chan *chan)
@@ -880,6 +963,7 @@
 	int			i;
 	u32			cfghi;
 	u32			cfglo;
+	unsigned long		flags;
 
 	dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
 
@@ -917,16 +1001,16 @@
 	 * doesn't mean what you think it means), and status writeback.
 	 */
 
-	spin_lock_bh(&dwc->lock);
+	spin_lock_irqsave(&dwc->lock, flags);
 	i = dwc->descs_allocated;
 	while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
-		spin_unlock_bh(&dwc->lock);
+		spin_unlock_irqrestore(&dwc->lock, flags);
 
 		desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL);
 		if (!desc) {
 			dev_info(chan2dev(chan),
 				"only allocated %d descriptors\n", i);
-			spin_lock_bh(&dwc->lock);
+			spin_lock_irqsave(&dwc->lock, flags);
 			break;
 		}
 
@@ -938,7 +1022,7 @@
 				sizeof(desc->lli), DMA_TO_DEVICE);
 		dwc_desc_put(dwc, desc);
 
-		spin_lock_bh(&dwc->lock);
+		spin_lock_irqsave(&dwc->lock, flags);
 		i = ++dwc->descs_allocated;
 	}
 
@@ -947,7 +1031,7 @@
 	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
 	channel_set_bit(dw, MASK.ERROR, dwc->mask);
 
-	spin_unlock_bh(&dwc->lock);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	dev_dbg(chan2dev(chan),
 		"alloc_chan_resources allocated %d descriptors\n", i);
@@ -960,6 +1044,7 @@
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma		*dw = to_dw_dma(chan->device);
 	struct dw_desc		*desc, *_desc;
+	unsigned long		flags;
 	LIST_HEAD(list);
 
 	dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
@@ -970,7 +1055,7 @@
 	BUG_ON(!list_empty(&dwc->queue));
 	BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
 
-	spin_lock_bh(&dwc->lock);
+	spin_lock_irqsave(&dwc->lock, flags);
 	list_splice_init(&dwc->free_list, &list);
 	dwc->descs_allocated = 0;
 
@@ -979,7 +1064,7 @@
 	channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
 	channel_clear_bit(dw, MASK.ERROR, dwc->mask);
 
-	spin_unlock_bh(&dwc->lock);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	list_for_each_entry_safe(desc, _desc, &list, desc_node) {
 		dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
@@ -1004,13 +1089,14 @@
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
+	unsigned long		flags;
 
 	if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) {
 		dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n");
 		return -ENODEV;
 	}
 
-	spin_lock(&dwc->lock);
+	spin_lock_irqsave(&dwc->lock, flags);
 
 	/* assert channel is idle */
 	if (dma_readl(dw, CH_EN) & dwc->mask) {
@@ -1023,7 +1109,7 @@
 			channel_readl(dwc, LLP),
 			channel_readl(dwc, CTL_HI),
 			channel_readl(dwc, CTL_LO));
-		spin_unlock(&dwc->lock);
+		spin_unlock_irqrestore(&dwc->lock, flags);
 		return -EBUSY;
 	}
 
@@ -1038,7 +1124,7 @@
 
 	channel_set_bit(dw, CH_EN, dwc->mask);
 
-	spin_unlock(&dwc->lock);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return 0;
 }
@@ -1054,14 +1140,15 @@
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
+	unsigned long		flags;
 
-	spin_lock(&dwc->lock);
+	spin_lock_irqsave(&dwc->lock, flags);
 
 	channel_clear_bit(dw, CH_EN, dwc->mask);
 	while (dma_readl(dw, CH_EN) & dwc->mask)
 		cpu_relax();
 
-	spin_unlock(&dwc->lock);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 }
 EXPORT_SYMBOL(dw_dma_cyclic_stop);
 
@@ -1090,17 +1177,18 @@
 	unsigned int			reg_width;
 	unsigned int			periods;
 	unsigned int			i;
+	unsigned long			flags;
 
-	spin_lock_bh(&dwc->lock);
+	spin_lock_irqsave(&dwc->lock, flags);
 	if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) {
-		spin_unlock_bh(&dwc->lock);
+		spin_unlock_irqrestore(&dwc->lock, flags);
 		dev_dbg(chan2dev(&dwc->chan),
 				"queue and/or active list are not empty\n");
 		return ERR_PTR(-EBUSY);
 	}
 
 	was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
-	spin_unlock_bh(&dwc->lock);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 	if (was_cyclic) {
 		dev_dbg(chan2dev(&dwc->chan),
 				"channel already prepared for cyclic DMA\n");
@@ -1214,13 +1302,14 @@
 	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
 	struct dw_cyclic_desc	*cdesc = dwc->cdesc;
 	int			i;
+	unsigned long		flags;
 
 	dev_dbg(chan2dev(&dwc->chan), "cyclic free\n");
 
 	if (!cdesc)
 		return;
 
-	spin_lock_bh(&dwc->lock);
+	spin_lock_irqsave(&dwc->lock, flags);
 
 	channel_clear_bit(dw, CH_EN, dwc->mask);
 	while (dma_readl(dw, CH_EN) & dwc->mask)
@@ -1230,7 +1319,7 @@
 	dma_writel(dw, CLEAR.ERROR, dwc->mask);
 	dma_writel(dw, CLEAR.XFER, dwc->mask);
 
-	spin_unlock_bh(&dwc->lock);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	for (i = 0; i < cdesc->periods; i++)
 		dwc_desc_put(dwc, cdesc->desc[i]);
@@ -1487,3 +1576,4 @@
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver");
 MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");

diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 720f821..c341951 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h

@@ -2,6 +2,7 @@
  * Driver for the Synopsys DesignWare AHB DMA Controller
  *
  * Copyright (C) 2005-2007 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -138,6 +139,7 @@
 	void __iomem		*ch_regs;
 	u8			mask;
 	u8			priority;
+	bool			paused;
 
 	spinlock_t		lock;
 

diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 3d4ec38..f653517 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c

@@ -1292,8 +1292,7 @@
 	if (err)
 		goto err_dma;
 
-	pm_runtime_set_active(&pdev->dev);
-	pm_runtime_enable(&pdev->dev);
+	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_allow(&pdev->dev);
 	return 0;
 
@@ -1322,6 +1321,9 @@
 static void __devexit intel_mid_dma_remove(struct pci_dev *pdev)
 {
 	struct middma_device *device = pci_get_drvdata(pdev);
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_forbid(&pdev->dev);
 	middma_shutdown(pdev);
 	pci_dev_put(pdev);
 	kfree(device);
@@ -1385,13 +1387,20 @@
 static int dma_runtime_suspend(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	return dma_suspend(pci_dev, PMSG_SUSPEND);
+	struct middma_device *device = pci_get_drvdata(pci_dev);
+
+	device->state = SUSPENDED;
+	return 0;
 }
 
 static int dma_runtime_resume(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	return dma_resume(pci_dev);
+	struct middma_device *device = pci_get_drvdata(pci_dev);
+
+	device->state = RUNNING;
+	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
+	return 0;
 }
 
 static int dma_runtime_idle(struct device *dev)

diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index f4a51d4..5d65f83 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c

@@ -508,6 +508,7 @@
 	struct ioat_ring_ent **ring;
 	u64 status;
 	int order;
+	int i = 0;
 
 	/* have we already been set up? */
 	if (ioat->ring)
@@ -548,8 +549,11 @@
 	ioat2_start_null_desc(ioat);
 
 	/* check that we got off the ground */
-	udelay(5);
-	status = ioat_chansts(chan);
+	do {
+		udelay(1);
+		status = ioat_chansts(chan);
+	} while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status));
+
 	if (is_ioat_active(status) || is_ioat_idle(status)) {
 		set_bit(IOAT_RUN, &chan->state);
 		return 1 << ioat->alloc_order;

diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index c6b01f5..e03f811 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c

@@ -619,7 +619,7 @@
 
 	if (unlikely(!len))
 		return NULL;
-	BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
+	BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT);
 
 	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
 		__func__, len);
@@ -652,7 +652,7 @@
 
 	if (unlikely(!len))
 		return NULL;
-	BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
+	BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT);
 
 	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
 		__func__, len);
@@ -686,7 +686,7 @@
 
 	if (unlikely(!len))
 		return NULL;
-	BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
+	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
 
 	dev_dbg(iop_chan->device->common.dev,
 		"%s src_cnt: %d len: %u flags: %lx\n",

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index a25f5f6..954e334 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c

@@ -671,7 +671,7 @@
 	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
 		return NULL;
 
-	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
 
 	spin_lock_bh(&mv_chan->lock);
 	slot_cnt = mv_chan_memcpy_slot_count(len);
@@ -710,7 +710,7 @@
 	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
 		return NULL;
 
-	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
 
 	spin_lock_bh(&mv_chan->lock);
 	slot_cnt = mv_chan_memset_slot_count(len);
@@ -744,7 +744,7 @@
 	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
 		return NULL;
 
-	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
 
 	dev_dbg(mv_chan->device->common.dev,
 		"%s src_cnt: %d len: dest %x %u flags: %ld\n",

diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 8d8fef1..ff5b38f 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c

@@ -77,10 +77,10 @@
 	u32	dma_ctl0;
 	u32	dma_ctl1;
 	u32	dma_ctl2;
-	u32	reserved1;
+	u32	dma_ctl3;
 	u32	dma_sts0;
 	u32	dma_sts1;
-	u32	reserved2;
+	u32	dma_sts2;
 	u32	reserved3;
 	struct pch_dma_desc_regs desc[MAX_CHAN_NR];
 };
@@ -130,6 +130,7 @@
 #define PCH_DMA_CTL0	0x00
 #define PCH_DMA_CTL1	0x04
 #define PCH_DMA_CTL2	0x08
+#define PCH_DMA_CTL3	0x0C
 #define PCH_DMA_STS0	0x10
 #define PCH_DMA_STS1	0x14
 
@@ -138,7 +139,8 @@
 #define dma_writel(pd, name, val) \
 	writel((val), (pd)->membase + PCH_DMA_##name)
 
-static inline struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd)
+static inline
+struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd)
 {
 	return container_of(txd, struct pch_dma_desc, txd);
 }
@@ -163,13 +165,15 @@
 	return chan->dev->device.parent;
 }
 
-static inline struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan)
+static inline
+struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan)
 {
 	return list_first_entry(&pd_chan->active_list,
 				struct pch_dma_desc, desc_node);
 }
 
-static inline struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan)
+static inline
+struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan)
 {
 	return list_first_entry(&pd_chan->queue,
 				struct pch_dma_desc, desc_node);
@@ -199,16 +203,30 @@
 	struct pch_dma *pd = to_pd(chan->device);
 	u32 val;
 
-	val = dma_readl(pd, CTL0);
+	if (chan->chan_id < 8) {
+		val = dma_readl(pd, CTL0);
 
-	if (pd_chan->dir == DMA_TO_DEVICE)
-		val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
-			       DMA_CTL0_DIR_SHIFT_BITS);
-	else
-		val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
-				 DMA_CTL0_DIR_SHIFT_BITS));
+		if (pd_chan->dir == DMA_TO_DEVICE)
+			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
+				       DMA_CTL0_DIR_SHIFT_BITS);
+		else
+			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
+					 DMA_CTL0_DIR_SHIFT_BITS));
 
-	dma_writel(pd, CTL0, val);
+		dma_writel(pd, CTL0, val);
+	} else {
+		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
+		val = dma_readl(pd, CTL3);
+
+		if (pd_chan->dir == DMA_TO_DEVICE)
+			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
+				       DMA_CTL0_DIR_SHIFT_BITS);
+		else
+			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch +
+					 DMA_CTL0_DIR_SHIFT_BITS));
+
+		dma_writel(pd, CTL3, val);
+	}
 
 	dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n",
 		chan->chan_id, val);
@@ -219,13 +237,26 @@
 	struct pch_dma *pd = to_pd(chan->device);
 	u32 val;
 
-	val = dma_readl(pd, CTL0);
+	if (chan->chan_id < 8) {
+		val = dma_readl(pd, CTL0);
 
-	val &= ~(DMA_CTL0_MODE_MASK_BITS <<
-		(DMA_CTL0_BITS_PER_CH * chan->chan_id));
-	val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id);
+		val &= ~(DMA_CTL0_MODE_MASK_BITS <<
+			(DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id);
 
-	dma_writel(pd, CTL0, val);
+		dma_writel(pd, CTL0, val);
+	} else {
+		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
+
+		val = dma_readl(pd, CTL3);
+
+		val &= ~(DMA_CTL0_MODE_MASK_BITS <<
+			(DMA_CTL0_BITS_PER_CH * ch));
+		val |= mode << (DMA_CTL0_BITS_PER_CH * ch);
+
+		dma_writel(pd, CTL3, val);
+
+	}
 
 	dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n",
 		chan->chan_id, val);
@@ -251,9 +282,6 @@
 
 static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc)
 {
-	struct pch_dma *pd = to_pd(pd_chan->chan.device);
-	u32 val;
-
 	if (!pdc_is_idle(pd_chan)) {
 		dev_err(chan2dev(&pd_chan->chan),
 			"BUG: Attempt to start non-idle channel\n");
@@ -279,10 +307,6 @@
 		channel_writel(pd_chan, NEXT, desc->txd.phys);
 		pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG);
 	}
-
-	val = dma_readl(pd, CTL2);
-	val |= 1 << (DMA_CTL2_START_SHIFT_BITS + pd_chan->chan.chan_id);
-	dma_writel(pd, CTL2, val);
 }
 
 static void pdc_chain_complete(struct pch_dma_chan *pd_chan,
@@ -403,7 +427,7 @@
 {
 	struct pch_dma_desc *desc, *_d;
 	struct pch_dma_desc *ret = NULL;
-	int i;
+	int i = 0;
 
 	spin_lock(&pd_chan->lock);
 	list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) {
@@ -478,7 +502,6 @@
 	spin_unlock_bh(&pd_chan->lock);
 
 	pdc_enable_irq(chan, 1);
-	pdc_set_dir(chan);
 
 	return pd_chan->descs_allocated;
 }
@@ -561,6 +584,9 @@
 	else
 		return NULL;
 
+	pd_chan->dir = direction;
+	pdc_set_dir(chan);
+
 	for_each_sg(sgl, sg, sg_len, i) {
 		desc = pdc_desc_get(pd_chan);
 
@@ -703,6 +729,7 @@
 	pd->regs.dma_ctl0 = dma_readl(pd, CTL0);
 	pd->regs.dma_ctl1 = dma_readl(pd, CTL1);
 	pd->regs.dma_ctl2 = dma_readl(pd, CTL2);
+	pd->regs.dma_ctl3 = dma_readl(pd, CTL3);
 
 	list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) {
 		pd_chan = to_pd_chan(chan);
@@ -725,6 +752,7 @@
 	dma_writel(pd, CTL0, pd->regs.dma_ctl0);
 	dma_writel(pd, CTL1, pd->regs.dma_ctl1);
 	dma_writel(pd, CTL2, pd->regs.dma_ctl2);
+	dma_writel(pd, CTL3, pd->regs.dma_ctl3);
 
 	list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) {
 		pd_chan = to_pd_chan(chan);
@@ -850,8 +878,6 @@
 
 		pd_chan->membase = &regs->desc[i];
 
-		pd_chan->dir = (i % 2) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-
 		spin_lock_init(&pd_chan->lock);
 
 		INIT_LIST_HEAD(&pd_chan->active_list);
@@ -929,13 +955,23 @@
 #define PCI_DEVICE_ID_ML7213_DMA1_8CH	0x8026
 #define PCI_DEVICE_ID_ML7213_DMA2_8CH	0x802B
 #define PCI_DEVICE_ID_ML7213_DMA3_4CH	0x8034
+#define PCI_DEVICE_ID_ML7213_DMA4_12CH	0x8032
+#define PCI_DEVICE_ID_ML7223_DMA1_4CH	0x800B
+#define PCI_DEVICE_ID_ML7223_DMA2_4CH	0x800E
+#define PCI_DEVICE_ID_ML7223_DMA3_4CH	0x8017
+#define PCI_DEVICE_ID_ML7223_DMA4_4CH	0x803B
 
-static const struct pci_device_id pch_dma_id_table[] = {
+DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
 	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */
 	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */
 	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA1_4CH), 4}, /* UART */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */
 	{ 0, },
 };
 

diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index 3b0247e..fc457a7 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c

@@ -2313,7 +2313,7 @@
 	if (unlikely(!len))
 		return NULL;
 
-	BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT));
+	BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT);
 
 	spin_lock_bh(&ppc440spe_chan->lock);
 
@@ -2354,7 +2354,7 @@
 	if (unlikely(!len))
 		return NULL;
 
-	BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT));
+	BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT);
 
 	spin_lock_bh(&ppc440spe_chan->lock);
 
@@ -2397,7 +2397,7 @@
 				     dma_dest, dma_src, src_cnt));
 	if (unlikely(!len))
 		return NULL;
-	BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT));
+	BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
 
 	dev_dbg(ppc440spe_chan->device->common.dev,
 		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
@@ -2887,7 +2887,7 @@
 	ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id,
 				    dst, src, src_cnt));
 	BUG_ON(!len);
-	BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT));
+	BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
 	BUG_ON(!src_cnt);
 
 	if (src_cnt == 1 && dst[1] == src[0]) {

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 94ee15d..8f222d4 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c

@@ -1829,7 +1829,7 @@
 {
 	struct stedma40_platform_data *plat = chan->base->plat_data;
 	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
-	dma_addr_t addr;
+	dma_addr_t addr = 0;
 
 	if (chan->runtime_addr)
 		return chan->runtime_addr;
@@ -2962,4 +2962,4 @@
 {
 	return platform_driver_probe(&d40_driver, d40_probe);
 }
-arch_initcall(stedma40_init);
+subsys_initcall(stedma40_init);

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index d213646..4a7f631 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig

@@ -86,6 +86,34 @@
 	help
 	  Say yes here to support GPIO functionality of IT8761E super I/O chip.
 
+config GPIO_EXYNOS4
+	bool "Samsung Exynos4 GPIO library support"
+	default y if CPU_EXYNOS4210
+	depends on ARM
+	help
+	  Say yes here to support Samsung Exynos4 series SoCs GPIO library
+
+config GPIO_PLAT_SAMSUNG
+	bool "Samsung SoCs GPIO library support"
+	default y if SAMSUNG_GPIOLIB_4BIT
+	depends on ARM
+	help
+	  Say yes here to support Samsung SoCs GPIO library
+
+config GPIO_S5PC100
+	bool "Samsung S5PC100 GPIO library support"
+	default y if CPU_S5PC100
+	depends on ARM
+	help
+	  Say yes here to support Samsung S5PC100 SoCs GPIO library
+
+config GPIO_S5PV210
+	bool "Samsung S5PV210/S5PC110 GPIO library support"
+	default y if CPU_S5PV210
+	depends on ARM
+	help
+	  Say yes here to support Samsung S5PV210/S5PC110 SoCs GPIO library
+
 config GPIO_PL061
 	bool "PrimeCell PL061 GPIO support"
 	depends on ARM_AMBA
@@ -303,7 +331,7 @@
 
 config GPIO_CS5535
 	tristate "AMD CS5535/CS5536 GPIO support"
-	depends on PCI && X86 && !CS5535_GPIO
+	depends on PCI && X86 && !CS5535_GPIO && MFD_CS5535
 	help
 	  The AMD CS5535 and CS5536 southbridges support 28 GPIO pins that
 	  can be used for quite a number of things.  The CS5535/6 is found on
@@ -334,13 +362,19 @@
 	  Say Y here to support Intel Langwell/Penwell GPIO.
 
 config GPIO_PCH
-	tristate "PCH GPIO of Intel Topcliff"
+	tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GPIO"
 	depends on PCI && X86
 	help
 	  This driver is for PCH(Platform controller Hub) GPIO of Intel Topcliff
 	  which is an IOH(Input/Output Hub) for x86 embedded processor.
 	  This driver can access PCH GPIO device.
 
+	  This driver also can be used for OKI SEMICONDUCTOR IOH(Input/
+	  Output Hub), ML7223.
+	  ML7223 IOH is for MP(Media Phone) use.
+	  ML7223 is companion chip for Intel Atom E6xx series.
+	  ML7223 is completely compatible for Intel EG20T PCH.
+
 config GPIO_ML_IOH
 	tristate "OKI SEMICONDUCTOR ML7213 IOH GPIO support"
 	depends on PCI

diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 6a3387a..b605f8e 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile

@@ -8,6 +8,10 @@
 obj-$(CONFIG_GPIO_ADP5588)	+= adp5588-gpio.o
 obj-$(CONFIG_GPIO_BASIC_MMIO_CORE)	+= basic_mmio_gpio.o
 obj-$(CONFIG_GPIO_BASIC_MMIO)	+= basic_mmio_gpio.o
+obj-$(CONFIG_GPIO_EXYNOS4)	+= gpio-exynos4.o
+obj-$(CONFIG_GPIO_PLAT_SAMSUNG)	+= gpio-plat-samsung.o
+obj-$(CONFIG_GPIO_S5PC100)	+= gpio-s5pc100.o
+obj-$(CONFIG_GPIO_S5PV210)	+= gpio-s5pv210.o
 obj-$(CONFIG_GPIO_LANGWELL)	+= langwell_gpio.o
 obj-$(CONFIG_GPIO_MAX730X)	+= max730x.o
 obj-$(CONFIG_GPIO_MAX7300)	+= max7300.o
@@ -16,6 +20,7 @@
 obj-$(CONFIG_GPIO_MC33880)	+= mc33880.o
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
 obj-$(CONFIG_GPIO_74X164)	+= 74x164.o
+obj-$(CONFIG_ARCH_OMAP)         += gpio-omap.o
 obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
 obj-$(CONFIG_GPIO_PCH)		+= pch_gpio.o
@@ -34,6 +39,8 @@
 obj-$(CONFIG_GPIO_WM8350)	+= wm8350-gpiolib.o
 obj-$(CONFIG_GPIO_WM8994)	+= wm8994-gpio.o
 obj-$(CONFIG_GPIO_SCH)		+= sch_gpio.o
+obj-$(CONFIG_MACH_U300)		+= gpio-u300.o
+obj-$(CONFIG_PLAT_NOMADIK)	+= gpio-nomadik.o
 obj-$(CONFIG_GPIO_RDC321X)	+= rdc321x-gpio.o
 obj-$(CONFIG_GPIO_JANZ_TTL)	+= janz-ttl.o
 obj-$(CONFIG_GPIO_SX150X)	+= sx150x.o

diff --git a/arch/arm/mach-exynos4/gpiolib.c b/drivers/gpio/gpio-exynos4.c
similarity index 100%
rename from arch/arm/mach-exynos4/gpiolib.c
rename to drivers/gpio/gpio-exynos4.c


diff --git a/arch/arm/plat-nomadik/gpio.c b/drivers/gpio/gpio-nomadik.c
similarity index 94%
rename from arch/arm/plat-nomadik/gpio.c
rename to drivers/gpio/gpio-nomadik.c
index 307b813..4961ef9 100644
--- a/arch/arm/plat-nomadik/gpio.c
+++ b/drivers/gpio/gpio-nomadik.c

@@ -57,6 +57,7 @@
 	u32 fwimsc;
 	u32 slpm;
 	u32 enabled;
+	u32 pull_up;
 };
 
 static struct nmk_gpio_chip *
@@ -103,16 +104,22 @@
 	u32 pdis;
 
 	pdis = readl(nmk_chip->addr + NMK_GPIO_PDIS);
-	if (pull == NMK_GPIO_PULL_NONE)
+	if (pull == NMK_GPIO_PULL_NONE) {
 		pdis |= bit;
-	else
+		nmk_chip->pull_up &= ~bit;
+	} else {
 		pdis &= ~bit;
+	}
+
 	writel(pdis, nmk_chip->addr + NMK_GPIO_PDIS);
 
-	if (pull == NMK_GPIO_PULL_UP)
+	if (pull == NMK_GPIO_PULL_UP) {
+		nmk_chip->pull_up |= bit;
 		writel(bit, nmk_chip->addr + NMK_GPIO_DATS);
-	else if (pull == NMK_GPIO_PULL_DOWN)
+	} else if (pull == NMK_GPIO_PULL_DOWN) {
+		nmk_chip->pull_up &= ~bit;
 		writel(bit, nmk_chip->addr + NMK_GPIO_DATC);
+	}
 }
 
 static void __nmk_gpio_make_input(struct nmk_gpio_chip *nmk_chip,
@@ -811,20 +818,43 @@
 		bool pull;
 		u32 bit = 1 << i;
 
-		if (!label)
-			continue;
-
 		is_out = readl(nmk_chip->addr + NMK_GPIO_DIR) & bit;
 		pull = !(readl(nmk_chip->addr + NMK_GPIO_PDIS) & bit);
 		mode = nmk_gpio_get_mode(gpio);
 		seq_printf(s, " gpio-%-3d (%-20.20s) %s %s %s %s",
-			gpio, label,
+			gpio, label ?: "(none)",
 			is_out ? "out" : "in ",
 			chip->get
 				? (chip->get(chip, i) ? "hi" : "lo")
 				: "?  ",
 			(mode < 0) ? "unknown" : modes[mode],
 			pull ? "pull" : "none");
+
+		if (label && !is_out) {
+			int		irq = gpio_to_irq(gpio);
+			struct irq_desc	*desc = irq_to_desc(irq);
+
+			/* This races with request_irq(), set_irq_type(),
+			 * and set_irq_wake() ... but those are "rare".
+			 */
+			if (irq >= 0 && desc->action) {
+				char *trigger;
+				u32 bitmask = nmk_gpio_get_bitmask(gpio);
+
+				if (nmk_chip->edge_rising & bitmask)
+					trigger = "edge-rising";
+				else if (nmk_chip->edge_falling & bitmask)
+					trigger = "edge-falling";
+				else
+					trigger = "edge-undefined";
+
+				seq_printf(s, " irq-%d %s%s",
+					irq, trigger,
+					irqd_is_wakeup_set(&desc->irq_data)
+						? " wakeup" : "");
+			}
+		}
+
 		seq_printf(s, "\n");
 	}
 }
@@ -898,6 +928,25 @@
 	}
 }
 
+/*
+ * Read the pull up/pull down status.
+ * A bit set in 'pull_up' means that pull up
+ * is selected if pull is enabled in PDIS register.
+ * Note: only pull up/down set via this driver can
+ * be detected due to HW limitations.
+ */
+void nmk_gpio_read_pull(int gpio_bank, u32 *pull_up)
+{
+	if (gpio_bank < NUM_BANKS) {
+		struct nmk_gpio_chip *chip = nmk_gpio_chips[gpio_bank];
+
+		if (!chip)
+			return;
+
+		*pull_up = chip->pull_up;
+	}
+}
+
 static int __devinit nmk_gpio_probe(struct platform_device *dev)
 {
 	struct nmk_gpio_platform_data *pdata = dev->dev.platform_data;

diff --git a/arch/arm/plat-omap/gpio.c b/drivers/gpio/gpio-omap.c
similarity index 92%
rename from arch/arm/plat-omap/gpio.c
rename to drivers/gpio/gpio-omap.c
index efb8693..6c51191 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/drivers/gpio/gpio-omap.c

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/arm/plat-omap/gpio.c
- *
  * Support functions for OMAP GPIO
  *
  * Copyright (C) 2003-2005 Nokia Corporation
@@ -30,109 +28,6 @@
 #include <mach/gpio.h>
 #include <asm/mach/irq.h>
 
-/*
- * OMAP1510 GPIO registers
- */
-#define OMAP1510_GPIO_DATA_INPUT	0x00
-#define OMAP1510_GPIO_DATA_OUTPUT	0x04
-#define OMAP1510_GPIO_DIR_CONTROL	0x08
-#define OMAP1510_GPIO_INT_CONTROL	0x0c
-#define OMAP1510_GPIO_INT_MASK		0x10
-#define OMAP1510_GPIO_INT_STATUS	0x14
-#define OMAP1510_GPIO_PIN_CONTROL	0x18
-
-#define OMAP1510_IH_GPIO_BASE		64
-
-/*
- * OMAP1610 specific GPIO registers
- */
-#define OMAP1610_GPIO_REVISION		0x0000
-#define OMAP1610_GPIO_SYSCONFIG		0x0010
-#define OMAP1610_GPIO_SYSSTATUS		0x0014
-#define OMAP1610_GPIO_IRQSTATUS1	0x0018
-#define OMAP1610_GPIO_IRQENABLE1	0x001c
-#define OMAP1610_GPIO_WAKEUPENABLE	0x0028
-#define OMAP1610_GPIO_DATAIN		0x002c
-#define OMAP1610_GPIO_DATAOUT		0x0030
-#define OMAP1610_GPIO_DIRECTION		0x0034
-#define OMAP1610_GPIO_EDGE_CTRL1	0x0038
-#define OMAP1610_GPIO_EDGE_CTRL2	0x003c
-#define OMAP1610_GPIO_CLEAR_IRQENABLE1	0x009c
-#define OMAP1610_GPIO_CLEAR_WAKEUPENA	0x00a8
-#define OMAP1610_GPIO_CLEAR_DATAOUT	0x00b0
-#define OMAP1610_GPIO_SET_IRQENABLE1	0x00dc
-#define OMAP1610_GPIO_SET_WAKEUPENA	0x00e8
-#define OMAP1610_GPIO_SET_DATAOUT	0x00f0
-
-/*
- * OMAP7XX specific GPIO registers
- */
-#define OMAP7XX_GPIO_DATA_INPUT		0x00
-#define OMAP7XX_GPIO_DATA_OUTPUT	0x04
-#define OMAP7XX_GPIO_DIR_CONTROL	0x08
-#define OMAP7XX_GPIO_INT_CONTROL	0x0c
-#define OMAP7XX_GPIO_INT_MASK		0x10
-#define OMAP7XX_GPIO_INT_STATUS		0x14
-
-/*
- * omap2+ specific GPIO registers
- */
-#define OMAP24XX_GPIO_REVISION		0x0000
-#define OMAP24XX_GPIO_IRQSTATUS1	0x0018
-#define OMAP24XX_GPIO_IRQSTATUS2	0x0028
-#define OMAP24XX_GPIO_IRQENABLE2	0x002c
-#define OMAP24XX_GPIO_IRQENABLE1	0x001c
-#define OMAP24XX_GPIO_WAKE_EN		0x0020
-#define OMAP24XX_GPIO_CTRL		0x0030
-#define OMAP24XX_GPIO_OE		0x0034
-#define OMAP24XX_GPIO_DATAIN		0x0038
-#define OMAP24XX_GPIO_DATAOUT		0x003c
-#define OMAP24XX_GPIO_LEVELDETECT0	0x0040
-#define OMAP24XX_GPIO_LEVELDETECT1	0x0044
-#define OMAP24XX_GPIO_RISINGDETECT	0x0048
-#define OMAP24XX_GPIO_FALLINGDETECT	0x004c
-#define OMAP24XX_GPIO_DEBOUNCE_EN	0x0050
-#define OMAP24XX_GPIO_DEBOUNCE_VAL	0x0054
-#define OMAP24XX_GPIO_CLEARIRQENABLE1	0x0060
-#define OMAP24XX_GPIO_SETIRQENABLE1	0x0064
-#define OMAP24XX_GPIO_CLEARWKUENA	0x0080
-#define OMAP24XX_GPIO_SETWKUENA		0x0084
-#define OMAP24XX_GPIO_CLEARDATAOUT	0x0090
-#define OMAP24XX_GPIO_SETDATAOUT	0x0094
-
-#define OMAP4_GPIO_REVISION		0x0000
-#define OMAP4_GPIO_EOI			0x0020
-#define OMAP4_GPIO_IRQSTATUSRAW0	0x0024
-#define OMAP4_GPIO_IRQSTATUSRAW1	0x0028
-#define OMAP4_GPIO_IRQSTATUS0		0x002c
-#define OMAP4_GPIO_IRQSTATUS1		0x0030
-#define OMAP4_GPIO_IRQSTATUSSET0	0x0034
-#define OMAP4_GPIO_IRQSTATUSSET1	0x0038
-#define OMAP4_GPIO_IRQSTATUSCLR0	0x003c
-#define OMAP4_GPIO_IRQSTATUSCLR1	0x0040
-#define OMAP4_GPIO_IRQWAKEN0		0x0044
-#define OMAP4_GPIO_IRQWAKEN1		0x0048
-#define OMAP4_GPIO_IRQENABLE1		0x011c
-#define OMAP4_GPIO_WAKE_EN		0x0120
-#define OMAP4_GPIO_IRQSTATUS2		0x0128
-#define OMAP4_GPIO_IRQENABLE2		0x012c
-#define OMAP4_GPIO_CTRL			0x0130
-#define OMAP4_GPIO_OE			0x0134
-#define OMAP4_GPIO_DATAIN		0x0138
-#define OMAP4_GPIO_DATAOUT		0x013c
-#define OMAP4_GPIO_LEVELDETECT0		0x0140
-#define OMAP4_GPIO_LEVELDETECT1		0x0144
-#define OMAP4_GPIO_RISINGDETECT		0x0148
-#define OMAP4_GPIO_FALLINGDETECT	0x014c
-#define OMAP4_GPIO_DEBOUNCENABLE	0x0150
-#define OMAP4_GPIO_DEBOUNCINGTIME	0x0154
-#define OMAP4_GPIO_CLEARIRQENABLE1	0x0160
-#define OMAP4_GPIO_SETIRQENABLE1	0x0164
-#define OMAP4_GPIO_CLEARWKUENA		0x0180
-#define OMAP4_GPIO_SETWKUENA		0x0184
-#define OMAP4_GPIO_CLEARDATAOUT		0x0190
-#define OMAP4_GPIO_SETDATAOUT		0x0194
-
 struct gpio_bank {
 	unsigned long pbase;
 	void __iomem *base;

diff --git a/arch/arm/plat-samsung/gpiolib.c b/drivers/gpio/gpio-plat-samsung.c
similarity index 100%
rename from arch/arm/plat-samsung/gpiolib.c
rename to drivers/gpio/gpio-plat-samsung.c


diff --git a/arch/arm/mach-s5pc100/gpiolib.c b/drivers/gpio/gpio-s5pc100.c
similarity index 100%
rename from arch/arm/mach-s5pc100/gpiolib.c
rename to drivers/gpio/gpio-s5pc100.c


diff --git a/arch/arm/mach-s5pv210/gpiolib.c b/drivers/gpio/gpio-s5pv210.c
similarity index 100%
rename from arch/arm/mach-s5pv210/gpiolib.c
rename to drivers/gpio/gpio-s5pv210.c


diff --git a/arch/arm/mach-u300/gpio.c b/drivers/gpio/gpio-u300.c
similarity index 100%
rename from arch/arm/mach-u300/gpio.c
rename to drivers/gpio/gpio-u300.c


diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 137a8ca..a971e3d 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c

@@ -1296,7 +1296,7 @@
  * @array:	array of the 'struct gpio'
  * @num:	how many GPIOs in the array
  */
-int gpio_request_array(struct gpio *array, size_t num)
+int gpio_request_array(const struct gpio *array, size_t num)
 {
 	int i, err;
 
@@ -1319,7 +1319,7 @@
  * @array:	array of the 'struct gpio'
  * @num:	how many GPIOs in the array
  */
-void gpio_free_array(struct gpio *array, size_t num)
+void gpio_free_array(const struct gpio *array, size_t num)
 {
 	while (num--)
 		gpio_free((array++)->gpio);

diff --git a/drivers/gpio/langwell_gpio.c b/drivers/gpio/langwell_gpio.c
index 1b06f67..bd6571e 100644
--- a/drivers/gpio/langwell_gpio.c
+++ b/drivers/gpio/langwell_gpio.c

@@ -33,6 +33,7 @@
 #include <linux/io.h>
 #include <linux/gpio.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 
 /*
  * Langwell chip has 64 pins and thus there are 2 32bit registers to control
@@ -63,6 +64,7 @@
 	void				*reg_base;
 	spinlock_t			lock;
 	unsigned			irq_base;
+	struct pci_dev			*pdev;
 };
 
 static void __iomem *gpio_reg(struct gpio_chip *chip, unsigned offset,
@@ -104,11 +106,18 @@
 	u32 value;
 	unsigned long flags;
 
+	if (lnw->pdev)
+		pm_runtime_get(&lnw->pdev->dev);
+
 	spin_lock_irqsave(&lnw->lock, flags);
 	value = readl(gpdr);
 	value &= ~BIT(offset % 32);
 	writel(value, gpdr);
 	spin_unlock_irqrestore(&lnw->lock, flags);
+
+	if (lnw->pdev)
+		pm_runtime_put(&lnw->pdev->dev);
+
 	return 0;
 }
 
@@ -120,11 +129,19 @@
 	unsigned long flags;
 
 	lnw_gpio_set(chip, offset, value);
+
+	if (lnw->pdev)
+		pm_runtime_get(&lnw->pdev->dev);
+
 	spin_lock_irqsave(&lnw->lock, flags);
 	value = readl(gpdr);
 	value |= BIT(offset % 32);
 	writel(value, gpdr);
 	spin_unlock_irqrestore(&lnw->lock, flags);
+
+	if (lnw->pdev)
+		pm_runtime_put(&lnw->pdev->dev);
+
 	return 0;
 }
 
@@ -145,6 +162,10 @@
 
 	if (gpio >= lnw->chip.ngpio)
 		return -EINVAL;
+
+	if (lnw->pdev)
+		pm_runtime_get(&lnw->pdev->dev);
+
 	spin_lock_irqsave(&lnw->lock, flags);
 	if (type & IRQ_TYPE_EDGE_RISING)
 		value = readl(grer) | BIT(gpio % 32);
@@ -159,6 +180,9 @@
 	writel(value, gfer);
 	spin_unlock_irqrestore(&lnw->lock, flags);
 
+	if (lnw->pdev)
+		pm_runtime_put(&lnw->pdev->dev);
+
 	return 0;
 }
 
@@ -211,6 +235,39 @@
 	chip->irq_eoi(data);
 }
 
+#ifdef CONFIG_PM
+static int lnw_gpio_runtime_resume(struct device *dev)
+{
+	return 0;
+}
+
+static int lnw_gpio_runtime_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int lnw_gpio_runtime_idle(struct device *dev)
+{
+	int err = pm_schedule_suspend(dev, 500);
+
+	if (!err)
+		return 0;
+
+	return -EBUSY;
+}
+
+#else
+#define lnw_gpio_runtime_suspend	NULL
+#define lnw_gpio_runtime_resume		NULL
+#define lnw_gpio_runtime_idle		NULL
+#endif
+
+static const struct dev_pm_ops lnw_gpio_pm_ops = {
+	.runtime_suspend = lnw_gpio_runtime_suspend,
+	.runtime_resume = lnw_gpio_runtime_resume,
+	.runtime_idle = lnw_gpio_runtime_idle,
+};
+
 static int __devinit lnw_gpio_probe(struct pci_dev *pdev,
 			const struct pci_device_id *id)
 {
@@ -270,6 +327,7 @@
 	lnw->chip.base = gpio_base;
 	lnw->chip.ngpio = id->driver_data;
 	lnw->chip.can_sleep = 0;
+	lnw->pdev = pdev;
 	pci_set_drvdata(pdev, lnw);
 	retval = gpiochip_add(&lnw->chip);
 	if (retval) {
@@ -285,6 +343,10 @@
 	}
 
 	spin_lock_init(&lnw->lock);
+
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_allow(&pdev->dev);
+
 	goto done;
 err5:
 	kfree(lnw);
@@ -302,6 +364,9 @@
 	.name		= "langwell_gpio",
 	.id_table	= lnw_gpio_ids,
 	.probe		= lnw_gpio_probe,
+	.driver		= {
+		.pm	= &lnw_gpio_pm_ops,
+	},
 };
 
 

diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 78a8439..0451d7a 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c

@@ -24,33 +24,46 @@
 #include <linux/of_gpio.h>
 #endif
 
-#define PCA953X_INPUT          0
-#define PCA953X_OUTPUT         1
-#define PCA953X_INVERT         2
-#define PCA953X_DIRECTION      3
+#define PCA953X_INPUT		0
+#define PCA953X_OUTPUT		1
+#define PCA953X_INVERT		2
+#define PCA953X_DIRECTION	3
 
-#define PCA953X_GPIOS	       0x00FF
-#define PCA953X_INT	       0x0100
+#define PCA957X_IN		0
+#define PCA957X_INVRT		1
+#define PCA957X_BKEN		2
+#define PCA957X_PUPD		3
+#define PCA957X_CFG		4
+#define PCA957X_OUT		5
+#define PCA957X_MSK		6
+#define PCA957X_INTS		7
+
+#define PCA_GPIO_MASK		0x00FF
+#define PCA_INT			0x0100
+#define PCA953X_TYPE		0x1000
+#define PCA957X_TYPE		0x2000
 
 static const struct i2c_device_id pca953x_id[] = {
-	{ "pca9534", 8  | PCA953X_INT, },
-	{ "pca9535", 16 | PCA953X_INT, },
-	{ "pca9536", 4, },
-	{ "pca9537", 4  | PCA953X_INT, },
-	{ "pca9538", 8  | PCA953X_INT, },
-	{ "pca9539", 16 | PCA953X_INT, },
-	{ "pca9554", 8  | PCA953X_INT, },
-	{ "pca9555", 16 | PCA953X_INT, },
-	{ "pca9556", 8, },
-	{ "pca9557", 8, },
+	{ "pca9534", 8  | PCA953X_TYPE | PCA_INT, },
+	{ "pca9535", 16 | PCA953X_TYPE | PCA_INT, },
+	{ "pca9536", 4  | PCA953X_TYPE, },
+	{ "pca9537", 4  | PCA953X_TYPE | PCA_INT, },
+	{ "pca9538", 8  | PCA953X_TYPE | PCA_INT, },
+	{ "pca9539", 16 | PCA953X_TYPE | PCA_INT, },
+	{ "pca9554", 8  | PCA953X_TYPE | PCA_INT, },
+	{ "pca9555", 16 | PCA953X_TYPE | PCA_INT, },
+	{ "pca9556", 8  | PCA953X_TYPE, },
+	{ "pca9557", 8  | PCA953X_TYPE, },
+	{ "pca9574", 8  | PCA957X_TYPE | PCA_INT, },
+	{ "pca9575", 16 | PCA957X_TYPE | PCA_INT, },
 
-	{ "max7310", 8, },
-	{ "max7312", 16 | PCA953X_INT, },
-	{ "max7313", 16 | PCA953X_INT, },
-	{ "max7315", 8  | PCA953X_INT, },
-	{ "pca6107", 8  | PCA953X_INT, },
-	{ "tca6408", 8  | PCA953X_INT, },
-	{ "tca6416", 16 | PCA953X_INT, },
+	{ "max7310", 8  | PCA953X_TYPE, },
+	{ "max7312", 16 | PCA953X_TYPE | PCA_INT, },
+	{ "max7313", 16 | PCA953X_TYPE | PCA_INT, },
+	{ "max7315", 8  | PCA953X_TYPE | PCA_INT, },
+	{ "pca6107", 8  | PCA953X_TYPE | PCA_INT, },
+	{ "tca6408", 8  | PCA953X_TYPE | PCA_INT, },
+	{ "tca6416", 16 | PCA953X_TYPE | PCA_INT, },
 	/* NYET:  { "tca6424", 24, }, */
 	{ }
 };
@@ -75,16 +88,32 @@
 	struct pca953x_platform_data *dyn_pdata;
 	struct gpio_chip gpio_chip;
 	const char *const *names;
+	int	chip_type;
 };
 
 static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val)
 {
-	int ret;
+	int ret = 0;
 
 	if (chip->gpio_chip.ngpio <= 8)
 		ret = i2c_smbus_write_byte_data(chip->client, reg, val);
-	else
-		ret = i2c_smbus_write_word_data(chip->client, reg << 1, val);
+	else {
+		switch (chip->chip_type) {
+		case PCA953X_TYPE:
+			ret = i2c_smbus_write_word_data(chip->client,
+							reg << 1, val);
+			break;
+		case PCA957X_TYPE:
+			ret = i2c_smbus_write_byte_data(chip->client, reg << 1,
+							val & 0xff);
+			if (ret < 0)
+				break;
+			ret = i2c_smbus_write_byte_data(chip->client,
+							(reg << 1) + 1,
+							(val & 0xff00) >> 8);
+			break;
+		}
+	}
 
 	if (ret < 0) {
 		dev_err(&chip->client->dev, "failed writing register\n");
@@ -116,13 +145,22 @@
 {
 	struct pca953x_chip *chip;
 	uint16_t reg_val;
-	int ret;
+	int ret, offset = 0;
 
 	chip = container_of(gc, struct pca953x_chip, gpio_chip);
 
 	mutex_lock(&chip->i2c_lock);
 	reg_val = chip->reg_direction | (1u << off);
-	ret = pca953x_write_reg(chip, PCA953X_DIRECTION, reg_val);
+
+	switch (chip->chip_type) {
+	case PCA953X_TYPE:
+		offset = PCA953X_DIRECTION;
+		break;
+	case PCA957X_TYPE:
+		offset = PCA957X_CFG;
+		break;
+	}
+	ret = pca953x_write_reg(chip, offset, reg_val);
 	if (ret)
 		goto exit;
 
@@ -138,7 +176,7 @@
 {
 	struct pca953x_chip *chip;
 	uint16_t reg_val;
-	int ret;
+	int ret, offset = 0;
 
 	chip = container_of(gc, struct pca953x_chip, gpio_chip);
 
@@ -149,7 +187,15 @@
 	else
 		reg_val = chip->reg_output & ~(1u << off);
 
-	ret = pca953x_write_reg(chip, PCA953X_OUTPUT, reg_val);
+	switch (chip->chip_type) {
+	case PCA953X_TYPE:
+		offset = PCA953X_OUTPUT;
+		break;
+	case PCA957X_TYPE:
+		offset = PCA957X_OUT;
+		break;
+	}
+	ret = pca953x_write_reg(chip, offset, reg_val);
 	if (ret)
 		goto exit;
 
@@ -157,7 +203,15 @@
 
 	/* then direction */
 	reg_val = chip->reg_direction & ~(1u << off);
-	ret = pca953x_write_reg(chip, PCA953X_DIRECTION, reg_val);
+	switch (chip->chip_type) {
+	case PCA953X_TYPE:
+		offset = PCA953X_DIRECTION;
+		break;
+	case PCA957X_TYPE:
+		offset = PCA957X_CFG;
+		break;
+	}
+	ret = pca953x_write_reg(chip, offset, reg_val);
 	if (ret)
 		goto exit;
 
@@ -172,12 +226,20 @@
 {
 	struct pca953x_chip *chip;
 	uint16_t reg_val;
-	int ret;
+	int ret, offset = 0;
 
 	chip = container_of(gc, struct pca953x_chip, gpio_chip);
 
 	mutex_lock(&chip->i2c_lock);
-	ret = pca953x_read_reg(chip, PCA953X_INPUT, &reg_val);
+	switch (chip->chip_type) {
+	case PCA953X_TYPE:
+		offset = PCA953X_INPUT;
+		break;
+	case PCA957X_TYPE:
+		offset = PCA957X_IN;
+		break;
+	}
+	ret = pca953x_read_reg(chip, offset, &reg_val);
 	mutex_unlock(&chip->i2c_lock);
 	if (ret < 0) {
 		/* NOTE:  diagnostic already emitted; that's all we should
@@ -194,7 +256,7 @@
 {
 	struct pca953x_chip *chip;
 	uint16_t reg_val;
-	int ret;
+	int ret, offset = 0;
 
 	chip = container_of(gc, struct pca953x_chip, gpio_chip);
 
@@ -204,7 +266,15 @@
 	else
 		reg_val = chip->reg_output & ~(1u << off);
 
-	ret = pca953x_write_reg(chip, PCA953X_OUTPUT, reg_val);
+	switch (chip->chip_type) {
+	case PCA953X_TYPE:
+		offset = PCA953X_OUTPUT;
+		break;
+	case PCA957X_TYPE:
+		offset = PCA957X_OUT;
+		break;
+	}
+	ret = pca953x_write_reg(chip, offset, reg_val);
 	if (ret)
 		goto exit;
 
@@ -322,9 +392,17 @@
 	uint16_t old_stat;
 	uint16_t pending;
 	uint16_t trigger;
-	int ret;
+	int ret, offset = 0;
 
-	ret = pca953x_read_reg(chip, PCA953X_INPUT, &cur_stat);
+	switch (chip->chip_type) {
+	case PCA953X_TYPE:
+		offset = PCA953X_INPUT;
+		break;
+	case PCA957X_TYPE:
+		offset = PCA957X_IN;
+		break;
+	}
+	ret = pca953x_read_reg(chip, offset, &cur_stat);
 	if (ret)
 		return 0;
 
@@ -372,14 +450,21 @@
 {
 	struct i2c_client *client = chip->client;
 	struct pca953x_platform_data *pdata = client->dev.platform_data;
-	int ret;
+	int ret, offset = 0;
 
 	if (pdata->irq_base != -1
-			&& (id->driver_data & PCA953X_INT)) {
+			&& (id->driver_data & PCA_INT)) {
 		int lvl;
 
-		ret = pca953x_read_reg(chip, PCA953X_INPUT,
-				       &chip->irq_stat);
+		switch (chip->chip_type) {
+		case PCA953X_TYPE:
+			offset = PCA953X_INPUT;
+			break;
+		case PCA957X_TYPE:
+			offset = PCA957X_IN;
+			break;
+		}
+		ret = pca953x_read_reg(chip, offset, &chip->irq_stat);
 		if (ret)
 			goto out_failed;
 
@@ -439,7 +524,7 @@
 	struct i2c_client *client = chip->client;
 	struct pca953x_platform_data *pdata = client->dev.platform_data;
 
-	if (pdata->irq_base != -1 && (id->driver_data & PCA953X_INT))
+	if (pdata->irq_base != -1 && (id->driver_data & PCA_INT))
 		dev_warn(&client->dev, "interrupt support not compiled in\n");
 
 	return 0;
@@ -499,12 +584,65 @@
 }
 #endif
 
+static int __devinit device_pca953x_init(struct pca953x_chip *chip, int invert)
+{
+	int ret;
+
+	ret = pca953x_read_reg(chip, PCA953X_OUTPUT, &chip->reg_output);
+	if (ret)
+		goto out;
+
+	ret = pca953x_read_reg(chip, PCA953X_DIRECTION,
+			       &chip->reg_direction);
+	if (ret)
+		goto out;
+
+	/* set platform specific polarity inversion */
+	ret = pca953x_write_reg(chip, PCA953X_INVERT, invert);
+	if (ret)
+		goto out;
+	return 0;
+out:
+	return ret;
+}
+
+static int __devinit device_pca957x_init(struct pca953x_chip *chip, int invert)
+{
+	int ret;
+	uint16_t val = 0;
+
+	/* Let every port in proper state, that could save power */
+	pca953x_write_reg(chip, PCA957X_PUPD, 0x0);
+	pca953x_write_reg(chip, PCA957X_CFG, 0xffff);
+	pca953x_write_reg(chip, PCA957X_OUT, 0x0);
+
+	ret = pca953x_read_reg(chip, PCA957X_IN, &val);
+	if (ret)
+		goto out;
+	ret = pca953x_read_reg(chip, PCA957X_OUT, &chip->reg_output);
+	if (ret)
+		goto out;
+	ret = pca953x_read_reg(chip, PCA957X_CFG, &chip->reg_direction);
+	if (ret)
+		goto out;
+
+	/* set platform specific polarity inversion */
+	pca953x_write_reg(chip, PCA957X_INVRT, invert);
+
+	/* To enable register 6, 7 to controll pull up and pull down */
+	pca953x_write_reg(chip, PCA957X_BKEN, 0x202);
+
+	return 0;
+out:
+	return ret;
+}
+
 static int __devinit pca953x_probe(struct i2c_client *client,
 				   const struct i2c_device_id *id)
 {
 	struct pca953x_platform_data *pdata;
 	struct pca953x_chip *chip;
-	int ret;
+	int ret = 0;
 
 	chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL);
 	if (chip == NULL)
@@ -531,25 +669,20 @@
 	chip->gpio_start = pdata->gpio_base;
 
 	chip->names = pdata->names;
+	chip->chip_type = id->driver_data & (PCA953X_TYPE | PCA957X_TYPE);
 
 	mutex_init(&chip->i2c_lock);
 
 	/* initialize cached registers from their original values.
 	 * we can't share this chip with another i2c master.
 	 */
-	pca953x_setup_gpio(chip, id->driver_data & PCA953X_GPIOS);
+	pca953x_setup_gpio(chip, id->driver_data & PCA_GPIO_MASK);
 
-	ret = pca953x_read_reg(chip, PCA953X_OUTPUT, &chip->reg_output);
-	if (ret)
-		goto out_failed;
-
-	ret = pca953x_read_reg(chip, PCA953X_DIRECTION, &chip->reg_direction);
-	if (ret)
-		goto out_failed;
-
-	/* set platform specific polarity inversion */
-	ret = pca953x_write_reg(chip, PCA953X_INVERT, pdata->invert);
-	if (ret)
+	if (chip->chip_type == PCA953X_TYPE)
+		device_pca953x_init(chip, pdata->invert);
+	else if (chip->chip_type == PCA957X_TYPE)
+		device_pca957x_init(chip, pdata->invert);
+	else
 		goto out_failed;
 
 	ret = pca953x_irq_setup(chip, id);

diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c
index f970a5f..36919e7 100644
--- a/drivers/gpio/pch_gpio.c
+++ b/drivers/gpio/pch_gpio.c

@@ -283,8 +283,10 @@
 #define pch_gpio_resume NULL
 #endif
 
+#define PCI_VENDOR_ID_ROHM             0x10DB
 static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x8014) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, pch_gpio_pcidev_id);

diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 76a5af0..2067288 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c

@@ -19,6 +19,8 @@
 #define DM_MSG_PREFIX "io"
 
 #define DM_IO_MAX_REGIONS	BITS_PER_LONG
+#define MIN_IOS		16
+#define MIN_BIOS	16
 
 struct dm_io_client {
 	mempool_t *pool;
@@ -41,33 +43,21 @@
 static struct kmem_cache *_dm_io_cache;
 
 /*
- * io contexts are only dynamically allocated for asynchronous
- * io.  Since async io is likely to be the majority of io we'll
- * have the same number of io contexts as bios! (FIXME: must reduce this).
- */
-
-static unsigned int pages_to_ios(unsigned int pages)
-{
-	return 4 * pages;	/* too many ? */
-}
-
-/*
  * Create a client with mempool and bioset.
  */
-struct dm_io_client *dm_io_client_create(unsigned num_pages)
+struct dm_io_client *dm_io_client_create(void)
 {
-	unsigned ios = pages_to_ios(num_pages);
 	struct dm_io_client *client;
 
 	client = kmalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
 		return ERR_PTR(-ENOMEM);
 
-	client->pool = mempool_create_slab_pool(ios, _dm_io_cache);
+	client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache);
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(16, 0);
+	client->bios = bioset_create(MIN_BIOS, 0);
 	if (!client->bios)
 		goto bad;
 
@@ -81,13 +71,6 @@
 }
 EXPORT_SYMBOL(dm_io_client_create);
 
-int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client)
-{
-	return mempool_resize(client->pool, pages_to_ios(num_pages),
-			      GFP_KERNEL);
-}
-EXPORT_SYMBOL(dm_io_client_resize);
-
 void dm_io_client_destroy(struct dm_io_client *client)
 {
 	mempool_destroy(client->pool);

diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 1bb73a1..819e37e 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c

@@ -27,15 +27,19 @@
 
 #include "dm.h"
 
+#define SUB_JOB_SIZE	128
+#define SPLIT_COUNT	8
+#define MIN_JOBS	8
+#define RESERVE_PAGES	(DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE))
+
 /*-----------------------------------------------------------------
  * Each kcopyd client has its own little pool of preallocated
  * pages for kcopyd io.
  *---------------------------------------------------------------*/
 struct dm_kcopyd_client {
-	spinlock_t lock;
 	struct page_list *pages;
-	unsigned int nr_pages;
-	unsigned int nr_free_pages;
+	unsigned nr_reserved_pages;
+	unsigned nr_free_pages;
 
 	struct dm_io_client *io_client;
 
@@ -67,15 +71,18 @@
 	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
 }
 
-static struct page_list *alloc_pl(void)
+/*
+ * Obtain one page for the use of kcopyd.
+ */
+static struct page_list *alloc_pl(gfp_t gfp)
 {
 	struct page_list *pl;
 
-	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
+	pl = kmalloc(sizeof(*pl), gfp);
 	if (!pl)
 		return NULL;
 
-	pl->page = alloc_page(GFP_KERNEL);
+	pl->page = alloc_page(gfp);
 	if (!pl->page) {
 		kfree(pl);
 		return NULL;
@@ -90,41 +97,56 @@
 	kfree(pl);
 }
 
+/*
+ * Add the provided pages to a client's free page list, releasing
+ * back to the system any beyond the reserved_pages limit.
+ */
+static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl)
+{
+	struct page_list *next;
+
+	do {
+		next = pl->next;
+
+		if (kc->nr_free_pages >= kc->nr_reserved_pages)
+			free_pl(pl);
+		else {
+			pl->next = kc->pages;
+			kc->pages = pl;
+			kc->nr_free_pages++;
+		}
+
+		pl = next;
+	} while (pl);
+}
+
 static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
 			    unsigned int nr, struct page_list **pages)
 {
 	struct page_list *pl;
 
-	spin_lock(&kc->lock);
-	if (kc->nr_free_pages < nr) {
-		spin_unlock(&kc->lock);
-		return -ENOMEM;
-	}
+	*pages = NULL;
 
-	kc->nr_free_pages -= nr;
-	for (*pages = pl = kc->pages; --nr; pl = pl->next)
-		;
-
-	kc->pages = pl->next;
-	pl->next = NULL;
-
-	spin_unlock(&kc->lock);
+	do {
+		pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY);
+		if (unlikely(!pl)) {
+			/* Use reserved pages */
+			pl = kc->pages;
+			if (unlikely(!pl))
+				goto out_of_memory;
+			kc->pages = pl->next;
+			kc->nr_free_pages--;
+		}
+		pl->next = *pages;
+		*pages = pl;
+	} while (--nr);
 
 	return 0;
-}
 
-static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl)
-{
-	struct page_list *cursor;
-
-	spin_lock(&kc->lock);
-	for (cursor = pl; cursor->next; cursor = cursor->next)
-		kc->nr_free_pages++;
-
-	kc->nr_free_pages++;
-	cursor->next = kc->pages;
-	kc->pages = pl;
-	spin_unlock(&kc->lock);
+out_of_memory:
+	if (*pages)
+		kcopyd_put_pages(kc, *pages);
+	return -ENOMEM;
 }
 
 /*
@@ -141,13 +163,16 @@
 	}
 }
 
-static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr)
+/*
+ * Allocate and reserve nr_pages for the use of a specific client.
+ */
+static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages)
 {
-	unsigned int i;
+	unsigned i;
 	struct page_list *pl = NULL, *next;
 
-	for (i = 0; i < nr; i++) {
-		next = alloc_pl();
+	for (i = 0; i < nr_pages; i++) {
+		next = alloc_pl(GFP_KERNEL);
 		if (!next) {
 			if (pl)
 				drop_pages(pl);
@@ -157,17 +182,18 @@
 		pl = next;
 	}
 
+	kc->nr_reserved_pages += nr_pages;
 	kcopyd_put_pages(kc, pl);
-	kc->nr_pages += nr;
+
 	return 0;
 }
 
 static void client_free_pages(struct dm_kcopyd_client *kc)
 {
-	BUG_ON(kc->nr_free_pages != kc->nr_pages);
+	BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages);
 	drop_pages(kc->pages);
 	kc->pages = NULL;
-	kc->nr_free_pages = kc->nr_pages = 0;
+	kc->nr_free_pages = kc->nr_reserved_pages = 0;
 }
 
 /*-----------------------------------------------------------------
@@ -216,16 +242,17 @@
 	struct mutex lock;
 	atomic_t sub_jobs;
 	sector_t progress;
-};
 
-/* FIXME: this should scale with the number of pages */
-#define MIN_JOBS 512
+	struct kcopyd_job *master_job;
+};
 
 static struct kmem_cache *_job_cache;
 
 int __init dm_kcopyd_init(void)
 {
-	_job_cache = KMEM_CACHE(kcopyd_job, 0);
+	_job_cache = kmem_cache_create("kcopyd_job",
+				sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1),
+				__alignof__(struct kcopyd_job), 0, NULL);
 	if (!_job_cache)
 		return -ENOMEM;
 
@@ -299,7 +326,12 @@
 
 	if (job->pages)
 		kcopyd_put_pages(kc, job->pages);
-	mempool_free(job, kc->job_pool);
+	/*
+	 * If this is the master job, the sub jobs have already
+	 * completed so we can free everything.
+	 */
+	if (job->master_job == job)
+		mempool_free(job, kc->job_pool);
 	fn(read_err, write_err, context);
 
 	if (atomic_dec_and_test(&kc->nr_jobs))
@@ -460,14 +492,14 @@
 	wake(kc);
 }
 
-#define SUB_JOB_SIZE 128
 static void segment_complete(int read_err, unsigned long write_err,
 			     void *context)
 {
 	/* FIXME: tidy this function */
 	sector_t progress = 0;
 	sector_t count = 0;
-	struct kcopyd_job *job = (struct kcopyd_job *) context;
+	struct kcopyd_job *sub_job = (struct kcopyd_job *) context;
+	struct kcopyd_job *job = sub_job->master_job;
 	struct dm_kcopyd_client *kc = job->kc;
 
 	mutex_lock(&job->lock);
@@ -498,8 +530,6 @@
 
 	if (count) {
 		int i;
-		struct kcopyd_job *sub_job = mempool_alloc(kc->job_pool,
-							   GFP_NOIO);
 
 		*sub_job = *job;
 		sub_job->source.sector += progress;
@@ -511,7 +541,7 @@
 		}
 
 		sub_job->fn = segment_complete;
-		sub_job->context = job;
+		sub_job->context = sub_job;
 		dispatch_job(sub_job);
 
 	} else if (atomic_dec_and_test(&job->sub_jobs)) {
@@ -531,19 +561,19 @@
 }
 
 /*
- * Create some little jobs that will do the move between
- * them.
+ * Create some sub jobs to share the work between them.
  */
-#define SPLIT_COUNT 8
-static void split_job(struct kcopyd_job *job)
+static void split_job(struct kcopyd_job *master_job)
 {
 	int i;
 
-	atomic_inc(&job->kc->nr_jobs);
+	atomic_inc(&master_job->kc->nr_jobs);
 
-	atomic_set(&job->sub_jobs, SPLIT_COUNT);
-	for (i = 0; i < SPLIT_COUNT; i++)
-		segment_complete(0, 0u, job);
+	atomic_set(&master_job->sub_jobs, SPLIT_COUNT);
+	for (i = 0; i < SPLIT_COUNT; i++) {
+		master_job[i + 1].master_job = master_job;
+		segment_complete(0, 0u, &master_job[i + 1]);
+	}
 }
 
 int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
@@ -553,7 +583,8 @@
 	struct kcopyd_job *job;
 
 	/*
-	 * Allocate a new job.
+	 * Allocate an array of jobs consisting of one master job
+	 * followed by SPLIT_COUNT sub jobs.
 	 */
 	job = mempool_alloc(kc->job_pool, GFP_NOIO);
 
@@ -577,10 +608,10 @@
 
 	job->fn = fn;
 	job->context = context;
+	job->master_job = job;
 
-	if (job->source.count < SUB_JOB_SIZE)
+	if (job->source.count <= SUB_JOB_SIZE)
 		dispatch_job(job);
-
 	else {
 		mutex_init(&job->lock);
 		job->progress = 0;
@@ -606,17 +637,15 @@
 /*-----------------------------------------------------------------
  * Client setup
  *---------------------------------------------------------------*/
-int dm_kcopyd_client_create(unsigned int nr_pages,
-			    struct dm_kcopyd_client **result)
+struct dm_kcopyd_client *dm_kcopyd_client_create(void)
 {
 	int r = -ENOMEM;
 	struct dm_kcopyd_client *kc;
 
 	kc = kmalloc(sizeof(*kc), GFP_KERNEL);
 	if (!kc)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	spin_lock_init(&kc->lock);
 	spin_lock_init(&kc->job_lock);
 	INIT_LIST_HEAD(&kc->complete_jobs);
 	INIT_LIST_HEAD(&kc->io_jobs);
@@ -633,12 +662,12 @@
 		goto bad_workqueue;
 
 	kc->pages = NULL;
-	kc->nr_pages = kc->nr_free_pages = 0;
-	r = client_alloc_pages(kc, nr_pages);
+	kc->nr_reserved_pages = kc->nr_free_pages = 0;
+	r = client_reserve_pages(kc, RESERVE_PAGES);
 	if (r)
 		goto bad_client_pages;
 
-	kc->io_client = dm_io_client_create(nr_pages);
+	kc->io_client = dm_io_client_create();
 	if (IS_ERR(kc->io_client)) {
 		r = PTR_ERR(kc->io_client);
 		goto bad_io_client;
@@ -647,8 +676,7 @@
 	init_waitqueue_head(&kc->destroyq);
 	atomic_set(&kc->nr_jobs, 0);
 
-	*result = kc;
-	return 0;
+	return kc;
 
 bad_io_client:
 	client_free_pages(kc);
@@ -659,7 +687,7 @@
 bad_slab:
 	kfree(kc);
 
-	return r;
+	return ERR_PTR(r);
 }
 EXPORT_SYMBOL(dm_kcopyd_client_create);
 

diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a1f3218..948e3f4 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c

@@ -449,8 +449,7 @@
 
 		lc->io_req.mem.type = DM_IO_VMA;
 		lc->io_req.notify.fn = NULL;
-		lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
-								   PAGE_SIZE));
+		lc->io_req.client = dm_io_client_create();
 		if (IS_ERR(lc->io_req.client)) {
 			r = PTR_ERR(lc->io_req.client);
 			DMWARN("couldn't allocate disk io client");

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index a550a05..aa4e570 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c

@@ -1290,7 +1290,7 @@
 	if (!error && !clone->errors)
 		return 0;	/* I/O complete */
 
-	if (error == -EOPNOTSUPP || error == -EREMOTEIO)
+	if (error == -EOPNOTSUPP || error == -EREMOTEIO || error == -EILSEQ)
 		return error;
 
 	if (mpio->pgpath)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 976ad46..9bfd057 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c

@@ -22,8 +22,6 @@
 #define DM_MSG_PREFIX "raid1"
 
 #define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */
-#define DM_IO_PAGES 64
-#define DM_KCOPYD_PAGES 64
 
 #define DM_RAID1_HANDLE_ERRORS 0x01
 #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
@@ -887,7 +885,7 @@
 		return NULL;
 	}
 
-	ms->io_client = dm_io_client_create(DM_IO_PAGES);
+	ms->io_client = dm_io_client_create();
 	if (IS_ERR(ms->io_client)) {
 		ti->error = "Error creating dm_io client";
 		mempool_destroy(ms->read_record_pool);
@@ -1117,9 +1115,11 @@
 		goto err_destroy_wq;
 	}
 
-	r = dm_kcopyd_client_create(DM_KCOPYD_PAGES, &ms->kcopyd_client);
-	if (r)
+	ms->kcopyd_client = dm_kcopyd_client_create();
+	if (IS_ERR(ms->kcopyd_client)) {
+		r = PTR_ERR(ms->kcopyd_client);
 		goto err_destroy_wq;
+	}
 
 	wakeup_mirrord(ms);
 	return 0;

diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 95891df..135c2f1 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c

@@ -154,11 +154,6 @@
 	struct workqueue_struct *metadata_wq;
 };
 
-static unsigned sectors_to_pages(unsigned sectors)
-{
-	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
-}
-
 static int alloc_area(struct pstore *ps)
 {
 	int r = -ENOMEM;
@@ -318,8 +313,7 @@
 		chunk_size_supplied = 0;
 	}
 
-	ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
-							     chunk_size));
+	ps->io_client = dm_io_client_create();
 	if (IS_ERR(ps->io_client))
 		return PTR_ERR(ps->io_client);
 
@@ -368,11 +362,6 @@
 		return r;
 	}
 
-	r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
-				ps->io_client);
-	if (r)
-		return r;
-
 	r = alloc_area(ps);
 	return r;
 

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index a2d3309..9ecff5f 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c

@@ -40,11 +40,6 @@
 #define SNAPSHOT_COPY_PRIORITY 2
 
 /*
- * Reserve 1MB for each snapshot initially (with minimum of 1 page).
- */
-#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1)
-
-/*
  * The size of the mempool used to track chunks in use.
  */
 #define MIN_IOS 256
@@ -1116,8 +1111,9 @@
 		goto bad_hash_tables;
 	}
 
-	r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
-	if (r) {
+	s->kcopyd_client = dm_kcopyd_client_create();
+	if (IS_ERR(s->kcopyd_client)) {
+		r = PTR_ERR(s->kcopyd_client);
 		ti->error = "Could not create kcopyd client";
 		goto bad_kcopyd;
 	}

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index cb8380c..451c3bb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c

@@ -362,6 +362,7 @@
 static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 				  sector_t start, sector_t len, void *data)
 {
+	struct request_queue *q;
 	struct queue_limits *limits = data;
 	struct block_device *bdev = dev->bdev;
 	sector_t dev_size =
@@ -370,6 +371,22 @@
 		limits->logical_block_size >> SECTOR_SHIFT;
 	char b[BDEVNAME_SIZE];
 
+	/*
+	 * Some devices exist without request functions,
+	 * such as loop devices not yet bound to backing files.
+	 * Forbid the use of such devices.
+	 */
+	q = bdev_get_queue(bdev);
+	if (!q || !q->make_request_fn) {
+		DMWARN("%s: %s is not yet initialised: "
+		       "start=%llu, len=%llu, dev_size=%llu",
+		       dm_device_name(ti->table->md), bdevname(bdev, b),
+		       (unsigned long long)start,
+		       (unsigned long long)len,
+		       (unsigned long long)dev_size);
+		return 1;
+	}
+
 	if (!dev_size)
 		return 0;
 
@@ -1346,7 +1363,8 @@
 		return 0;
 
 	/*
-	 * Ensure that at least one underlying device supports discards.
+	 * Unless any target used by the table set discards_supported,
+	 * require at least one underlying device to support discards.
 	 * t->devices includes internal dm devices such as mirror logs
 	 * so we need to use iterate_devices here, which targets
 	 * supporting discard must provide.
@@ -1354,6 +1372,9 @@
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
+		if (ti->discards_supported)
+			return 1;
+
 		if (ti->type->iterate_devices &&
 		    ti->type->iterate_devices(ti, device_discard_capable, NULL))
 			return 1;

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b6c2677..0f09c05 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig

@@ -721,7 +721,7 @@
 
 config MFD_TPS65910
 	bool "TPS65910 Power Management chip"
-	depends on I2C=y
+	depends on I2C=y && GPIOLIB
 	select MFD_CORE
 	select GPIO_TPS65910
 	help

diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index e637821..02a15d7 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c

@@ -2005,7 +2005,8 @@
 static struct mfd_cell db8500_prcmu_devs[] = {
 	{
 		.name = "db8500-prcmu-regulators",
-		.mfd_data = &db8500_regulators,
+		.platform_data = &db8500_regulators,
+		.pdata_size = sizeof(db8500_regulators),
 	},
 	{
 		.name = "cpufreq-u8500",

diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index b0c5631..8cebec5 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c

@@ -304,7 +304,10 @@
 		return 1;
 	}
 	/* Readjust the instruction pointer if needed */
-	instruction_pointer_set(&kgdbts_regs, ip + offset);
+	ip += offset;
+#ifdef GDB_ADJUSTS_BREAK_OFFSET
+	instruction_pointer_set(&kgdbts_regs, ip);
+#endif
 	return 0;
 }
 

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0cb0b06..f685324 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c

@@ -609,7 +609,7 @@
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (capacity < 2+MAX_SKB_FRAGS) {
 		netif_stop_queue(dev);
-		if (unlikely(!virtqueue_enable_cb(vi->svq))) {
+		if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
 			/* More just got used, free them then recheck. */
 			capacity += free_old_xmit_skbs(vi);
 			if (capacity >= 2+MAX_SKB_FRAGS) {

diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h
index 4e70749..a8d5bb3 100644
--- a/drivers/oprofile/event_buffer.h
+++ b/drivers/oprofile/event_buffer.h

@@ -11,7 +11,7 @@
 #define EVENT_BUFFER_H
 
 #include <linux/types.h>
-#include <asm/mutex.h>
+#include <linux/mutex.h>
 
 int alloc_event_buffer(void);
 

diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index f9bda64f..dccd863 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c

@@ -14,7 +14,7 @@
 #include <linux/moduleparam.h>
 #include <linux/workqueue.h>
 #include <linux/time.h>
-#include <asm/mutex.h>
+#include <linux/mutex.h>
 
 #include "oprof.h"
 #include "event_buffer.h"

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 7c3b18e..d36f41e 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c

@@ -195,6 +195,8 @@
 		return PCI_D2;
 	case ACPI_STATE_D3:
 		return PCI_D3hot;
+	case ACPI_STATE_D3_COLD:
+		return PCI_D3cold;
 	}
 	return PCI_POWER_ERROR;
 }

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 5cb999b..45e0191 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig

@@ -39,7 +39,7 @@
 
 config ACERHDF
 	tristate "Acer Aspire One temperature and fan driver"
-	depends on THERMAL && THERMAL_HWMON && ACPI
+	depends on THERMAL && ACPI
 	---help---
 	  This is a driver for Acer Aspire One netbooks. It allows to access
 	  the temperature sensor and to control the fan.
@@ -760,4 +760,13 @@
           MXM is a standard for laptop graphics cards, the WMI interface
 	  is required for switchable nvidia graphics machines
 
+config INTEL_OAKTRAIL
+	tristate "Intel Oaktrail Platform Extras"
+	depends on ACPI
+	depends on RFKILL && BACKLIGHT_CLASS_DEVICE && ACPI
+	---help---
+	  Intel Oaktrail platform need this driver to provide interfaces to
+	  enable/disable the Camera, WiFi, BT etc. devices. If in doubt, say Y
+	  here; it will only load on supported platforms.
+
 endif # X86_PLATFORM_DEVICES

diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index a7ab3bc..afc1f83 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile

@@ -41,5 +41,6 @@
 obj-$(CONFIG_XO15_EBOOK)	+= xo15-ebook.o
 obj-$(CONFIG_IBM_RTL)		+= ibm_rtl.o
 obj-$(CONFIG_SAMSUNG_LAPTOP)	+= samsung-laptop.o
-obj-$(CONFIG_INTEL_MFLD_THERMAL)	+= intel_mid_thermal.o
 obj-$(CONFIG_MXM_WMI)		+= mxm-wmi.o
+obj-$(CONFIG_INTEL_MID_POWER_BUTTON)	+= intel_mid_powerbtn.o
+obj-$(CONFIG_INTEL_OAKTRAIL)	+= intel_oaktrail.o

diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index ac4e7f8..005417b 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c

@@ -98,13 +98,26 @@
 
 static const struct key_entry acer_wmi_keymap[] = {
 	{KE_KEY, 0x01, {KEY_WLAN} },     /* WiFi */
+	{KE_KEY, 0x03, {KEY_WLAN} },     /* WiFi */
 	{KE_KEY, 0x12, {KEY_BLUETOOTH} },	/* BT */
 	{KE_KEY, 0x21, {KEY_PROG1} },    /* Backup */
 	{KE_KEY, 0x22, {KEY_PROG2} },    /* Arcade */
 	{KE_KEY, 0x23, {KEY_PROG3} },    /* P_Key */
 	{KE_KEY, 0x24, {KEY_PROG4} },    /* Social networking_Key */
+	{KE_IGNORE, 0x41, {KEY_MUTE} },
+	{KE_IGNORE, 0x42, {KEY_PREVIOUSSONG} },
+	{KE_IGNORE, 0x43, {KEY_NEXTSONG} },
+	{KE_IGNORE, 0x44, {KEY_PLAYPAUSE} },
+	{KE_IGNORE, 0x45, {KEY_STOP} },
+	{KE_IGNORE, 0x48, {KEY_VOLUMEUP} },
+	{KE_IGNORE, 0x49, {KEY_VOLUMEDOWN} },
+	{KE_IGNORE, 0x61, {KEY_SWITCHVIDEOMODE} },
+	{KE_IGNORE, 0x62, {KEY_BRIGHTNESSUP} },
+	{KE_IGNORE, 0x63, {KEY_BRIGHTNESSDOWN} },
 	{KE_KEY, 0x64, {KEY_SWITCHVIDEOMODE} },	/* Display Switch */
+	{KE_IGNORE, 0x81, {KEY_SLEEP} },
 	{KE_KEY, 0x82, {KEY_TOUCHPAD_TOGGLE} },	/* Touch Pad On/Off */
+	{KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} },
 	{KE_END, 0}
 };
 
@@ -122,6 +135,7 @@
  */
 #define ACER_WMID3_GDS_WIRELESS		(1<<0)	/* WiFi */
 #define ACER_WMID3_GDS_THREEG		(1<<6)	/* 3G */
+#define ACER_WMID3_GDS_WIMAX		(1<<7)	/* WiMAX */
 #define ACER_WMID3_GDS_BLUETOOTH	(1<<11)	/* BT */
 
 struct lm_input_params {
@@ -737,8 +751,11 @@
 
 	obj = (union acpi_object *) result.pointer;
 	if (obj && obj->type == ACPI_TYPE_BUFFER &&
-		obj->buffer.length == sizeof(u32)) {
+		(obj->buffer.length == sizeof(u32) ||
+		obj->buffer.length == sizeof(u64))) {
 		tmp = *((u32 *) obj->buffer.pointer);
+	} else if (obj->type == ACPI_TYPE_INTEGER) {
+		tmp = (u32) obj->integer.value;
 	} else {
 		tmp = 0;
 	}
@@ -866,8 +883,11 @@
 
 	obj = (union acpi_object *) out.pointer;
 	if (obj && obj->type == ACPI_TYPE_BUFFER &&
-		obj->buffer.length == sizeof(u32)) {
+		(obj->buffer.length == sizeof(u32) ||
+		obj->buffer.length == sizeof(u64))) {
 		devices = *((u32 *) obj->buffer.pointer);
+	} else if (obj->type == ACPI_TYPE_INTEGER) {
+		devices = (u32) obj->integer.value;
 	} else {
 		kfree(out.pointer);
 		return AE_ERROR;
@@ -876,7 +896,8 @@
 	dmi_walk(type_aa_dmi_decode, NULL);
 	if (!has_type_aa) {
 		interface->capability |= ACER_CAP_WIRELESS;
-		interface->capability |= ACER_CAP_THREEG;
+		if (devices & 0x40)
+			interface->capability |= ACER_CAP_THREEG;
 		if (devices & 0x10)
 			interface->capability |= ACER_CAP_BLUETOOTH;
 	}
@@ -961,10 +982,12 @@
 	 * These will all fail silently if the value given is invalid, or the
 	 * capability isn't available on the given interface
 	 */
-	set_u32(mailled, ACER_CAP_MAILLED);
-	if (!has_type_aa)
+	if (mailled >= 0)
+		set_u32(mailled, ACER_CAP_MAILLED);
+	if (!has_type_aa && threeg >= 0)
 		set_u32(threeg, ACER_CAP_THREEG);
-	set_u32(brightness, ACER_CAP_BRIGHTNESS);
+	if (brightness >= 0)
+		set_u32(brightness, ACER_CAP_BRIGHTNESS);
 }
 
 /*
@@ -1081,7 +1104,7 @@
 		return AE_ERROR;
 	}
 	if (obj->buffer.length != 8) {
-		pr_warning("Unknown buffer length %d\n", obj->buffer.length);
+		pr_warn("Unknown buffer length %d\n", obj->buffer.length);
 		kfree(obj);
 		return AE_ERROR;
 	}
@@ -1090,8 +1113,8 @@
 	kfree(obj);
 
 	if (return_value.error_code || return_value.ec_return_value)
-		pr_warning("Get Device Status failed: "
-			"0x%x - 0x%x\n", return_value.error_code,
+		pr_warn("Get Device Status failed: 0x%x - 0x%x\n",
+			return_value.error_code,
 			return_value.ec_return_value);
 	else
 		*value = !!(return_value.devices & device);
@@ -1124,6 +1147,114 @@
 	}
 }
 
+static acpi_status wmid3_set_device_status(u32 value, u16 device)
+{
+	struct wmid3_gds_return_value return_value;
+	acpi_status status;
+	union acpi_object *obj;
+	u16 devices;
+	struct wmid3_gds_input_param params = {
+		.function_num = 0x1,
+		.hotkey_number = 0x01,
+		.devices = ACER_WMID3_GDS_WIRELESS &
+				ACER_WMID3_GDS_THREEG &
+				ACER_WMID3_GDS_WIMAX &
+				ACER_WMID3_GDS_BLUETOOTH,
+	};
+	struct acpi_buffer input = {
+		sizeof(struct wmid3_gds_input_param),
+		&params
+	};
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer output2 = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	status = wmi_evaluate_method(WMID_GUID3, 0, 0x2, &input, &output);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	obj = output.pointer;
+
+	if (!obj)
+		return AE_ERROR;
+	else if (obj->type != ACPI_TYPE_BUFFER) {
+		kfree(obj);
+		return AE_ERROR;
+	}
+	if (obj->buffer.length != 8) {
+		pr_warning("Unknown buffer length %d\n", obj->buffer.length);
+		kfree(obj);
+		return AE_ERROR;
+	}
+
+	return_value = *((struct wmid3_gds_return_value *)obj->buffer.pointer);
+	kfree(obj);
+
+	if (return_value.error_code || return_value.ec_return_value) {
+		pr_warning("Get Current Device Status failed: "
+			"0x%x - 0x%x\n", return_value.error_code,
+			return_value.ec_return_value);
+		return status;
+	}
+
+	devices = return_value.devices;
+	params.function_num = 0x2;
+	params.hotkey_number = 0x01;
+	params.devices = (value) ? (devices | device) : (devices & ~device);
+
+	status = wmi_evaluate_method(WMID_GUID3, 0, 0x1, &input, &output2);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	obj = output2.pointer;
+
+	if (!obj)
+		return AE_ERROR;
+	else if (obj->type != ACPI_TYPE_BUFFER) {
+		kfree(obj);
+		return AE_ERROR;
+	}
+	if (obj->buffer.length != 4) {
+		pr_warning("Unknown buffer length %d\n", obj->buffer.length);
+		kfree(obj);
+		return AE_ERROR;
+	}
+
+	return_value = *((struct wmid3_gds_return_value *)obj->buffer.pointer);
+	kfree(obj);
+
+	if (return_value.error_code || return_value.ec_return_value)
+		pr_warning("Set Device Status failed: "
+			"0x%x - 0x%x\n", return_value.error_code,
+			return_value.ec_return_value);
+
+	return status;
+}
+
+static acpi_status set_device_status(u32 value, u32 cap)
+{
+	if (wmi_has_guid(WMID_GUID3)) {
+		u16 device;
+
+		switch (cap) {
+		case ACER_CAP_WIRELESS:
+			device = ACER_WMID3_GDS_WIRELESS;
+			break;
+		case ACER_CAP_BLUETOOTH:
+			device = ACER_WMID3_GDS_BLUETOOTH;
+			break;
+		case ACER_CAP_THREEG:
+			device = ACER_WMID3_GDS_THREEG;
+			break;
+		default:
+			return AE_ERROR;
+		}
+		return wmid3_set_device_status(value, device);
+
+	} else {
+		return set_u32(value, cap);
+	}
+}
+
 /*
  * Rfkill devices
  */
@@ -1160,7 +1291,7 @@
 	u32 cap = (unsigned long)data;
 
 	if (rfkill_inited) {
-		status = set_u32(!blocked, cap);
+		status = set_device_status(!blocked, cap);
 		if (ACPI_FAILURE(status))
 			return -ENODEV;
 	}
@@ -1317,7 +1448,7 @@
 
 	status = wmi_get_event_data(value, &response);
 	if (status != AE_OK) {
-		pr_warning("bad event status 0x%x\n", status);
+		pr_warn("bad event status 0x%x\n", status);
 		return;
 	}
 
@@ -1326,12 +1457,12 @@
 	if (!obj)
 		return;
 	if (obj->type != ACPI_TYPE_BUFFER) {
-		pr_warning("Unknown response received %d\n", obj->type);
+		pr_warn("Unknown response received %d\n", obj->type);
 		kfree(obj);
 		return;
 	}
 	if (obj->buffer.length != 8) {
-		pr_warning("Unknown buffer length %d\n", obj->buffer.length);
+		pr_warn("Unknown buffer length %d\n", obj->buffer.length);
 		kfree(obj);
 		return;
 	}
@@ -1343,7 +1474,7 @@
 	case WMID_HOTKEY_EVENT:
 		if (return_value.device_state) {
 			u16 device_state = return_value.device_state;
-			pr_debug("deivces states: 0x%x\n", device_state);
+			pr_debug("device state: 0x%x\n", device_state);
 			if (has_cap(ACER_CAP_WIRELESS))
 				rfkill_set_sw_state(wireless_rfkill,
 				!(device_state & ACER_WMID3_GDS_WIRELESS));
@@ -1356,11 +1487,11 @@
 		}
 		if (!sparse_keymap_report_event(acer_wmi_input_dev,
 				return_value.key_num, 1, true))
-			pr_warning("Unknown key number - 0x%x\n",
+			pr_warn("Unknown key number - 0x%x\n",
 				return_value.key_num);
 		break;
 	default:
-		pr_warning("Unknown function number - %d - %d\n",
+		pr_warn("Unknown function number - %d - %d\n",
 			return_value.function, return_value.key_num);
 		break;
 	}
@@ -1389,7 +1520,7 @@
 		return AE_ERROR;
 	}
 	if (obj->buffer.length != 4) {
-		pr_warning("Unknown buffer length %d\n", obj->buffer.length);
+		pr_warn("Unknown buffer length %d\n", obj->buffer.length);
 		kfree(obj);
 		return AE_ERROR;
 	}
@@ -1414,11 +1545,11 @@
 	status = wmid3_set_lm_mode(&params, &return_value);
 
 	if (return_value.error_code || return_value.ec_return_value)
-		pr_warning("Enabling EC raw mode failed: "
-		       "0x%x - 0x%x\n", return_value.error_code,
-		       return_value.ec_return_value);
+		pr_warn("Enabling EC raw mode failed: 0x%x - 0x%x\n",
+			return_value.error_code,
+			return_value.ec_return_value);
 	else
-		pr_info("Enabled EC raw mode");
+		pr_info("Enabled EC raw mode\n");
 
 	return status;
 }
@@ -1437,9 +1568,9 @@
 	status = wmid3_set_lm_mode(&params, &return_value);
 
 	if (return_value.error_code || return_value.ec_return_value)
-		pr_warning("Enabling Launch Manager failed: "
-		       "0x%x - 0x%x\n", return_value.error_code,
-		       return_value.ec_return_value);
+		pr_warn("Enabling Launch Manager failed: 0x%x - 0x%x\n",
+			return_value.error_code,
+			return_value.ec_return_value);
 
 	return status;
 }
@@ -1506,8 +1637,11 @@
 
 	obj = (union acpi_object *) out.pointer;
 	if (obj && obj->type == ACPI_TYPE_BUFFER &&
-		obj->buffer.length == sizeof(u32)) {
+		(obj->buffer.length == sizeof(u32) ||
+		obj->buffer.length == sizeof(u64))) {
 		devices = *((u32 *) obj->buffer.pointer);
+	} else if (obj->type == ACPI_TYPE_INTEGER) {
+		devices = (u32) obj->integer.value;
 	}
 
 	kfree(out.pointer);

diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 60f9cfc..fca3489 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c

@@ -35,10 +35,8 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/fs.h>
 #include <linux/dmi.h>
-#include <acpi/acpi_drivers.h>
-#include <linux/sched.h>
+#include <linux/acpi.h>
 #include <linux/thermal.h>
 #include <linux/platform_device.h>
 

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index c53b3ff..d65df92 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c

@@ -318,7 +318,7 @@
 
 	if (status != AE_OK) {
 		if (ret)
-			pr_warning("Error finding %s\n", method);
+			pr_warn("Error finding %s\n", method);
 		return -ENODEV;
 	}
 	return 0;
@@ -383,7 +383,7 @@
 	rv = acpi_evaluate_integer(asus->handle, METHOD_KBD_LIGHT_GET,
 				   &params, &kblv);
 	if (ACPI_FAILURE(rv)) {
-		pr_warning("Error reading kled level\n");
+		pr_warn("Error reading kled level\n");
 		return -ENODEV;
 	}
 	return kblv;
@@ -397,7 +397,7 @@
 		kblv = 0;
 
 	if (write_acpi_int(asus->handle, METHOD_KBD_LIGHT_SET, kblv)) {
-		pr_warning("Keyboard LED display write failed\n");
+		pr_warn("Keyboard LED display write failed\n");
 		return -EINVAL;
 	}
 	return 0;
@@ -531,7 +531,7 @@
 	rv = acpi_evaluate_integer(asus->handle, METHOD_BRIGHTNESS_GET,
 				   NULL, &value);
 	if (ACPI_FAILURE(rv))
-		pr_warning("Error reading brightness\n");
+		pr_warn("Error reading brightness\n");
 
 	return value;
 }
@@ -541,7 +541,7 @@
 	struct asus_laptop *asus = bl_get_data(bd);
 
 	if (write_acpi_int(asus->handle, METHOD_BRIGHTNESS_SET, value)) {
-		pr_warning("Error changing brightness\n");
+		pr_warn("Error changing brightness\n");
 		return -EIO;
 	}
 	return 0;
@@ -730,7 +730,7 @@
 	rv = parse_arg(buf, count, &value);
 	if (rv > 0) {
 		if (write_acpi_int(asus->handle, METHOD_LEDD, value)) {
-			pr_warning("LED display write failed\n");
+			pr_warn("LED display write failed\n");
 			return -ENODEV;
 		}
 		asus->ledd_status = (u32) value;
@@ -752,7 +752,7 @@
 	rv = acpi_evaluate_integer(asus->handle, METHOD_WL_STATUS,
 				   NULL, &status);
 	if (ACPI_FAILURE(rv)) {
-		pr_warning("Error reading Wireless status\n");
+		pr_warn("Error reading Wireless status\n");
 		return -EINVAL;
 	}
 	return !!(status & mask);
@@ -764,7 +764,7 @@
 static int asus_wlan_set(struct asus_laptop *asus, int status)
 {
 	if (write_acpi_int(asus->handle, METHOD_WLAN, !!status)) {
-		pr_warning("Error setting wlan status to %d", status);
+		pr_warn("Error setting wlan status to %d\n", status);
 		return -EIO;
 	}
 	return 0;
@@ -792,7 +792,7 @@
 static int asus_bluetooth_set(struct asus_laptop *asus, int status)
 {
 	if (write_acpi_int(asus->handle, METHOD_BLUETOOTH, !!status)) {
-		pr_warning("Error setting bluetooth status to %d", status);
+		pr_warn("Error setting bluetooth status to %d\n", status);
 		return -EIO;
 	}
 	return 0;
@@ -821,7 +821,7 @@
 static int asus_wimax_set(struct asus_laptop *asus, int status)
 {
 	if (write_acpi_int(asus->handle, METHOD_WIMAX, !!status)) {
-		pr_warning("Error setting wimax status to %d", status);
+		pr_warn("Error setting wimax status to %d\n", status);
 		return -EIO;
 	}
 	return 0;
@@ -850,7 +850,7 @@
 static int asus_wwan_set(struct asus_laptop *asus, int status)
 {
 	if (write_acpi_int(asus->handle, METHOD_WWAN, !!status)) {
-		pr_warning("Error setting wwan status to %d", status);
+		pr_warn("Error setting wwan status to %d\n", status);
 		return -EIO;
 	}
 	return 0;
@@ -880,7 +880,7 @@
 {
 	/* no sanity check needed for now */
 	if (write_acpi_int(asus->handle, METHOD_SWITCH_DISPLAY, value))
-		pr_warning("Error setting display\n");
+		pr_warn("Error setting display\n");
 	return;
 }
 
@@ -909,7 +909,7 @@
 static void asus_als_switch(struct asus_laptop *asus, int value)
 {
 	if (write_acpi_int(asus->handle, METHOD_ALS_CONTROL, value))
-		pr_warning("Error setting light sensor switch\n");
+		pr_warn("Error setting light sensor switch\n");
 	asus->light_switch = value;
 }
 
@@ -937,7 +937,7 @@
 static void asus_als_level(struct asus_laptop *asus, int value)
 {
 	if (write_acpi_int(asus->handle, METHOD_ALS_LEVEL, value))
-		pr_warning("Error setting light sensor level\n");
+		pr_warn("Error setting light sensor level\n");
 	asus->light_level = value;
 }
 
@@ -976,7 +976,7 @@
 	rv = acpi_evaluate_integer(asus->handle, METHOD_GPS_STATUS,
 				   NULL, &status);
 	if (ACPI_FAILURE(rv)) {
-		pr_warning("Error reading GPS status\n");
+		pr_warn("Error reading GPS status\n");
 		return -ENODEV;
 	}
 	return !!status;
@@ -1284,7 +1284,7 @@
 	 */
 	status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus->dsdt_info);
 	if (ACPI_FAILURE(status))
-		pr_warning("Couldn't get the DSDT table header\n");
+		pr_warn("Couldn't get the DSDT table header\n");
 
 	/* We have to write 0 on init this far for all ASUS models */
 	if (write_acpi_int_ret(asus->handle, "INIT", 0, &buffer)) {
@@ -1296,7 +1296,7 @@
 	status =
 	    acpi_evaluate_integer(asus->handle, "BSTS", NULL, &bsts_result);
 	if (ACPI_FAILURE(status))
-		pr_warning("Error calling BSTS\n");
+		pr_warn("Error calling BSTS\n");
 	else if (bsts_result)
 		pr_notice("BSTS called, 0x%02x returned\n",
 		       (uint) bsts_result);

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 832a3fd7..00460cb 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c

@@ -425,7 +425,7 @@
 	if (asus->hotplug_slot) {
 		bus = pci_find_bus(0, 1);
 		if (!bus) {
-			pr_warning("Unable to find PCI bus 1?\n");
+			pr_warn("Unable to find PCI bus 1?\n");
 			goto out_unlock;
 		}
 
@@ -436,12 +436,12 @@
 		absent = (l == 0xffffffff);
 
 		if (blocked != absent) {
-			pr_warning("BIOS says wireless lan is %s, "
-				   "but the pci device is %s\n",
-				   blocked ? "blocked" : "unblocked",
-				   absent ? "absent" : "present");
-			pr_warning("skipped wireless hotplug as probably "
-				   "inappropriate for this model\n");
+			pr_warn("BIOS says wireless lan is %s, "
+				"but the pci device is %s\n",
+				blocked ? "blocked" : "unblocked",
+				absent ? "absent" : "present");
+			pr_warn("skipped wireless hotplug as probably "
+				"inappropriate for this model\n");
 			goto out_unlock;
 		}
 
@@ -500,7 +500,7 @@
 						     ACPI_SYSTEM_NOTIFY,
 						     asus_rfkill_notify, asus);
 		if (ACPI_FAILURE(status))
-			pr_warning("Failed to register notify on %s\n", node);
+			pr_warn("Failed to register notify on %s\n", node);
 	} else
 		return -ENODEV;
 
@@ -1223,7 +1223,7 @@
 /*
  * Platform device
  */
-static int __init asus_wmi_platform_init(struct asus_wmi *asus)
+static int asus_wmi_platform_init(struct asus_wmi *asus)
 {
 	int rv;
 
@@ -1583,12 +1583,12 @@
 	int ret;
 
 	if (!wmi_has_guid(ASUS_WMI_MGMT_GUID)) {
-		pr_warning("Management GUID not found\n");
+		pr_warn("Management GUID not found\n");
 		return -ENODEV;
 	}
 
 	if (wdrv->event_guid && !wmi_has_guid(wdrv->event_guid)) {
-		pr_warning("Event GUID not found\n");
+		pr_warn("Event GUID not found\n");
 		return -ENODEV;
 	}
 

diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c
index f503607..d9312b3 100644
--- a/drivers/platform/x86/asus_acpi.c
+++ b/drivers/platform/x86/asus_acpi.c

@@ -30,6 +30,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -581,8 +583,7 @@
 		if (read_acpi_int(NULL, ledname, &led_status))
 			return led_status;
 		else
-			printk(KERN_WARNING "Asus ACPI: Error reading LED "
-			       "status\n");
+			pr_warn("Error reading LED status\n");
 	}
 	return (hotk->status & ledmask) ? 1 : 0;
 }
@@ -621,8 +622,7 @@
 		led_out = !led_out;
 
 	if (!write_acpi_int(hotk->handle, ledname, led_out, NULL))
-		printk(KERN_WARNING "Asus ACPI: LED (%s) write failed\n",
-		       ledname);
+		pr_warn("LED (%s) write failed\n", ledname);
 
 	return rv;
 }
@@ -679,8 +679,7 @@
 	if (rv > 0) {
 		if (!write_acpi_int
 		    (hotk->handle, hotk->methods->mt_ledd, value, NULL))
-			printk(KERN_WARNING
-			       "Asus ACPI: LED display write failed\n");
+			pr_warn("LED display write failed\n");
 		else
 			hotk->ledd_status = (u32) value;
 	}
@@ -838,8 +837,7 @@
 	} else {
 		/* We don't have to check anything if we are here */
 		if (!read_acpi_int(NULL, hotk->methods->lcd_status, &lcd))
-			printk(KERN_WARNING
-			       "Asus ACPI: Error reading LCD status\n");
+			pr_warn("Error reading LCD status\n");
 
 		if (hotk->model == L2D)
 			lcd = ~lcd;
@@ -871,7 +869,7 @@
 			   the exact behaviour is simulated here */
 		}
 		if (ACPI_FAILURE(status))
-			printk(KERN_WARNING "Asus ACPI: Error switching LCD\n");
+			pr_warn("Error switching LCD\n");
 	}
 	return 0;
 
@@ -915,13 +913,11 @@
 	if (hotk->methods->brightness_get) {	/* SPLV/GPLV laptop */
 		if (!read_acpi_int(hotk->handle, hotk->methods->brightness_get,
 				   &value))
-			printk(KERN_WARNING
-			       "Asus ACPI: Error reading brightness\n");
+			pr_warn("Error reading brightness\n");
 	} else if (hotk->methods->brightness_status) {	/* For D1 for example */
 		if (!read_acpi_int(NULL, hotk->methods->brightness_status,
 				   &value))
-			printk(KERN_WARNING
-			       "Asus ACPI: Error reading brightness\n");
+			pr_warn("Error reading brightness\n");
 	} else			/* No GPLV method */
 		value = hotk->brightness;
 	return value;
@@ -939,8 +935,7 @@
 	if (hotk->methods->brightness_set) {
 		if (!write_acpi_int(hotk->handle, hotk->methods->brightness_set,
 				    value, NULL)) {
-			printk(KERN_WARNING
-			       "Asus ACPI: Error changing brightness\n");
+			pr_warn("Error changing brightness\n");
 			ret = -EIO;
 		}
 		goto out;
@@ -955,8 +950,7 @@
 					      NULL, NULL);
 		(value > 0) ? value-- : value++;
 		if (ACPI_FAILURE(status)) {
-			printk(KERN_WARNING
-			       "Asus ACPI: Error changing brightness\n");
+			pr_warn("Error changing brightness\n");
 			ret = -EIO;
 		}
 	}
@@ -1008,7 +1002,7 @@
 	/* no sanity check needed for now */
 	if (!write_acpi_int(hotk->handle, hotk->methods->display_set,
 			    value, NULL))
-		printk(KERN_WARNING "Asus ACPI: Error setting display\n");
+		pr_warn("Error setting display\n");
 	return;
 }
 
@@ -1021,8 +1015,7 @@
 	int value = 0;
 
 	if (!read_acpi_int(hotk->handle, hotk->methods->display_get, &value))
-		printk(KERN_WARNING
-		       "Asus ACPI: Error reading display status\n");
+		pr_warn("Error reading display status\n");
 	value &= 0x07;	/* needed for some models, shouldn't hurt others */
 	seq_printf(m, "%d\n", value);
 	return 0;
@@ -1068,7 +1061,7 @@
 	proc = proc_create_data(name, mode, acpi_device_dir(device),
 				proc_fops, acpi_driver_data(device));
 	if (!proc) {
-		printk(KERN_WARNING "  Unable to create %s fs entry\n", name);
+		pr_warn("  Unable to create %s fs entry\n", name);
 		return -1;
 	}
 	proc->uid = asus_uid;
@@ -1085,8 +1078,8 @@
 		mode = S_IFREG | S_IRUGO | S_IWUSR | S_IWGRP;
 	} else {
 		mode = S_IFREG | S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP;
-		printk(KERN_WARNING "  asus_uid and asus_gid parameters are "
-		       "deprecated, use chown and chmod instead!\n");
+		pr_warn("  asus_uid and asus_gid parameters are "
+			"deprecated, use chown and chmod instead!\n");
 	}
 
 	acpi_device_dir(device) = asus_proc_dir;
@@ -1099,8 +1092,7 @@
 		proc->uid = asus_uid;
 		proc->gid = asus_gid;
 	} else {
-		printk(KERN_WARNING "  Unable to create " PROC_INFO
-		       " fs entry\n");
+		pr_warn("  Unable to create " PROC_INFO " fs entry\n");
 	}
 
 	if (hotk->methods->mt_wled) {
@@ -1283,20 +1275,19 @@
 	 */
 	status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus_info);
 	if (ACPI_FAILURE(status))
-		printk(KERN_WARNING "  Couldn't get the DSDT table header\n");
+		pr_warn("  Couldn't get the DSDT table header\n");
 
 	/* We have to write 0 on init this far for all ASUS models */
 	if (!write_acpi_int(hotk->handle, "INIT", 0, &buffer)) {
-		printk(KERN_ERR "  Hotkey initialization failed\n");
+		pr_err("  Hotkey initialization failed\n");
 		return -ENODEV;
 	}
 
 	/* This needs to be called for some laptops to init properly */
 	if (!read_acpi_int(hotk->handle, "BSTS", &bsts_result))
-		printk(KERN_WARNING "  Error calling BSTS\n");
+		pr_warn("  Error calling BSTS\n");
 	else if (bsts_result)
-		printk(KERN_NOTICE "  BSTS called, 0x%02x returned\n",
-		       bsts_result);
+		pr_notice("  BSTS called, 0x%02x returned\n", bsts_result);
 
 	/*
 	 * Try to match the object returned by INIT to the specific model.
@@ -1324,23 +1315,21 @@
 		if (asus_info &&
 		    strncmp(asus_info->oem_table_id, "ODEM", 4) == 0) {
 			hotk->model = P30;
-			printk(KERN_NOTICE
-			       "  Samsung P30 detected, supported\n");
+			pr_notice("  Samsung P30 detected, supported\n");
 			hotk->methods = &model_conf[hotk->model];
 			kfree(model);
 			return 0;
 		} else {
 			hotk->model = M2E;
-			printk(KERN_NOTICE "  unsupported model %s, trying "
-			       "default values\n", string);
-			printk(KERN_NOTICE
-			       "  send /proc/acpi/dsdt to the developers\n");
+			pr_notice("  unsupported model %s, trying default values\n",
+				  string);
+			pr_notice("  send /proc/acpi/dsdt to the developers\n");
 			kfree(model);
 			return -ENODEV;
 		}
 	}
 	hotk->methods = &model_conf[hotk->model];
-	printk(KERN_NOTICE "  %s model detected, supported\n", string);
+	pr_notice("  %s model detected, supported\n", string);
 
 	/* Sort of per-model blacklist */
 	if (strncmp(string, "L2B", 3) == 0)
@@ -1385,7 +1374,7 @@
 	if (hotk->device->status.present) {
 		result = asus_hotk_get_info();
 	} else {
-		printk(KERN_ERR "  Hotkey device not present, aborting\n");
+		pr_err("  Hotkey device not present, aborting\n");
 		return -EINVAL;
 	}
 
@@ -1399,8 +1388,7 @@
 	acpi_status status = AE_OK;
 	int result;
 
-	printk(KERN_NOTICE "Asus Laptop ACPI Extras version %s\n",
-	       ASUS_ACPI_VERSION);
+	pr_notice("Asus Laptop ACPI Extras version %s\n", ASUS_ACPI_VERSION);
 
 	hotk = kzalloc(sizeof(struct asus_hotk), GFP_KERNEL);
 	if (!hotk)
@@ -1428,15 +1416,14 @@
 		    acpi_evaluate_object(NULL, hotk->methods->brightness_down,
 					 NULL, NULL);
 		if (ACPI_FAILURE(status))
-			printk(KERN_WARNING "  Error changing brightness\n");
+			pr_warn("  Error changing brightness\n");
 		else {
 			status =
 			    acpi_evaluate_object(NULL,
 						 hotk->methods->brightness_up,
 						 NULL, NULL);
 			if (ACPI_FAILURE(status))
-				printk(KERN_WARNING "  Strange, error changing"
-				       " brightness\n");
+				pr_warn("  Strange, error changing brightness\n");
 		}
 	}
 
@@ -1488,7 +1475,7 @@
 
 	asus_proc_dir = proc_mkdir(PROC_ASUS, acpi_root_dir);
 	if (!asus_proc_dir) {
-		printk(KERN_ERR "Asus ACPI: Unable to create /proc entry\n");
+		pr_err("Unable to create /proc entry\n");
 		acpi_bus_unregister_driver(&asus_hotk_driver);
 		return -ENODEV;
 	}
@@ -1513,7 +1500,7 @@
 							  &asus_backlight_data,
 							  &props);
 	if (IS_ERR(asus_backlight_device)) {
-		printk(KERN_ERR "Could not register asus backlight device\n");
+		pr_err("Could not register asus backlight device\n");
 		asus_backlight_device = NULL;
 		asus_acpi_exit();
 		return -ENODEV;

diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index c16a276..3f204fd 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c

@@ -68,6 +68,8 @@
  * only enabled on a JHL90 board until it is verified that they work on the
  * other boards too.  See the extra_features variable. */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -200,8 +202,8 @@
  * watching the output of address 0x4F (do an ec_transaction writing 0x33
  * into 0x4F and read a few bytes from the output, like so:
  *	u8 writeData = 0x33;
- *	ec_transaction(0x4F, &writeData, 1, buffer, 32, 0);
- * That address is labelled "fan1 table information" in the service manual.
+ *	ec_transaction(0x4F, &writeData, 1, buffer, 32);
+ * That address is labeled "fan1 table information" in the service manual.
  * It should be clear which value in 'buffer' changes). This seems to be
  * related to fan speed. It isn't a proper 'realtime' fan speed value
  * though, because physically stopping or speeding up the fan doesn't
@@ -286,7 +288,7 @@
 static void set_backlight_state(bool on)
 {
 	u8 data = on ? BACKLIGHT_STATE_ON_DATA : BACKLIGHT_STATE_OFF_DATA;
-	ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0, 0);
+	ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0);
 }
 
 
@@ -294,24 +296,24 @@
 static void pwm_enable_control(void)
 {
 	unsigned char writeData = PWM_ENABLE_DATA;
-	ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0, 0);
+	ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0);
 }
 
 static void pwm_disable_control(void)
 {
 	unsigned char writeData = PWM_DISABLE_DATA;
-	ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0, 0);
+	ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0);
 }
 
 static void set_pwm(int pwm)
 {
-	ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0, 0);
+	ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0);
 }
 
 static int get_fan_rpm(void)
 {
 	u8 value, data = FAN_DATA;
-	ec_transaction(FAN_ADDRESS, &data, 1, &value, 1, 0);
+	ec_transaction(FAN_ADDRESS, &data, 1, &value, 1);
 	return 100 * (int)value;
 }
 
@@ -760,16 +762,14 @@
 
 static int dmi_check_cb(const struct dmi_system_id *id)
 {
-	printk(KERN_INFO DRIVER_NAME": Identified laptop model '%s'\n",
-		id->ident);
+	pr_info("Identified laptop model '%s'\n", id->ident);
 	extra_features = false;
 	return 1;
 }
 
 static int dmi_check_cb_extra(const struct dmi_system_id *id)
 {
-	printk(KERN_INFO DRIVER_NAME": Identified laptop model '%s', "
-		"enabling extra features\n",
+	pr_info("Identified laptop model '%s', enabling extra features\n",
 		id->ident);
 	extra_features = true;
 	return 1;
@@ -956,14 +956,12 @@
 	int ret;
 
 	if (acpi_disabled) {
-		printk(KERN_ERR DRIVER_NAME": ACPI needs to be enabled for "
-						"this driver to work!\n");
+		pr_err("ACPI needs to be enabled for this driver to work!\n");
 		return -ENODEV;
 	}
 
 	if (!force && !dmi_check_system(compal_dmi_table)) {
-		printk(KERN_ERR DRIVER_NAME": Motherboard not recognized (You "
-				"could try the module's force-parameter)");
+		pr_err("Motherboard not recognized (You could try the module's force-parameter)\n");
 		return -ENODEV;
 	}
 
@@ -998,8 +996,7 @@
 	if (ret)
 		goto err_rfkill;
 
-	printk(KERN_INFO DRIVER_NAME": Driver "DRIVER_VERSION
-						" successfully loaded\n");
+	pr_info("Driver " DRIVER_VERSION " successfully loaded\n");
 	return 0;
 
 err_rfkill:
@@ -1064,7 +1061,7 @@
 	rfkill_destroy(wifi_rfkill);
 	rfkill_destroy(bt_rfkill);
 
-	printk(KERN_INFO DRIVER_NAME": Driver unloaded\n");
+	pr_info("Driver unloaded\n");
 }
 
 static int __devexit compal_remove(struct platform_device *pdev)
@@ -1074,8 +1071,7 @@
 	if (!extra_features)
 		return 0;
 
-	printk(KERN_INFO DRIVER_NAME": Unloading: resetting fan control "
-							"to motherboard\n");
+	pr_info("Unloading: resetting fan control to motherboard\n");
 	pwm_disable_control();
 
 	data = platform_get_drvdata(pdev);

diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index de301aa8..d3841de 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c

@@ -11,6 +11,8 @@
  *  published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -434,8 +436,7 @@
 	int ret;
 
 	if (dmi_check_system(dell_blacklist)) {
-		printk(KERN_INFO "dell-laptop: Blacklisted hardware detected - "
-				"not enabling rfkill\n");
+		pr_info("Blacklisted hardware detected - not enabling rfkill\n");
 		return 0;
 	}
 
@@ -606,7 +607,7 @@
 	dmi_walk(find_tokens, NULL);
 
 	if (!da_tokens)  {
-		printk(KERN_INFO "dell-laptop: Unable to find dmi tokens\n");
+		pr_info("Unable to find dmi tokens\n");
 		return -ENODEV;
 	}
 
@@ -636,14 +637,13 @@
 	ret = dell_setup_rfkill();
 
 	if (ret) {
-		printk(KERN_WARNING "dell-laptop: Unable to setup rfkill\n");
+		pr_warn("Unable to setup rfkill\n");
 		goto fail_rfkill;
 	}
 
 	ret = i8042_install_filter(dell_laptop_i8042_filter);
 	if (ret) {
-		printk(KERN_WARNING
-		       "dell-laptop: Unable to install key filter\n");
+		pr_warn("Unable to install key filter\n");
 		goto fail_filter;
 	}
 

diff --git a/drivers/platform/x86/dell-wmi-aio.c b/drivers/platform/x86/dell-wmi-aio.c
index 0ed8457..3f94545 100644
--- a/drivers/platform/x86/dell-wmi-aio.c
+++ b/drivers/platform/x86/dell-wmi-aio.c

@@ -15,6 +15,7 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
@@ -138,7 +139,7 @@
 
 	guid = dell_wmi_aio_find();
 	if (!guid) {
-		pr_warning("No known WMI GUID found\n");
+		pr_warn("No known WMI GUID found\n");
 		return -ENXIO;
 	}
 

diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 77f1d55..ce79082 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c

@@ -23,6 +23,8 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -141,7 +143,7 @@
 
 	status = wmi_get_event_data(value, &response);
 	if (status != AE_OK) {
-		printk(KERN_INFO "dell-wmi: bad event status 0x%x\n", status);
+		pr_info("bad event status 0x%x\n", status);
 		return;
 	}
 
@@ -153,8 +155,8 @@
 		u16 *buffer_entry = (u16 *)obj->buffer.pointer;
 
 		if (dell_new_hk_type && (buffer_entry[1] != 0x10)) {
-			printk(KERN_INFO "dell-wmi: Received unknown WMI event"
-					 " (0x%x)\n", buffer_entry[1]);
+			pr_info("Received unknown WMI event (0x%x)\n",
+				buffer_entry[1]);
 			kfree(obj);
 			return;
 		}
@@ -167,8 +169,7 @@
 		key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev,
 							reported_key);
 		if (!key) {
-			printk(KERN_INFO "dell-wmi: Unknown key %x pressed\n",
-				reported_key);
+			pr_info("Unknown key %x pressed\n", reported_key);
 		} else if ((key->keycode == KEY_BRIGHTNESSUP ||
 			    key->keycode == KEY_BRIGHTNESSDOWN) && acpi_video) {
 			/* Don't report brightness notifications that will also
@@ -275,7 +276,7 @@
 	acpi_status status;
 
 	if (!wmi_has_guid(DELL_EVENT_GUID)) {
-		printk(KERN_WARNING "dell-wmi: No known WMI GUID found\n");
+		pr_warn("No known WMI GUID found\n");
 		return -ENODEV;
 	}
 
@@ -290,9 +291,7 @@
 					 dell_wmi_notify, NULL);
 	if (ACPI_FAILURE(status)) {
 		dell_wmi_input_destroy();
-		printk(KERN_ERR
-			"dell-wmi: Unable to register notify handler - %d\n",
-			status);
+		pr_err("Unable to register notify handler - %d\n", status);
 		return -ENODEV;
 	}
 

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 2c1abf6..1c45d92 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c

@@ -228,7 +228,7 @@
 		return -ENODEV;
 
 	if (write_acpi_int(eeepc->handle, method, value))
-		pr_warning("Error writing %s\n", method);
+		pr_warn("Error writing %s\n", method);
 	return 0;
 }
 
@@ -243,7 +243,7 @@
 		return -ENODEV;
 
 	if (read_acpi_int(eeepc->handle, method, &value))
-		pr_warning("Error reading %s\n", method);
+		pr_warn("Error reading %s\n", method);
 	return value;
 }
 
@@ -261,7 +261,7 @@
 	status = acpi_get_handle(eeepc->handle, (char *)method,
 				 handle);
 	if (status != AE_OK) {
-		pr_warning("Error finding %s\n", method);
+		pr_warn("Error finding %s\n", method);
 		return -ENODEV;
 	}
 	return 0;
@@ -417,7 +417,7 @@
 	switch (value) {
 	case 0:
 		if (eeepc->cpufv_disabled)
-			pr_warning("cpufv enabled (not officially supported "
+			pr_warn("cpufv enabled (not officially supported "
 				"on this model)\n");
 		eeepc->cpufv_disabled = false;
 		return rv;
@@ -609,7 +609,7 @@
 		bus = port->subordinate;
 
 		if (!bus) {
-			pr_warning("Unable to find PCI bus?\n");
+			pr_warn("Unable to find PCI bus 1?\n");
 			goto out_unlock;
 		}
 
@@ -621,12 +621,12 @@
 		absent = (l == 0xffffffff);
 
 		if (blocked != absent) {
-			pr_warning("BIOS says wireless lan is %s, "
-					"but the pci device is %s\n",
+			pr_warn("BIOS says wireless lan is %s, "
+				"but the pci device is %s\n",
 				blocked ? "blocked" : "unblocked",
 				absent ? "absent" : "present");
-			pr_warning("skipped wireless hotplug as probably "
-					"inappropriate for this model\n");
+			pr_warn("skipped wireless hotplug as probably "
+				"inappropriate for this model\n");
 			goto out_unlock;
 		}
 
@@ -691,7 +691,8 @@
 						     eeepc_rfkill_notify,
 						     eeepc);
 		if (ACPI_FAILURE(status))
-			pr_warning("Failed to register notify on %s\n", node);
+			pr_warn("Failed to register notify on %s\n", node);
+
 		/*
 		 * Refresh pci hotplug in case the rfkill state was
 		 * changed during setup.

diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c
index 649dcad..4aa867a 100644
--- a/drivers/platform/x86/eeepc-wmi.c
+++ b/drivers/platform/x86/eeepc-wmi.c

@@ -84,7 +84,7 @@
 static acpi_status eeepc_wmi_parse_device(acpi_handle handle, u32 level,
 						 void *context, void **retval)
 {
-	pr_warning("Found legacy ATKD device (%s)", EEEPC_ACPI_HID);
+	pr_warn("Found legacy ATKD device (%s)\n", EEEPC_ACPI_HID);
 	*(bool *)context = true;
 	return AE_CTRL_TERMINATE;
 }
@@ -105,12 +105,12 @@
 static int eeepc_wmi_probe(struct platform_device *pdev)
 {
 	if (eeepc_wmi_check_atkd()) {
-		pr_warning("WMI device present, but legacy ATKD device is also "
-			   "present and enabled.");
-		pr_warning("You probably booted with acpi_osi=\"Linux\" or "
-			   "acpi_osi=\"!Windows 2009\"");
-		pr_warning("Can't load eeepc-wmi, use default acpi_osi "
-			   "(preferred) or eeepc-laptop");
+		pr_warn("WMI device present, but legacy ATKD device is also "
+			"present and enabled\n");
+		pr_warn("You probably booted with acpi_osi=\"Linux\" or "
+			"acpi_osi=\"!Windows 2009\"\n");
+		pr_warn("Can't load eeepc-wmi, use default acpi_osi "
+			"(preferred) or eeepc-laptop\n");
 		return -EBUSY;
 	}
 	return 0;

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 493054c..6b26666 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c

@@ -56,6 +56,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -585,8 +587,7 @@
 static void dmi_check_cb_common(const struct dmi_system_id *id)
 {
 	acpi_handle handle;
-	printk(KERN_INFO "fujitsu-laptop: Identified laptop model '%s'.\n",
-	       id->ident);
+	pr_info("Identified laptop model '%s'\n", id->ident);
 	if (use_alt_lcd_levels == -1) {
 		if (ACPI_SUCCESS(acpi_get_handle(NULL,
 				"\\_SB.PCI0.LPCB.FJEX.SBL2", &handle)))
@@ -691,11 +692,11 @@
 
 	result = acpi_bus_update_power(fujitsu->acpi_handle, &state);
 	if (result) {
-		printk(KERN_ERR "Error reading power state\n");
+		pr_err("Error reading power state\n");
 		goto err_unregister_input_dev;
 	}
 
-	printk(KERN_INFO "ACPI: %s [%s] (%s)\n",
+	pr_info("ACPI: %s [%s] (%s)\n",
 	       acpi_device_name(device), acpi_device_bid(device),
 	       !device->power.state ? "on" : "off");
 
@@ -707,7 +708,7 @@
 		if (ACPI_FAILURE
 		    (acpi_evaluate_object
 		     (device->handle, METHOD_NAME__INI, NULL, NULL)))
-			printk(KERN_ERR "_INI Method failed\n");
+			pr_err("_INI Method failed\n");
 	}
 
 	/* do config (detect defaults) */
@@ -827,7 +828,7 @@
 	error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
 			GFP_KERNEL);
 	if (error) {
-		printk(KERN_ERR "kfifo_alloc failed\n");
+		pr_err("kfifo_alloc failed\n");
 		goto err_stop;
 	}
 
@@ -859,13 +860,13 @@
 
 	result = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state);
 	if (result) {
-		printk(KERN_ERR "Error reading power state\n");
+		pr_err("Error reading power state\n");
 		goto err_unregister_input_dev;
 	}
 
-	printk(KERN_INFO "ACPI: %s [%s] (%s)\n",
-	       acpi_device_name(device), acpi_device_bid(device),
-	       !device->power.state ? "on" : "off");
+	pr_info("ACPI: %s [%s] (%s)\n",
+		acpi_device_name(device), acpi_device_bid(device),
+		!device->power.state ? "on" : "off");
 
 	fujitsu_hotkey->dev = device;
 
@@ -875,7 +876,7 @@
 		if (ACPI_FAILURE
 		    (acpi_evaluate_object
 		     (device->handle, METHOD_NAME__INI, NULL, NULL)))
-			printk(KERN_ERR "_INI Method failed\n");
+			pr_err("_INI Method failed\n");
 	}
 
 	i = 0;
@@ -897,8 +898,7 @@
 			call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
 
 	/* Suspect this is a keymap of the application panel, print it */
-	printk(KERN_INFO "fujitsu-laptop: BTNI: [0x%x]\n",
-		call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
+	pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
 
 #if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
 	if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
@@ -907,8 +907,8 @@
 		if (result == 0) {
 			fujitsu_hotkey->logolamp_registered = 1;
 		} else {
-			printk(KERN_ERR "fujitsu-laptop: Could not register "
-			"LED handler for logo lamp, error %i\n", result);
+			pr_err("Could not register LED handler for logo lamp, error %i\n",
+			       result);
 		}
 	}
 
@@ -919,8 +919,8 @@
 		if (result == 0) {
 			fujitsu_hotkey->kblamps_registered = 1;
 		} else {
-			printk(KERN_ERR "fujitsu-laptop: Could not register "
-			"LED handler for keyboard lamps, error %i\n", result);
+			pr_err("Could not register LED handler for keyboard lamps, error %i\n",
+			       result);
 		}
 	}
 #endif
@@ -1169,8 +1169,7 @@
 			fujitsu->bl_device->props.power = 0;
 	}
 
-	printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION
-	       " successfully loaded.\n");
+	pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n");
 
 	return 0;
 
@@ -1216,7 +1215,7 @@
 
 	kfree(fujitsu);
 
-	printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n");
+	pr_info("driver unloaded\n");
 }
 
 module_init(fujitsu_init);

diff --git a/drivers/platform/x86/hdaps.c b/drivers/platform/x86/hdaps.c
index 067bf36..5a34973 100644
--- a/drivers/platform/x86/hdaps.c
+++ b/drivers/platform/x86/hdaps.c

@@ -26,6 +26,8 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/input-polldev.h>
@@ -238,7 +240,7 @@
 		     __check_latch(0x1611, 0x01))
 		goto out;
 
-	printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x).\n",
+	printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x)\n",
 	       __get_latch(0x1611));
 
 	outb(0x17, 0x1610);
@@ -299,7 +301,7 @@
 	if (ret)
 		return ret;
 
-	printk(KERN_INFO "hdaps: device successfully initialized.\n");
+	pr_info("device successfully initialized\n");
 	return 0;
 }
 
@@ -480,7 +482,7 @@
 /* hdaps_dmi_match - found a match.  return one, short-circuiting the hunt. */
 static int __init hdaps_dmi_match(const struct dmi_system_id *id)
 {
-	printk(KERN_INFO "hdaps: %s detected.\n", id->ident);
+	pr_info("%s detected\n", id->ident);
 	return 1;
 }
 
@@ -488,8 +490,7 @@
 static int __init hdaps_dmi_match_invert(const struct dmi_system_id *id)
 {
 	hdaps_invert = (unsigned long)id->driver_data;
-	printk(KERN_INFO "hdaps: inverting axis (%u) readings.\n",
-	       hdaps_invert);
+	pr_info("inverting axis (%u) readings\n", hdaps_invert);
 	return hdaps_dmi_match(id);
 }
 
@@ -543,7 +544,7 @@
 	int ret;
 
 	if (!dmi_check_system(hdaps_whitelist)) {
-		printk(KERN_WARNING "hdaps: supported laptop not found!\n");
+		pr_warn("supported laptop not found!\n");
 		ret = -ENODEV;
 		goto out;
 	}
@@ -595,7 +596,7 @@
 	if (ret)
 		goto out_idev;
 
-	printk(KERN_INFO "hdaps: driver successfully loaded.\n");
+	pr_info("driver successfully loaded\n");
 	return 0;
 
 out_idev:
@@ -609,7 +610,7 @@
 out_region:
 	release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS);
 out:
-	printk(KERN_WARNING "hdaps: driver init failed (ret=%d)!\n", ret);
+	pr_warn("driver init failed (ret=%d)!\n", ret);
 	return ret;
 }
 
@@ -622,7 +623,7 @@
 	platform_driver_unregister(&hdaps_driver);
 	release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS);
 
-	printk(KERN_INFO "hdaps: driver unloaded.\n");
+	pr_info("driver unloaded\n");
 }
 
 module_init(hdaps_init);

diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 1bc4a75..f94017b 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c

@@ -24,6 +24,8 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -54,9 +56,6 @@
 #define HPWMI_HOTKEY_QUERY 0xc
 #define HPWMI_WIRELESS2_QUERY 0x1b
 
-#define PREFIX "HP WMI: "
-#define UNIMP "Unimplemented "
-
 enum hp_wmi_radio {
 	HPWMI_WIFI = 0,
 	HPWMI_BLUETOOTH = 1,
@@ -228,9 +227,8 @@
 
 	if (bios_return->return_code) {
 		if (bios_return->return_code != HPWMI_RET_UNKNOWN_CMDTYPE)
-			printk(KERN_WARNING PREFIX "query 0x%x returned "
-						   "error 0x%x\n",
-			       query, bios_return->return_code);
+			pr_warn("query 0x%x returned error 0x%x\n",
+				query, bios_return->return_code);
 		kfree(obj);
 		return bios_return->return_code;
 	}
@@ -384,8 +382,7 @@
 
 		if (num >= state.count ||
 		    devstate->rfkill_id != rfkill2[i].id) {
-			printk(KERN_WARNING PREFIX "power configuration of "
-			       "the wireless devices unexpectedly changed\n");
+			pr_warn("power configuration of the wireless devices unexpectedly changed\n");
 			continue;
 		}
 
@@ -471,7 +468,7 @@
 
 	status = wmi_get_event_data(value, &response);
 	if (status != AE_OK) {
-		printk(KERN_INFO PREFIX "bad event status 0x%x\n", status);
+		pr_info("bad event status 0x%x\n", status);
 		return;
 	}
 
@@ -480,8 +477,7 @@
 	if (!obj)
 		return;
 	if (obj->type != ACPI_TYPE_BUFFER) {
-		printk(KERN_INFO "hp-wmi: Unknown response received %d\n",
-		       obj->type);
+		pr_info("Unknown response received %d\n", obj->type);
 		kfree(obj);
 		return;
 	}
@@ -498,8 +494,7 @@
 		event_id = *location;
 		event_data = *(location + 2);
 	} else {
-		printk(KERN_INFO "hp-wmi: Unknown buffer length %d\n",
-		       obj->buffer.length);
+		pr_info("Unknown buffer length %d\n", obj->buffer.length);
 		kfree(obj);
 		return;
 	}
@@ -527,8 +522,7 @@
 
 		if (!sparse_keymap_report_event(hp_wmi_input_dev,
 						key_code, 1, true))
-			printk(KERN_INFO PREFIX "Unknown key code - 0x%x\n",
-			       key_code);
+			pr_info("Unknown key code - 0x%x\n", key_code);
 		break;
 	case HPWMI_WIRELESS:
 		if (rfkill2_count) {
@@ -550,14 +544,12 @@
 					  hp_wmi_get_hw_state(HPWMI_WWAN));
 		break;
 	case HPWMI_CPU_BATTERY_THROTTLE:
-		printk(KERN_INFO PREFIX UNIMP "CPU throttle because of 3 Cell"
-		       " battery event detected\n");
+		pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n");
 		break;
 	case HPWMI_LOCK_SWITCH:
 		break;
 	default:
-		printk(KERN_INFO PREFIX "Unknown event_id - %d - 0x%x\n",
-		       event_id, event_data);
+		pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data);
 		break;
 	}
 }
@@ -705,7 +697,7 @@
 		return err;
 
 	if (state.count > HPWMI_MAX_RFKILL2_DEVICES) {
-		printk(KERN_WARNING PREFIX "unable to parse 0x1b query output\n");
+		pr_warn("unable to parse 0x1b query output\n");
 		return -EINVAL;
 	}
 
@@ -727,14 +719,14 @@
 			name = "hp-wwan";
 			break;
 		default:
-			printk(KERN_WARNING PREFIX "unknown device type 0x%x\n",
-				 state.device[i].radio_type);
+			pr_warn("unknown device type 0x%x\n",
+				state.device[i].radio_type);
 			continue;
 		}
 
 		if (!state.device[i].vendor_id) {
-			printk(KERN_WARNING PREFIX "zero device %d while %d "
-			       "reported\n", i, state.count);
+			pr_warn("zero device %d while %d reported\n",
+				i, state.count);
 			continue;
 		}
 
@@ -755,8 +747,7 @@
 				    IS_HWBLOCKED(state.device[i].power));
 
 		if (!(state.device[i].power & HPWMI_POWER_BIOS))
-			printk(KERN_INFO PREFIX "device %s blocked by BIOS\n",
-			       name);
+			pr_info("device %s blocked by BIOS\n", name);
 
 		err = rfkill_register(rfkill);
 		if (err) {

diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c
index b1396e5..811d436 100644
--- a/drivers/platform/x86/ibm_rtl.c
+++ b/drivers/platform/x86/ibm_rtl.c

@@ -22,6 +22,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/module.h>
@@ -69,9 +71,10 @@
 #define RTL_SIGNATURE 0x0000005f4c54525fULL
 #define RTL_MASK      0x000000ffffffffffULL
 
-#define RTL_DEBUG(A, ...) do { \
-	if (debug) \
-		pr_info("ibm-rtl: " A, ##__VA_ARGS__ ); \
+#define RTL_DEBUG(fmt, ...)				\
+do {							\
+	if (debug)					\
+		pr_info(fmt, ##__VA_ARGS__);		\
 } while (0)
 
 static DEFINE_MUTEX(rtl_lock);
@@ -114,7 +117,7 @@
 	int ret = 0, count = 0;
 	static u32 cmd_port_val;
 
-	RTL_DEBUG("%s(%d)\n", __FUNCTION__, value);
+	RTL_DEBUG("%s(%d)\n", __func__, value);
 
 	value = value == 1 ? RTL_CMD_ENTER_PRTM : RTL_CMD_EXIT_PRTM;
 
@@ -144,8 +147,8 @@
 		while (ioread8(&rtl_table->command)) {
 			msleep(10);
 			if (count++ > 500) {
-				pr_err("ibm-rtl: Hardware not responding to "
-					"mode switch request\n");
+				pr_err("Hardware not responding to "
+				       "mode switch request\n");
 				ret = -EIO;
 				break;
 			}
@@ -250,7 +253,7 @@
 	int ret = -ENODEV, i;
 
 	if (force)
-		pr_warning("ibm-rtl: module loaded by force\n");
+		pr_warn("module loaded by force\n");
 	/* first ensure that we are running on IBM HW */
 	else if (efi_enabled || !dmi_check_system(ibm_rtl_dmi_table))
 		return -ENODEV;
@@ -288,19 +291,19 @@
 		if ((readq(&tmp->signature) & RTL_MASK) == RTL_SIGNATURE) {
 			phys_addr_t addr;
 			unsigned int plen;
-			RTL_DEBUG("found RTL_SIGNATURE at %#llx\n", (u64)tmp);
+			RTL_DEBUG("found RTL_SIGNATURE at %p\n", tmp);
 			rtl_table = tmp;
 			/* The address, value, width and offset are platform
 			 * dependent and found in the ibm_rtl_table */
 			rtl_cmd_width = ioread8(&rtl_table->cmd_granularity);
 			rtl_cmd_type = ioread8(&rtl_table->cmd_address_type);
 			RTL_DEBUG("rtl_cmd_width = %u, rtl_cmd_type = %u\n",
-			      rtl_cmd_width, rtl_cmd_type);
+				  rtl_cmd_width, rtl_cmd_type);
 			addr = ioread32(&rtl_table->cmd_port_address);
 			RTL_DEBUG("addr = %#llx\n", (unsigned long long)addr);
 			plen = rtl_cmd_width/sizeof(char);
 			rtl_cmd_addr = rtl_port_map(addr, plen);
-			RTL_DEBUG("rtl_cmd_addr = %#llx\n", (u64)rtl_cmd_addr);
+			RTL_DEBUG("rtl_cmd_addr = %p\n", rtl_cmd_addr);
 			if (!rtl_cmd_addr) {
 				ret = -ENOMEM;
 				break;

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 21b1018..bfdda33 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c

@@ -20,6 +20,8 @@
  *  02110-1301, USA.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>

diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c
index eacd5da..809adea 100644
--- a/drivers/platform/x86/intel_menlow.c
+++ b/drivers/platform/x86/intel_menlow.c

@@ -27,6 +27,8 @@
  *  to get/set bandwidth.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -135,8 +137,7 @@
 	    acpi_evaluate_integer(handle, MEMORY_SET_BANDWIDTH, &arg_list,
 				  &temp);
 
-	printk(KERN_INFO
-	       "Bandwidth value was %ld: status is %d\n", state, status);
+	pr_info("Bandwidth value was %ld: status is %d\n", state, status);
 	if (ACPI_FAILURE(status))
 		return -EFAULT;
 

diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c
index 213e79b..f1ae507 100644
--- a/drivers/platform/x86/intel_mid_powerbtn.c
+++ b/drivers/platform/x86/intel_mid_powerbtn.c

@@ -23,58 +23,48 @@
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/input.h>
+
 #include <asm/intel_scu_ipc.h>
 
 #define DRIVER_NAME "msic_power_btn"
 
-#define MSIC_IRQ_STAT	0x02
-  #define MSIC_IRQ_PB	(1 << 0)
-#define MSIC_PB_CONFIG	0x3e
 #define MSIC_PB_STATUS	0x3f
-  #define MSIC_PB_LEVEL (1 << 3) /* 1 - release, 0 - press */
-
-struct mfld_pb_priv {
-	struct input_dev *input;
-	unsigned int irq;
-};
+#define MSIC_PB_LEVEL	(1 << 3) /* 1 - release, 0 - press */
 
 static irqreturn_t mfld_pb_isr(int irq, void *dev_id)
 {
-	struct mfld_pb_priv *priv = dev_id;
+	struct input_dev *input = dev_id;
 	int ret;
 	u8 pbstat;
 
 	ret = intel_scu_ipc_ioread8(MSIC_PB_STATUS, &pbstat);
-	if (ret < 0)
-		return IRQ_HANDLED;
-
-	input_event(priv->input, EV_KEY, KEY_POWER, !(pbstat & MSIC_PB_LEVEL));
-	input_sync(priv->input);
+	if (ret < 0) {
+		dev_err(input->dev.parent, "Read error %d while reading"
+			       " MSIC_PB_STATUS\n", ret);
+	} else {
+		input_event(input, EV_KEY, KEY_POWER,
+			       !(pbstat & MSIC_PB_LEVEL));
+		input_sync(input);
+	}
 
 	return IRQ_HANDLED;
 }
 
 static int __devinit mfld_pb_probe(struct platform_device *pdev)
 {
-	struct mfld_pb_priv *priv;
 	struct input_dev *input;
-	int irq;
+	int irq = platform_get_irq(pdev, 0);
 	int error;
 
-	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return -EINVAL;
 
-	priv = kzalloc(sizeof(struct mfld_pb_priv), GFP_KERNEL);
 	input = input_allocate_device();
-	if (!priv || !input) {
-		error = -ENOMEM;
-		goto err_free_mem;
+	if (!input) {
+		dev_err(&pdev->dev, "Input device allocation error\n");
+		return -ENOMEM;
 	}
 
-	priv->input = input;
-	priv->irq = irq;
-
 	input->name = pdev->name;
 	input->phys = "power-button/input0";
 	input->id.bustype = BUS_HOST;
@@ -82,42 +72,40 @@
 
 	input_set_capability(input, EV_KEY, KEY_POWER);
 
-	error = request_threaded_irq(priv->irq, NULL, mfld_pb_isr,
-				     0, DRIVER_NAME, priv);
+	error = request_threaded_irq(irq, NULL, mfld_pb_isr, 0,
+			DRIVER_NAME, input);
 	if (error) {
-		dev_err(&pdev->dev,
-			"unable to request irq %d for mfld power button\n",
-			irq);
-		goto err_free_mem;
+		dev_err(&pdev->dev, "Unable to request irq %d for mfld power"
+				"button\n", irq);
+		goto err_free_input;
 	}
 
 	error = input_register_device(input);
 	if (error) {
-		dev_err(&pdev->dev,
-			"unable to register input dev, error %d\n", error);
+		dev_err(&pdev->dev, "Unable to register input dev, error "
+				"%d\n", error);
 		goto err_free_irq;
 	}
 
-	platform_set_drvdata(pdev, priv);
+	platform_set_drvdata(pdev, input);
 	return 0;
 
 err_free_irq:
-	free_irq(priv->irq, priv);
-err_free_mem:
+	free_irq(irq, input);
+err_free_input:
 	input_free_device(input);
-	kfree(priv);
 	return error;
 }
 
 static int __devexit mfld_pb_remove(struct platform_device *pdev)
 {
-	struct mfld_pb_priv *priv = platform_get_drvdata(pdev);
+	struct input_dev *input = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
 
-	free_irq(priv->irq, priv);
-	input_unregister_device(priv->input);
-	kfree(priv);
-
+	free_irq(irq, input);
+	input_unregister_device(input);
 	platform_set_drvdata(pdev, NULL);
+
 	return 0;
 }
 

diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c
index c2f4bd8..3a57832 100644
--- a/drivers/platform/x86/intel_mid_thermal.c
+++ b/drivers/platform/x86/intel_mid_thermal.c

@@ -37,49 +37,50 @@
 #include <asm/intel_scu_ipc.h>
 
 /* Number of thermal sensors */
-#define MSIC_THERMAL_SENSORS   4
+#define MSIC_THERMAL_SENSORS	4
 
 /* ADC1 - thermal registers */
-#define MSIC_THERM_ADC1CNTL1   0x1C0
-#define MSIC_ADC_ENBL          0x10
-#define MSIC_ADC_START         0x08
+#define MSIC_THERM_ADC1CNTL1	0x1C0
+#define MSIC_ADC_ENBL		0x10
+#define MSIC_ADC_START		0x08
 
-#define MSIC_THERM_ADC1CNTL3   0x1C2
-#define MSIC_ADCTHERM_ENBL     0x04
-#define MSIC_ADCRRDATA_ENBL    0x05
-#define MSIC_CHANL_MASK_VAL    0x0F
+#define MSIC_THERM_ADC1CNTL3	0x1C2
+#define MSIC_ADCTHERM_ENBL	0x04
+#define MSIC_ADCRRDATA_ENBL	0x05
+#define MSIC_CHANL_MASK_VAL	0x0F
 
-#define MSIC_STOPBIT_MASK      16
-#define MSIC_ADCTHERM_MASK     4
-#define ADC_CHANLS_MAX         15 /* Number of ADC channels */
-#define ADC_LOOP_MAX           (ADC_CHANLS_MAX - MSIC_THERMAL_SENSORS)
+#define MSIC_STOPBIT_MASK	16
+#define MSIC_ADCTHERM_MASK	4
+/* Number of ADC channels */
+#define ADC_CHANLS_MAX		15
+#define ADC_LOOP_MAX		(ADC_CHANLS_MAX - MSIC_THERMAL_SENSORS)
 
 /* ADC channel code values */
-#define SKIN_SENSOR0_CODE      0x08
-#define SKIN_SENSOR1_CODE      0x09
-#define SYS_SENSOR_CODE                0x0A
-#define MSIC_DIE_SENSOR_CODE   0x03
+#define SKIN_SENSOR0_CODE	0x08
+#define SKIN_SENSOR1_CODE	0x09
+#define SYS_SENSOR_CODE		0x0A
+#define MSIC_DIE_SENSOR_CODE	0x03
 
-#define SKIN_THERM_SENSOR0     0
-#define SKIN_THERM_SENSOR1     1
-#define SYS_THERM_SENSOR2      2
-#define MSIC_DIE_THERM_SENSOR3 3
+#define SKIN_THERM_SENSOR0	0
+#define SKIN_THERM_SENSOR1	1
+#define SYS_THERM_SENSOR2	2
+#define MSIC_DIE_THERM_SENSOR3	3
 
 /* ADC code range */
-#define ADC_MAX                        977
-#define ADC_MIN                        162
-#define ADC_VAL0C              887
-#define ADC_VAL20C             720
-#define ADC_VAL40C             508
-#define ADC_VAL60C             315
+#define ADC_MAX			977
+#define ADC_MIN			162
+#define ADC_VAL0C		887
+#define ADC_VAL20C		720
+#define ADC_VAL40C		508
+#define ADC_VAL60C		315
 
 /* ADC base addresses */
-#define ADC_CHNL_START_ADDR    0x1C5   /* increments by 1 */
-#define ADC_DATA_START_ADDR     0x1D4   /* increments by 2 */
+#define ADC_CHNL_START_ADDR	0x1C5	/* increments by 1 */
+#define ADC_DATA_START_ADDR	0x1D4	/* increments by 2 */
 
 /* MSIC die attributes */
-#define MSIC_DIE_ADC_MIN       488
-#define MSIC_DIE_ADC_MAX       1004
+#define MSIC_DIE_ADC_MIN	488
+#define MSIC_DIE_ADC_MAX	1004
 
 /* This holds the address of the first free ADC channel,
  * among the 15 channels
@@ -87,15 +88,15 @@
 static int channel_index;
 
 struct platform_info {
-       struct platform_device *pdev;
-       struct thermal_zone_device *tzd[MSIC_THERMAL_SENSORS];
+	struct platform_device *pdev;
+	struct thermal_zone_device *tzd[MSIC_THERMAL_SENSORS];
 };
 
 struct thermal_device_info {
-       unsigned int chnl_addr;
-       int direct;
-       /* This holds the current temperature in millidegree celsius */
-       long curr_temp;
+	unsigned int chnl_addr;
+	int direct;
+	/* This holds the current temperature in millidegree celsius */
+	long curr_temp;
 };
 
 /**
@@ -106,7 +107,7 @@
  */
 static int to_msic_die_temp(uint16_t adc_val)
 {
-       return (368 * (adc_val) / 1000) - 220;
+	return (368 * (adc_val) / 1000) - 220;
 }
 
 /**
@@ -118,7 +119,7 @@
  */
 static int is_valid_adc(uint16_t adc_val, uint16_t min, uint16_t max)
 {
-       return (adc_val >= min) && (adc_val <= max);
+	return (adc_val >= min) && (adc_val <= max);
 }
 
 /**
@@ -136,35 +137,35 @@
  */
 static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp)
 {
-       int temp;
+	int temp;
 
-       /* Direct conversion for die temperature */
-       if (direct) {
-               if (is_valid_adc(adc_val, MSIC_DIE_ADC_MIN, MSIC_DIE_ADC_MAX)) {
-                       *tp = to_msic_die_temp(adc_val) * 1000;
-                       return 0;
-               }
-               return -ERANGE;
-       }
+	/* Direct conversion for die temperature */
+	if (direct) {
+		if (is_valid_adc(adc_val, MSIC_DIE_ADC_MIN, MSIC_DIE_ADC_MAX)) {
+			*tp = to_msic_die_temp(adc_val) * 1000;
+			return 0;
+		}
+		return -ERANGE;
+	}
 
-       if (!is_valid_adc(adc_val, ADC_MIN, ADC_MAX))
-               return -ERANGE;
+	if (!is_valid_adc(adc_val, ADC_MIN, ADC_MAX))
+		return -ERANGE;
 
-       /* Linear approximation for skin temperature */
-       if (adc_val > ADC_VAL0C)
-               temp = 177 - (adc_val/5);
-       else if ((adc_val <= ADC_VAL0C) && (adc_val > ADC_VAL20C))
-               temp = 111 - (adc_val/8);
-       else if ((adc_val <= ADC_VAL20C) && (adc_val > ADC_VAL40C))
-               temp = 92 - (adc_val/10);
-       else if ((adc_val <= ADC_VAL40C) && (adc_val > ADC_VAL60C))
-               temp = 91 - (adc_val/10);
-       else
-               temp = 112 - (adc_val/6);
+	/* Linear approximation for skin temperature */
+	if (adc_val > ADC_VAL0C)
+		temp = 177 - (adc_val/5);
+	else if ((adc_val <= ADC_VAL0C) && (adc_val > ADC_VAL20C))
+		temp = 111 - (adc_val/8);
+	else if ((adc_val <= ADC_VAL20C) && (adc_val > ADC_VAL40C))
+		temp = 92 - (adc_val/10);
+	else if ((adc_val <= ADC_VAL40C) && (adc_val > ADC_VAL60C))
+		temp = 91 - (adc_val/10);
+	else
+		temp = 112 - (adc_val/6);
 
-       /* Convert temperature in celsius to milli degree celsius */
-       *tp = temp * 1000;
-       return 0;
+	/* Convert temperature in celsius to milli degree celsius */
+	*tp = temp * 1000;
+	return 0;
 }
 
 /**
@@ -178,47 +179,47 @@
  */
 static int mid_read_temp(struct thermal_zone_device *tzd, unsigned long *temp)
 {
-       struct thermal_device_info *td_info = tzd->devdata;
-       uint16_t adc_val, addr;
-       uint8_t data = 0;
-       int ret;
-       unsigned long curr_temp;
+	struct thermal_device_info *td_info = tzd->devdata;
+	uint16_t adc_val, addr;
+	uint8_t data = 0;
+	int ret;
+	unsigned long curr_temp;
 
 
-       addr = td_info->chnl_addr;
+	addr = td_info->chnl_addr;
 
-       /* Enable the msic for conversion before reading */
-       ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCRRDATA_ENBL);
-       if (ret)
-               return ret;
+	/* Enable the msic for conversion before reading */
+	ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCRRDATA_ENBL);
+	if (ret)
+		return ret;
 
-       /* Re-toggle the RRDATARD bit (temporary workaround) */
-       ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCTHERM_ENBL);
-       if (ret)
-               return ret;
+	/* Re-toggle the RRDATARD bit (temporary workaround) */
+	ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCTHERM_ENBL);
+	if (ret)
+		return ret;
 
-       /* Read the higher bits of data */
-       ret = intel_scu_ipc_ioread8(addr, &data);
-       if (ret)
-               return ret;
+	/* Read the higher bits of data */
+	ret = intel_scu_ipc_ioread8(addr, &data);
+	if (ret)
+		return ret;
 
-       /* Shift bits to accommodate the lower two data bits */
-       adc_val = (data << 2);
-       addr++;
+	/* Shift bits to accommodate the lower two data bits */
+	adc_val = (data << 2);
+	addr++;
 
-       ret = intel_scu_ipc_ioread8(addr, &data);/* Read lower bits */
-       if (ret)
-               return ret;
+	ret = intel_scu_ipc_ioread8(addr, &data);/* Read lower bits */
+	if (ret)
+		return ret;
 
-       /* Adding lower two bits to the higher bits */
-       data &= 03;
-       adc_val += data;
+	/* Adding lower two bits to the higher bits */
+	data &= 03;
+	adc_val += data;
 
-       /* Convert ADC value to temperature */
-       ret = adc_to_temp(td_info->direct, adc_val, &curr_temp);
-       if (ret == 0)
-               *temp = td_info->curr_temp = curr_temp;
-       return ret;
+	/* Convert ADC value to temperature */
+	ret = adc_to_temp(td_info->direct, adc_val, &curr_temp);
+	if (ret == 0)
+		*temp = td_info->curr_temp = curr_temp;
+	return ret;
 }
 
 /**
@@ -231,22 +232,21 @@
  */
 static int configure_adc(int val)
 {
-       int ret;
-       uint8_t data;
+	int ret;
+	uint8_t data;
 
-       ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data);
-       if (ret)
-               return ret;
+	ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data);
+	if (ret)
+		return ret;
 
-       if (val) {
-               /* Enable and start the ADC */
-               data |= (MSIC_ADC_ENBL | MSIC_ADC_START);
-       } else {
-               /* Just stop the ADC */
-               data &= (~MSIC_ADC_START);
-       }
-
-       return intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL1, data);
+	if (val) {
+		/* Enable and start the ADC */
+		data |= (MSIC_ADC_ENBL | MSIC_ADC_START);
+	} else {
+		/* Just stop the ADC */
+		data &= (~MSIC_ADC_START);
+	}
+	return intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL1, data);
 }
 
 /**
@@ -259,30 +259,30 @@
  */
 static int set_up_therm_channel(u16 base_addr)
 {
-       int ret;
+	int ret;
 
-       /* Enable all the sensor channels */
-       ret = intel_scu_ipc_iowrite8(base_addr, SKIN_SENSOR0_CODE);
-       if (ret)
-               return ret;
+	/* Enable all the sensor channels */
+	ret = intel_scu_ipc_iowrite8(base_addr, SKIN_SENSOR0_CODE);
+	if (ret)
+		return ret;
 
-       ret = intel_scu_ipc_iowrite8(base_addr + 1, SKIN_SENSOR1_CODE);
-       if (ret)
-               return ret;
+	ret = intel_scu_ipc_iowrite8(base_addr + 1, SKIN_SENSOR1_CODE);
+	if (ret)
+		return ret;
 
-       ret = intel_scu_ipc_iowrite8(base_addr + 2, SYS_SENSOR_CODE);
-       if (ret)
-               return ret;
+	ret = intel_scu_ipc_iowrite8(base_addr + 2, SYS_SENSOR_CODE);
+	if (ret)
+		return ret;
 
-       /* Since this is the last channel, set the stop bit
-          to 1 by ORing the DIE_SENSOR_CODE with 0x10 */
-       ret = intel_scu_ipc_iowrite8(base_addr + 3,
-                                       (MSIC_DIE_SENSOR_CODE | 0x10));
-       if (ret)
-               return ret;
+	/* Since this is the last channel, set the stop bit
+	 * to 1 by ORing the DIE_SENSOR_CODE with 0x10 */
+	ret = intel_scu_ipc_iowrite8(base_addr + 3,
+			(MSIC_DIE_SENSOR_CODE | 0x10));
+	if (ret)
+		return ret;
 
-       /* Enable ADC and start it */
-       return configure_adc(1);
+	/* Enable ADC and start it */
+	return configure_adc(1);
 }
 
 /**
@@ -293,13 +293,13 @@
  */
 static int reset_stopbit(uint16_t addr)
 {
-       int ret;
-       uint8_t data;
-       ret = intel_scu_ipc_ioread8(addr, &data);
-       if (ret)
-               return ret;
-       /* Set the stop bit to zero */
-       return intel_scu_ipc_iowrite8(addr, (data & 0xEF));
+	int ret;
+	uint8_t data;
+	ret = intel_scu_ipc_ioread8(addr, &data);
+	if (ret)
+		return ret;
+	/* Set the stop bit to zero */
+	return intel_scu_ipc_iowrite8(addr, (data & 0xEF));
 }
 
 /**
@@ -317,30 +317,30 @@
  */
 static int find_free_channel(void)
 {
-       int ret;
-       int i;
-       uint8_t data;
+	int ret;
+	int i;
+	uint8_t data;
 
-       /* check whether ADC is enabled */
-       ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data);
-       if (ret)
-               return ret;
+	/* check whether ADC is enabled */
+	ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data);
+	if (ret)
+		return ret;
 
-       if ((data & MSIC_ADC_ENBL) == 0)
-               return 0;
+	if ((data & MSIC_ADC_ENBL) == 0)
+		return 0;
 
-       /* ADC is already enabled; Looking for an empty channel */
-       for (i = 0; i < ADC_CHANLS_MAX; i++) {
-               ret = intel_scu_ipc_ioread8(ADC_CHNL_START_ADDR + i, &data);
-               if (ret)
-                       return ret;
+	/* ADC is already enabled; Looking for an empty channel */
+	for (i = 0; i < ADC_CHANLS_MAX; i++) {
+		ret = intel_scu_ipc_ioread8(ADC_CHNL_START_ADDR + i, &data);
+		if (ret)
+			return ret;
 
-               if (data & MSIC_STOPBIT_MASK) {
-                       ret = i;
-                       break;
-               }
-       }
-       return (ret > ADC_LOOP_MAX) ? (-EINVAL) : ret;
+		if (data & MSIC_STOPBIT_MASK) {
+			ret = i;
+			break;
+		}
+	}
+	return (ret > ADC_LOOP_MAX) ? (-EINVAL) : ret;
 }
 
 /**
@@ -351,48 +351,48 @@
  */
 static int mid_initialize_adc(struct device *dev)
 {
-       u8  data;
-       u16 base_addr;
-       int ret;
+	u8  data;
+	u16 base_addr;
+	int ret;
 
-       /*
-        * Ensure that adctherm is disabled before we
-        * initialize the ADC
-        */
-       ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL3, &data);
-       if (ret)
-               return ret;
+	/*
+	 * Ensure that adctherm is disabled before we
+	 * initialize the ADC
+	 */
+	ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL3, &data);
+	if (ret)
+		return ret;
 
-       if (data & MSIC_ADCTHERM_MASK)
-               dev_warn(dev, "ADCTHERM already set");
+	if (data & MSIC_ADCTHERM_MASK)
+		dev_warn(dev, "ADCTHERM already set");
 
-       /* Index of the first channel in which the stop bit is set */
-       channel_index = find_free_channel();
-       if (channel_index < 0) {
-               dev_err(dev, "No free ADC channels");
-               return channel_index;
-       }
+	/* Index of the first channel in which the stop bit is set */
+	channel_index = find_free_channel();
+	if (channel_index < 0) {
+		dev_err(dev, "No free ADC channels");
+		return channel_index;
+	}
 
-       base_addr = ADC_CHNL_START_ADDR + channel_index;
+	base_addr = ADC_CHNL_START_ADDR + channel_index;
 
-       if (!(channel_index == 0 || channel_index == ADC_LOOP_MAX)) {
-               /* Reset stop bit for channels other than 0 and 12 */
-               ret = reset_stopbit(base_addr);
-               if (ret)
-                       return ret;
+	if (!(channel_index == 0 || channel_index == ADC_LOOP_MAX)) {
+		/* Reset stop bit for channels other than 0 and 12 */
+		ret = reset_stopbit(base_addr);
+		if (ret)
+			return ret;
 
-               /* Index of the first free channel */
-               base_addr++;
-               channel_index++;
-       }
+		/* Index of the first free channel */
+		base_addr++;
+		channel_index++;
+	}
 
-       ret = set_up_therm_channel(base_addr);
-       if (ret) {
-               dev_err(dev, "unable to enable ADC");
-               return ret;
-       }
-       dev_dbg(dev, "ADC initialization successful");
-       return ret;
+	ret = set_up_therm_channel(base_addr);
+	if (ret) {
+		dev_err(dev, "unable to enable ADC");
+		return ret;
+	}
+	dev_dbg(dev, "ADC initialization successful");
+	return ret;
 }
 
 /**
@@ -403,18 +403,18 @@
  */
 static struct thermal_device_info *initialize_sensor(int index)
 {
-       struct thermal_device_info *td_info =
-               kzalloc(sizeof(struct thermal_device_info), GFP_KERNEL);
+	struct thermal_device_info *td_info =
+		kzalloc(sizeof(struct thermal_device_info), GFP_KERNEL);
 
-       if (!td_info)
-               return NULL;
+	if (!td_info)
+		return NULL;
 
-       /* Set the base addr of the channel for this sensor */
-       td_info->chnl_addr = ADC_DATA_START_ADDR + 2 * (channel_index + index);
-       /* Sensor 3 is direct conversion */
-       if (index == 3)
-               td_info->direct = 1;
-       return td_info;
+	/* Set the base addr of the channel for this sensor */
+	td_info->chnl_addr = ADC_DATA_START_ADDR + 2 * (channel_index + index);
+	/* Sensor 3 is direct conversion */
+	if (index == 3)
+		td_info->direct = 1;
+	return td_info;
 }
 
 /**
@@ -425,7 +425,7 @@
  */
 static int mid_thermal_resume(struct platform_device *pdev)
 {
-       return mid_initialize_adc(&pdev->dev);
+	return mid_initialize_adc(&pdev->dev);
 }
 
 /**
@@ -437,12 +437,12 @@
  */
 static int mid_thermal_suspend(struct platform_device *pdev, pm_message_t mesg)
 {
-       /*
-        * This just stops the ADC and does not disable it.
-        * temporary workaround until we have a generic ADC driver.
-        * If 0 is passed, it disables the ADC.
-        */
-       return configure_adc(0);
+	/*
+	 * This just stops the ADC and does not disable it.
+	 * temporary workaround until we have a generic ADC driver.
+	 * If 0 is passed, it disables the ADC.
+	 */
+	return configure_adc(0);
 }
 
 /**
@@ -453,16 +453,15 @@
  */
 static int read_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp)
 {
-       WARN_ON(tzd == NULL);
-       return mid_read_temp(tzd, temp);
+	WARN_ON(tzd == NULL);
+	return mid_read_temp(tzd, temp);
 }
 
 /* Can't be const */
 static struct thermal_zone_device_ops tzd_ops = {
-       .get_temp = read_curr_temp,
+	.get_temp = read_curr_temp,
 };
 
-
 /**
  * mid_thermal_probe - mfld thermal initialize
  * @pdev: platform device structure
@@ -472,46 +471,45 @@
  */
 static int mid_thermal_probe(struct platform_device *pdev)
 {
-       static char *name[MSIC_THERMAL_SENSORS] = {
-               "skin0", "skin1", "sys", "msicdie"
-       };
+	static char *name[MSIC_THERMAL_SENSORS] = {
+		"skin0", "skin1", "sys", "msicdie"
+	};
 
-       int ret;
-       int i;
-       struct platform_info *pinfo;
+	int ret;
+	int i;
+	struct platform_info *pinfo;
 
-       pinfo = kzalloc(sizeof(struct platform_info), GFP_KERNEL);
-       if (!pinfo)
-               return -ENOMEM;
+	pinfo = kzalloc(sizeof(struct platform_info), GFP_KERNEL);
+	if (!pinfo)
+		return -ENOMEM;
 
-       /* Initializing the hardware */
-       ret = mid_initialize_adc(&pdev->dev);
-       if (ret) {
-               dev_err(&pdev->dev, "ADC init failed");
-               kfree(pinfo);
-               return ret;
-       }
+	/* Initializing the hardware */
+	ret = mid_initialize_adc(&pdev->dev);
+	if (ret) {
+		dev_err(&pdev->dev, "ADC init failed");
+		kfree(pinfo);
+		return ret;
+	}
 
-       /* Register each sensor with the generic thermal framework*/
-       for (i = 0; i < MSIC_THERMAL_SENSORS; i++) {
-               pinfo->tzd[i] = thermal_zone_device_register(name[i],
-                                       0, initialize_sensor(i),
-                                       &tzd_ops, 0, 0, 0, 0);
-               if (IS_ERR(pinfo->tzd[i]))
-                       goto reg_fail;
-       }
+	/* Register each sensor with the generic thermal framework*/
+	for (i = 0; i < MSIC_THERMAL_SENSORS; i++) {
+		pinfo->tzd[i] = thermal_zone_device_register(name[i],
+				0, initialize_sensor(i), &tzd_ops, 0, 0, 0, 0);
+		if (IS_ERR(pinfo->tzd[i]))
+			goto reg_fail;
+	}
 
-       pinfo->pdev = pdev;
-       platform_set_drvdata(pdev, pinfo);
-       return 0;
+	pinfo->pdev = pdev;
+	platform_set_drvdata(pdev, pinfo);
+	return 0;
 
 reg_fail:
-       ret = PTR_ERR(pinfo->tzd[i]);
-       while (--i >= 0)
-               thermal_zone_device_unregister(pinfo->tzd[i]);
-       configure_adc(0);
-       kfree(pinfo);
-       return ret;
+	ret = PTR_ERR(pinfo->tzd[i]);
+	while (--i >= 0)
+		thermal_zone_device_unregister(pinfo->tzd[i]);
+	configure_adc(0);
+	kfree(pinfo);
+	return ret;
 }
 
 /**
@@ -523,49 +521,46 @@
  */
 static int mid_thermal_remove(struct platform_device *pdev)
 {
-       int i;
-       struct platform_info *pinfo = platform_get_drvdata(pdev);
+	int i;
+	struct platform_info *pinfo = platform_get_drvdata(pdev);
 
-       for (i = 0; i < MSIC_THERMAL_SENSORS; i++)
-               thermal_zone_device_unregister(pinfo->tzd[i]);
+	for (i = 0; i < MSIC_THERMAL_SENSORS; i++)
+		thermal_zone_device_unregister(pinfo->tzd[i]);
 
-       platform_set_drvdata(pdev, NULL);
+	kfree(pinfo);
+	platform_set_drvdata(pdev, NULL);
 
-       /* Stop the ADC */
-       return configure_adc(0);
+	/* Stop the ADC */
+	return configure_adc(0);
 }
 
-/*********************************************************************
- *             Driver initialisation and finalization
- *********************************************************************/
-
 #define DRIVER_NAME "msic_sensor"
 
 static const struct platform_device_id therm_id_table[] = {
-       { DRIVER_NAME, 1 },
-       { }
+	{ DRIVER_NAME, 1 },
+	{ }
 };
 
 static struct platform_driver mid_thermal_driver = {
-       .driver = {
-               .name = DRIVER_NAME,
-               .owner = THIS_MODULE,
-       },
-       .probe = mid_thermal_probe,
-       .suspend = mid_thermal_suspend,
-       .resume = mid_thermal_resume,
-       .remove = __devexit_p(mid_thermal_remove),
-       .id_table = therm_id_table,
+	.driver = {
+		.name = DRIVER_NAME,
+		.owner = THIS_MODULE,
+	},
+	.probe = mid_thermal_probe,
+	.suspend = mid_thermal_suspend,
+	.resume = mid_thermal_resume,
+	.remove = __devexit_p(mid_thermal_remove),
+	.id_table = therm_id_table,
 };
 
 static int __init mid_thermal_module_init(void)
 {
-       return platform_driver_register(&mid_thermal_driver);
+	return platform_driver_register(&mid_thermal_driver);
 }
 
 static void __exit mid_thermal_module_exit(void)
 {
-       platform_driver_unregister(&mid_thermal_driver);
+	platform_driver_unregister(&mid_thermal_driver);
 }
 
 module_init(mid_thermal_module_init);

diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c
new file mode 100644
index 0000000..e936364
--- /dev/null
+++ b/drivers/platform/x86/intel_oaktrail.c

@@ -0,0 +1,396 @@
+/*
+ * intel_oaktrail.c - Intel OakTrail Platform support.
+ *
+ * Copyright (C) 2010-2011 Intel Corporation
+ * Author: Yin Kangkai (kangkai.yin@intel.com)
+ *
+ * based on Compal driver, Copyright (C) 2008 Cezary Jackiewicz
+ * <cezary.jackiewicz (at) gmail.com>, based on MSI driver
+ * Copyright (C) 2006 Lennart Poettering <mzxreary (at) 0pointer (dot) de>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ *  02110-1301, USA.
+ *
+ * This driver does below things:
+ * 1. registers itself in the Linux backlight control in
+ *    /sys/class/backlight/intel_oaktrail/
+ *
+ * 2. registers in the rfkill subsystem here: /sys/class/rfkill/rfkillX/
+ *    for these components: wifi, bluetooth, wwan (3g), gps
+ *
+ * This driver might work on other products based on Oaktrail. If you
+ * want to try it you can pass force=1 as argument to the module which
+ * will force it to load even when the DMI data doesn't identify the
+ * product as compatible.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/fb.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/backlight.h>
+#include <linux/platform_device.h>
+#include <linux/dmi.h>
+#include <linux/rfkill.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+
+
+#define DRIVER_NAME	"intel_oaktrail"
+#define DRIVER_VERSION	"0.4ac1"
+
+/*
+ * This is the devices status address in EC space, and the control bits
+ * definition:
+ *
+ * (1 << 0):	Camera enable/disable, RW.
+ * (1 << 1):	Bluetooth enable/disable, RW.
+ * (1 << 2):	GPS enable/disable, RW.
+ * (1 << 3):	WiFi enable/disable, RW.
+ * (1 << 4):	WWAN (3G) enable/disalbe, RW.
+ * (1 << 5):	Touchscreen enable/disable, Read Only.
+ */
+#define OT_EC_DEVICE_STATE_ADDRESS	0xD6
+
+#define OT_EC_CAMERA_MASK	(1 << 0)
+#define OT_EC_BT_MASK		(1 << 1)
+#define OT_EC_GPS_MASK		(1 << 2)
+#define OT_EC_WIFI_MASK		(1 << 3)
+#define OT_EC_WWAN_MASK		(1 << 4)
+#define OT_EC_TS_MASK		(1 << 5)
+
+/*
+ * This is the address in EC space and commands used to control LCD backlight:
+ *
+ * Two steps needed to change the LCD backlight:
+ *   1. write the backlight percentage into OT_EC_BL_BRIGHTNESS_ADDRESS;
+ *   2. write OT_EC_BL_CONTROL_ON_DATA into OT_EC_BL_CONTROL_ADDRESS.
+ *
+ * To read the LCD back light, just read out the value from
+ * OT_EC_BL_BRIGHTNESS_ADDRESS.
+ *
+ * LCD backlight brightness range: 0 - 100 (OT_EC_BL_BRIGHTNESS_MAX)
+ */
+#define OT_EC_BL_BRIGHTNESS_ADDRESS	0x44
+#define OT_EC_BL_BRIGHTNESS_MAX		100
+#define OT_EC_BL_CONTROL_ADDRESS	0x3A
+#define OT_EC_BL_CONTROL_ON_DATA	0x1A
+
+
+static int force;
+module_param(force, bool, 0);
+MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
+
+static struct platform_device *oaktrail_device;
+static struct backlight_device *oaktrail_bl_device;
+static struct rfkill *bt_rfkill;
+static struct rfkill *gps_rfkill;
+static struct rfkill *wifi_rfkill;
+static struct rfkill *wwan_rfkill;
+
+
+/* rfkill */
+static int oaktrail_rfkill_set(void *data, bool blocked)
+{
+	u8 value;
+	u8 result;
+	unsigned long radio = (unsigned long) data;
+
+	ec_read(OT_EC_DEVICE_STATE_ADDRESS, &result);
+
+	if (!blocked)
+		value = (u8) (result | radio);
+	else
+		value = (u8) (result & ~radio);
+
+	ec_write(OT_EC_DEVICE_STATE_ADDRESS, value);
+
+	return 0;
+}
+
+static const struct rfkill_ops oaktrail_rfkill_ops = {
+	.set_block = oaktrail_rfkill_set,
+};
+
+static struct rfkill *oaktrail_rfkill_new(char *name, enum rfkill_type type,
+					  unsigned long mask)
+{
+	struct rfkill *rfkill_dev;
+	u8 value;
+	int err;
+
+	rfkill_dev = rfkill_alloc(name, &oaktrail_device->dev, type,
+				  &oaktrail_rfkill_ops, (void *)mask);
+	if (!rfkill_dev)
+		return ERR_PTR(-ENOMEM);
+
+	ec_read(OT_EC_DEVICE_STATE_ADDRESS, &value);
+	rfkill_init_sw_state(rfkill_dev, (value & mask) != 1);
+
+	err = rfkill_register(rfkill_dev);
+	if (err) {
+		rfkill_destroy(rfkill_dev);
+		return ERR_PTR(err);
+	}
+
+	return rfkill_dev;
+}
+
+static inline void __oaktrail_rfkill_cleanup(struct rfkill *rf)
+{
+	if (rf) {
+		rfkill_unregister(rf);
+		rfkill_destroy(rf);
+	}
+}
+
+static void oaktrail_rfkill_cleanup(void)
+{
+	__oaktrail_rfkill_cleanup(wifi_rfkill);
+	__oaktrail_rfkill_cleanup(bt_rfkill);
+	__oaktrail_rfkill_cleanup(gps_rfkill);
+	__oaktrail_rfkill_cleanup(wwan_rfkill);
+}
+
+static int oaktrail_rfkill_init(void)
+{
+	int ret;
+
+	wifi_rfkill = oaktrail_rfkill_new("oaktrail-wifi",
+					  RFKILL_TYPE_WLAN,
+					  OT_EC_WIFI_MASK);
+	if (IS_ERR(wifi_rfkill)) {
+		ret = PTR_ERR(wifi_rfkill);
+		wifi_rfkill = NULL;
+		goto cleanup;
+	}
+
+	bt_rfkill = oaktrail_rfkill_new("oaktrail-bluetooth",
+					RFKILL_TYPE_BLUETOOTH,
+					OT_EC_BT_MASK);
+	if (IS_ERR(bt_rfkill)) {
+		ret = PTR_ERR(bt_rfkill);
+		bt_rfkill = NULL;
+		goto cleanup;
+	}
+
+	gps_rfkill = oaktrail_rfkill_new("oaktrail-gps",
+					 RFKILL_TYPE_GPS,
+					 OT_EC_GPS_MASK);
+	if (IS_ERR(gps_rfkill)) {
+		ret = PTR_ERR(gps_rfkill);
+		gps_rfkill = NULL;
+		goto cleanup;
+	}
+
+	wwan_rfkill = oaktrail_rfkill_new("oaktrail-wwan",
+					  RFKILL_TYPE_WWAN,
+					  OT_EC_WWAN_MASK);
+	if (IS_ERR(wwan_rfkill)) {
+		ret = PTR_ERR(wwan_rfkill);
+		wwan_rfkill = NULL;
+		goto cleanup;
+	}
+
+	return 0;
+
+cleanup:
+	oaktrail_rfkill_cleanup();
+	return ret;
+}
+
+
+/* backlight */
+static int get_backlight_brightness(struct backlight_device *b)
+{
+	u8 value;
+	ec_read(OT_EC_BL_BRIGHTNESS_ADDRESS, &value);
+
+	return value;
+}
+
+static int set_backlight_brightness(struct backlight_device *b)
+{
+	u8 percent = (u8) b->props.brightness;
+	if (percent < 0 || percent > OT_EC_BL_BRIGHTNESS_MAX)
+		return -EINVAL;
+
+	ec_write(OT_EC_BL_BRIGHTNESS_ADDRESS, percent);
+	ec_write(OT_EC_BL_CONTROL_ADDRESS, OT_EC_BL_CONTROL_ON_DATA);
+
+	return 0;
+}
+
+static const struct backlight_ops oaktrail_bl_ops = {
+	.get_brightness = get_backlight_brightness,
+	.update_status	= set_backlight_brightness,
+};
+
+static int oaktrail_backlight_init(void)
+{
+	struct backlight_device *bd;
+	struct backlight_properties props;
+
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = OT_EC_BL_BRIGHTNESS_MAX;
+	bd = backlight_device_register(DRIVER_NAME,
+				       &oaktrail_device->dev, NULL,
+				       &oaktrail_bl_ops,
+				       &props);
+
+	if (IS_ERR(bd)) {
+		oaktrail_bl_device = NULL;
+		pr_warning("Unable to register backlight device\n");
+		return PTR_ERR(bd);
+	}
+
+	oaktrail_bl_device = bd;
+
+	bd->props.brightness = get_backlight_brightness(bd);
+	bd->props.power = FB_BLANK_UNBLANK;
+	backlight_update_status(bd);
+
+	return 0;
+}
+
+static void oaktrail_backlight_exit(void)
+{
+	if (oaktrail_bl_device)
+		backlight_device_unregister(oaktrail_bl_device);
+}
+
+static int __devinit oaktrail_probe(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static int __devexit oaktrail_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static struct platform_driver oaktrail_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.owner = THIS_MODULE,
+	},
+	.probe	= oaktrail_probe,
+	.remove	= __devexit_p(oaktrail_remove)
+};
+
+static int dmi_check_cb(const struct dmi_system_id *id)
+{
+	pr_info("Identified model '%s'\n", id->ident);
+	return 0;
+}
+
+static struct dmi_system_id __initdata oaktrail_dmi_table[] = {
+	{
+		.ident = "OakTrail platform",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "OakTrail platform"),
+		},
+		.callback = dmi_check_cb
+	},
+	{ }
+};
+
+static int __init oaktrail_init(void)
+{
+	int ret;
+
+	if (acpi_disabled) {
+		pr_err("ACPI needs to be enabled for this driver to work!\n");
+		return -ENODEV;
+	}
+
+	if (!force && !dmi_check_system(oaktrail_dmi_table)) {
+		pr_err("Platform not recognized (You could try the module's force-parameter)");
+		return -ENODEV;
+	}
+
+	ret = platform_driver_register(&oaktrail_driver);
+	if (ret) {
+		pr_warning("Unable to register platform driver\n");
+		goto err_driver_reg;
+	}
+
+	oaktrail_device = platform_device_alloc(DRIVER_NAME, -1);
+	if (!oaktrail_device) {
+		pr_warning("Unable to allocate platform device\n");
+		ret = -ENOMEM;
+		goto err_device_alloc;
+	}
+
+	ret = platform_device_add(oaktrail_device);
+	if (ret) {
+		pr_warning("Unable to add platform device\n");
+		goto err_device_add;
+	}
+
+	if (!acpi_video_backlight_support()) {
+		ret = oaktrail_backlight_init();
+		if (ret)
+			goto err_backlight;
+
+	} else
+		pr_info("Backlight controlled by ACPI video driver\n");
+
+	ret = oaktrail_rfkill_init();
+	if (ret) {
+		pr_warning("Setup rfkill failed\n");
+		goto err_rfkill;
+	}
+
+	pr_info("Driver "DRIVER_VERSION" successfully loaded\n");
+	return 0;
+
+err_rfkill:
+	oaktrail_backlight_exit();
+err_backlight:
+	platform_device_del(oaktrail_device);
+err_device_add:
+	platform_device_put(oaktrail_device);
+err_device_alloc:
+	platform_driver_unregister(&oaktrail_driver);
+err_driver_reg:
+
+	return ret;
+}
+
+static void __exit oaktrail_cleanup(void)
+{
+	oaktrail_backlight_exit();
+	oaktrail_rfkill_cleanup();
+	platform_device_unregister(oaktrail_device);
+	platform_driver_unregister(&oaktrail_driver);
+
+	pr_info("Driver unloaded\n");
+}
+
+module_init(oaktrail_init);
+module_exit(oaktrail_cleanup);
+
+MODULE_AUTHOR("Yin Kangkai (kangkai.yin@intel.com)");
+MODULE_DESCRIPTION("Intel Oaktrail Platform ACPI Extras");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("dmi:*:svnIntelCorporation:pnOakTrailplatform:*");

diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c
index 464bb3f..1686c1e 100644
--- a/drivers/platform/x86/intel_pmic_gpio.c
+++ b/drivers/platform/x86/intel_pmic_gpio.c

@@ -19,6 +19,8 @@
  * Moorestown platform PMIC chip
  */
 
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
@@ -90,8 +92,7 @@
 static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 {
 	if (offset > 8) {
-		printk(KERN_ERR
-			"%s: only pin 0-7 support input\n", __func__);
+		pr_err("only pin 0-7 support input\n");
 		return -1;/* we only have 8 GPIO can use as input */
 	}
 	return intel_scu_ipc_update_register(GPIO0 + offset,
@@ -116,8 +117,7 @@
 				value ? 1 << (offset - 16) : 0,
 				1 << (offset - 16));
 	else {
-		printk(KERN_ERR
-			"%s: invalid PMIC GPIO pin %d!\n", __func__, offset);
+		pr_err("invalid PMIC GPIO pin %d!\n", offset);
 		WARN_ON(1);
 	}
 
@@ -260,7 +260,7 @@
 	/* setting up SRAM mapping for GPIOINT register */
 	pg->gpiointr = ioremap_nocache(pdata->gpiointr, 8);
 	if (!pg->gpiointr) {
-		printk(KERN_ERR "%s: Can not map GPIOINT.\n", __func__);
+		pr_err("Can not map GPIOINT\n");
 		retval = -EINVAL;
 		goto err2;
 	}
@@ -281,13 +281,13 @@
 	pg->chip.dev = dev;
 	retval = gpiochip_add(&pg->chip);
 	if (retval) {
-		printk(KERN_ERR "%s: Can not add pmic gpio chip.\n", __func__);
+		pr_err("Can not add pmic gpio chip\n");
 		goto err;
 	}
 
 	retval = request_irq(pg->irq, pmic_irq_handler, 0, "pmic", pg);
 	if (retval) {
-		printk(KERN_WARNING "pmic: Interrupt request failed\n");
+		pr_warn("Interrupt request failed\n");
 		goto err;
 	}
 

diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index 23fb2af..3ff629d 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c

@@ -135,7 +135,7 @@
 	buf[1] = (u8) (level*31);
 
 	return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, buf, sizeof(buf),
-			      NULL, 0, 1);
+			      NULL, 0);
 }
 
 static int get_lcd_level(void)
@@ -144,7 +144,7 @@
 	int result;
 
 	result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1,
-				&rdata, 1, 1);
+				&rdata, 1);
 	if (result < 0)
 		return result;
 
@@ -157,7 +157,7 @@
 	int result;
 
 	result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1,
-				&rdata, 1, 1);
+				&rdata, 1);
 	if (result < 0)
 		return result;
 
@@ -172,7 +172,7 @@
 	wdata[0] = 4;
 
 	result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 1,
-				&rdata, 1, 1);
+				&rdata, 1);
 	if (result < 0)
 		return result;
 
@@ -180,7 +180,7 @@
 	wdata[1] = (rdata & 0xF7) | (enable ? 8 : 0);
 
 	return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 2,
-			      NULL, 0, 1);
+			      NULL, 0);
 }
 
 static ssize_t set_device_state(const char *buf, size_t count, u8 mask)
@@ -217,7 +217,7 @@
 	u8 wdata = 0, rdata;
 	int result;
 
-	result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1, 1);
+	result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1);
 	if (result < 0)
 		return -1;
 
@@ -447,7 +447,7 @@
 
 static int dmi_check_cb(const struct dmi_system_id *id)
 {
-	pr_info("Identified laptop model '%s'.\n", id->ident);
+	pr_info("Identified laptop model '%s'\n", id->ident);
 	return 1;
 }
 
@@ -800,7 +800,7 @@
 	input_unregister_device(msi_laptop_input_dev);
 }
 
-static int load_scm_model_init(struct platform_device *sdev)
+static int __init load_scm_model_init(struct platform_device *sdev)
 {
 	u8 data;
 	int result;
@@ -875,8 +875,7 @@
 	/* Register backlight stuff */
 
 	if (acpi_video_backlight_support()) {
-		pr_info("Brightness ignored, must be controlled "
-		       "by ACPI video driver\n");
+		pr_info("Brightness ignored, must be controlled by ACPI video driver\n");
 	} else {
 		struct backlight_properties props;
 		memset(&props, 0, sizeof(struct backlight_properties));
@@ -930,7 +929,7 @@
 	if (auto_brightness != 2)
 		set_auto_brightness(auto_brightness);
 
-	pr_info("driver "MSI_DRIVER_VERSION" successfully loaded.\n");
+	pr_info("driver " MSI_DRIVER_VERSION " successfully loaded\n");
 
 	return 0;
 
@@ -978,7 +977,7 @@
 	if (auto_brightness != 2)
 		set_auto_brightness(1);
 
-	pr_info("driver unloaded.\n");
+	pr_info("driver unloaded\n");
 }
 
 module_init(msi_init);

diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c
index d5419c9..c832e33 100644
--- a/drivers/platform/x86/msi-wmi.c
+++ b/drivers/platform/x86/msi-wmi.c

@@ -20,6 +20,7 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
 #include <linux/input.h>
@@ -36,13 +37,10 @@
 MODULE_ALIAS("wmi:B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2");
 
 #define DRV_NAME "msi-wmi"
-#define DRV_PFX DRV_NAME ": "
 
 #define MSIWMI_BIOS_GUID "551A1F84-FBDD-4125-91DB-3EA8F44F1D45"
 #define MSIWMI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2"
 
-#define dprintk(msg...) pr_debug(DRV_PFX msg)
-
 #define SCANCODE_BASE 0xD0
 #define MSI_WMI_BRIGHTNESSUP   SCANCODE_BASE
 #define MSI_WMI_BRIGHTNESSDOWN (SCANCODE_BASE + 1)
@@ -78,7 +76,7 @@
 
 	if (!obj || obj->type != ACPI_TYPE_INTEGER) {
 		if (obj) {
-			printk(KERN_ERR DRV_PFX "query block returned object "
+			pr_err("query block returned object "
 			       "type: %d - buffer length:%d\n", obj->type,
 			       obj->type == ACPI_TYPE_BUFFER ?
 			       obj->buffer.length : 0);
@@ -97,8 +95,8 @@
 
 	struct acpi_buffer input = { sizeof(int), &value };
 
-	dprintk("Going to set block of instance: %d - value: %d\n",
-		instance, value);
+	pr_debug("Going to set block of instance: %d - value: %d\n",
+		 instance, value);
 
 	status = wmi_set_block(MSIWMI_BIOS_GUID, instance, &input);
 
@@ -112,20 +110,19 @@
 	/* Instance 1 is "get backlight", cmp with DSDT */
 	err = msi_wmi_query_block(1, &ret);
 	if (err) {
-		printk(KERN_ERR DRV_PFX "Could not query backlight: %d\n", err);
+		pr_err("Could not query backlight: %d\n", err);
 		return -EINVAL;
 	}
-	dprintk("Get: Query block returned: %d\n", ret);
+	pr_debug("Get: Query block returned: %d\n", ret);
 	for (level = 0; level < ARRAY_SIZE(backlight_map); level++) {
 		if (backlight_map[level] == ret) {
-			dprintk("Current backlight level: 0x%X - index: %d\n",
-				backlight_map[level], level);
+			pr_debug("Current backlight level: 0x%X - index: %d\n",
+				 backlight_map[level], level);
 			break;
 		}
 	}
 	if (level == ARRAY_SIZE(backlight_map)) {
-		printk(KERN_ERR DRV_PFX "get: Invalid brightness value: 0x%X\n",
-		       ret);
+		pr_err("get: Invalid brightness value: 0x%X\n", ret);
 		return -EINVAL;
 	}
 	return level;
@@ -156,7 +153,7 @@
 
 	status = wmi_get_event_data(value, &response);
 	if (status != AE_OK) {
-		printk(KERN_INFO DRV_PFX "bad event status 0x%x\n", status);
+		pr_info("bad event status 0x%x\n", status);
 		return;
 	}
 
@@ -164,7 +161,7 @@
 
 	if (obj && obj->type == ACPI_TYPE_INTEGER) {
 		int eventcode = obj->integer.value;
-		dprintk("Eventcode: 0x%x\n", eventcode);
+		pr_debug("Eventcode: 0x%x\n", eventcode);
 		key = sparse_keymap_entry_from_scancode(msi_wmi_input_dev,
 				eventcode);
 		if (key) {
@@ -175,8 +172,8 @@
 			/* Ignore event if the same event happened in a 50 ms
 			   timeframe -> Key press may result in 10-20 GPEs */
 			if (ktime_to_us(diff) < 1000 * 50) {
-				dprintk("Suppressed key event 0x%X - "
-					"Last press was %lld us ago\n",
+				pr_debug("Suppressed key event 0x%X - "
+					 "Last press was %lld us ago\n",
 					 key->code, ktime_to_us(diff));
 				return;
 			}
@@ -187,17 +184,16 @@
 			(!acpi_video_backlight_support() ||
 			(key->code != MSI_WMI_BRIGHTNESSUP &&
 			key->code != MSI_WMI_BRIGHTNESSDOWN))) {
-				dprintk("Send key: 0x%X - "
-					"Input layer keycode: %d\n", key->code,
-					 key->keycode);
+				pr_debug("Send key: 0x%X - "
+					 "Input layer keycode: %d\n",
+					 key->code, key->keycode);
 				sparse_keymap_report_entry(msi_wmi_input_dev,
 						key, 1, true);
 			}
 		} else
-			printk(KERN_INFO "Unknown key pressed - %x\n",
-			       eventcode);
+			pr_info("Unknown key pressed - %x\n", eventcode);
 	} else
-		printk(KERN_INFO DRV_PFX "Unknown event received\n");
+		pr_info("Unknown event received\n");
 	kfree(response.pointer);
 }
 
@@ -238,8 +234,7 @@
 	int err;
 
 	if (!wmi_has_guid(MSIWMI_EVENT_GUID)) {
-		printk(KERN_ERR
-		       "This machine doesn't have MSI-hotkeys through WMI\n");
+		pr_err("This machine doesn't have MSI-hotkeys through WMI\n");
 		return -ENODEV;
 	}
 	err = wmi_install_notify_handler(MSIWMI_EVENT_GUID,
@@ -270,7 +265,7 @@
 
 		backlight->props.brightness = err;
 	}
-	dprintk("Event handler installed\n");
+	pr_debug("Event handler installed\n");
 
 	return 0;
 

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 6fe8cd6..bbd182e 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c

@@ -42,6 +42,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -70,10 +72,10 @@
 #include <linux/miscdevice.h>
 #endif
 
-#define DRV_PFX			"sony-laptop: "
-#define dprintk(msg...)		do {	\
-	if (debug)			\
-		pr_warn(DRV_PFX msg);	\
+#define dprintk(fmt, ...)			\
+do {						\
+	if (debug)				\
+		pr_warn(fmt, ##__VA_ARGS__);	\
 } while (0)
 
 #define SONY_LAPTOP_DRIVER_VERSION	"0.6"
@@ -418,7 +420,7 @@
 	error = kfifo_alloc(&sony_laptop_input.fifo,
 			    SONY_LAPTOP_BUF_SIZE, GFP_KERNEL);
 	if (error) {
-		pr_err(DRV_PFX "kfifo_alloc failed\n");
+		pr_err("kfifo_alloc failed\n");
 		goto err_dec_users;
 	}
 
@@ -702,7 +704,7 @@
 		return 0;
 	}
 
-	pr_warn(DRV_PFX "acpi_callreadfunc failed\n");
+	pr_warn("acpi_callreadfunc failed\n");
 
 	return -1;
 }
@@ -728,8 +730,7 @@
 	if (status == AE_OK) {
 		if (result != NULL) {
 			if (out_obj.type != ACPI_TYPE_INTEGER) {
-				pr_warn(DRV_PFX "acpi_evaluate_object bad "
-				       "return type\n");
+				pr_warn("acpi_evaluate_object bad return type\n");
 				return -1;
 			}
 			*result = out_obj.integer.value;
@@ -737,7 +738,7 @@
 		return 0;
 	}
 
-	pr_warn(DRV_PFX "acpi_evaluate_object failed\n");
+	pr_warn("acpi_evaluate_object failed\n");
 
 	return -1;
 }
@@ -961,7 +962,6 @@
 static int sony_nc_get_brightness_ng(struct backlight_device *bd)
 {
 	int result;
-	int *handle = (int *)bl_get_data(bd);
 	struct sony_backlight_props *sdev =
 		(struct sony_backlight_props *)bl_get_data(bd);
 
@@ -973,7 +973,6 @@
 static int sony_nc_update_status_ng(struct backlight_device *bd)
 {
 	int value, result;
-	int *handle = (int *)bl_get_data(bd);
 	struct sony_backlight_props *sdev =
 		(struct sony_backlight_props *)bl_get_data(bd);
 
@@ -1104,10 +1103,8 @@
 				}
 
 				if (!key_event->data)
-					pr_info(DRV_PFX
-							"Unknown event: 0x%x 0x%x\n",
-							key_handle,
-							ev);
+					pr_info("Unknown event: 0x%x 0x%x\n",
+						key_handle, ev);
 				else
 					sony_laptop_report_input_event(ev);
 			}
@@ -1128,7 +1125,7 @@
 	struct acpi_device_info *info;
 
 	if (ACPI_SUCCESS(acpi_get_object_info(handle, &info))) {
-		pr_warn(DRV_PFX "method: name: %4.4s, args %X\n",
+		pr_warn("method: name: %4.4s, args %X\n",
 			(char *)&info->name, info->param_count);
 
 		kfree(info);
@@ -1169,7 +1166,7 @@
 		ret = acpi_callsetfunc(sony_nc_acpi_handle, *item->acpiset,
 				       item->value, NULL);
 		if (ret < 0) {
-			pr_err(DRV_PFX "%s: %d\n", __func__, ret);
+			pr_err("%s: %d\n", __func__, ret);
 			break;
 		}
 	}
@@ -1336,12 +1333,12 @@
 
 	device_enum = (union acpi_object *) buffer.pointer;
 	if (!device_enum) {
-		pr_err(DRV_PFX "No SN06 return object.");
+		pr_err("No SN06 return object\n");
 		goto out_no_enum;
 	}
 	if (device_enum->type != ACPI_TYPE_BUFFER) {
-		pr_err(DRV_PFX "Invalid SN06 return object 0x%.2x\n",
-				device_enum->type);
+		pr_err("Invalid SN06 return object 0x%.2x\n",
+		       device_enum->type);
 		goto out_no_enum;
 	}
 
@@ -1662,7 +1659,7 @@
 						      ops, &props);
 
 	if (IS_ERR(sony_bl_props.dev)) {
-		pr_warn(DRV_PFX "unable to register backlight device\n");
+		pr_warn("unable to register backlight device\n");
 		sony_bl_props.dev = NULL;
 	} else
 		sony_bl_props.dev->props.brightness =
@@ -1682,8 +1679,7 @@
 	acpi_handle handle;
 	struct sony_nc_value *item;
 
-	pr_info(DRV_PFX "%s v%s.\n", SONY_NC_DRIVER_NAME,
-			SONY_LAPTOP_DRIVER_VERSION);
+	pr_info("%s v%s\n", SONY_NC_DRIVER_NAME, SONY_LAPTOP_DRIVER_VERSION);
 
 	sony_nc_acpi_device = device;
 	strcpy(acpi_device_class(device), "sony/hotkey");
@@ -1708,7 +1704,7 @@
 				sony_nc_acpi_handle, 1, sony_walk_callback,
 				NULL, NULL, NULL);
 		if (ACPI_FAILURE(status)) {
-			pr_warn(DRV_PFX "unable to walk acpi resources\n");
+			pr_warn("unable to walk acpi resources\n");
 			result = -ENODEV;
 			goto outpresent;
 		}
@@ -1736,13 +1732,12 @@
 	/* setup input devices and helper fifo */
 	result = sony_laptop_setup_input(device);
 	if (result) {
-		pr_err(DRV_PFX "Unable to create input devices.\n");
+		pr_err("Unable to create input devices\n");
 		goto outkbdbacklight;
 	}
 
 	if (acpi_video_backlight_support()) {
-		pr_info(DRV_PFX "brightness ignored, must be "
-		       "controlled by ACPI video driver\n");
+		pr_info("brightness ignored, must be controlled by ACPI video driver\n");
 	} else {
 		sony_nc_backlight_setup();
 	}
@@ -2265,9 +2260,9 @@
 	if (pcidev)
 		pci_dev_put(pcidev);
 
-	pr_info(DRV_PFX "detected Type%d model\n",
-			dev->model == SONYPI_DEVICE_TYPE1 ? 1 :
-			dev->model == SONYPI_DEVICE_TYPE2 ? 2 : 3);
+	pr_info("detected Type%d model\n",
+		dev->model == SONYPI_DEVICE_TYPE1 ? 1 :
+		dev->model == SONYPI_DEVICE_TYPE2 ? 2 : 3);
 }
 
 /* camera tests and poweron/poweroff */
@@ -2313,7 +2308,7 @@
 static int __sony_pic_camera_off(void)
 {
 	if (!camera) {
-		pr_warn(DRV_PFX "camera control not enabled\n");
+		pr_warn("camera control not enabled\n");
 		return -ENODEV;
 	}
 
@@ -2333,7 +2328,7 @@
 	int i, j, x;
 
 	if (!camera) {
-		pr_warn(DRV_PFX "camera control not enabled\n");
+		pr_warn("camera control not enabled\n");
 		return -ENODEV;
 	}
 
@@ -2356,7 +2351,7 @@
 	}
 
 	if (j == 0) {
-		pr_warn(DRV_PFX "failed to power on camera\n");
+		pr_warn("failed to power on camera\n");
 		return -ENODEV;
 	}
 
@@ -2412,8 +2407,7 @@
 				ITERATIONS_SHORT);
 		break;
 	default:
-		pr_err(DRV_PFX "sony_pic_camera_command invalid: %d\n",
-		       command);
+		pr_err("sony_pic_camera_command invalid: %d\n", command);
 		break;
 	}
 	mutex_unlock(&spic_dev.lock);
@@ -2819,7 +2813,7 @@
 	error =
 	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL);
 	if (error) {
-		pr_err(DRV_PFX "kfifo_alloc failed\n");
+		pr_err("kfifo_alloc failed\n");
 		return error;
 	}
 
@@ -2829,12 +2823,12 @@
 		sonypi_misc_device.minor = minor;
 	error = misc_register(&sonypi_misc_device);
 	if (error) {
-		pr_err(DRV_PFX "misc_register failed\n");
+		pr_err("misc_register failed\n");
 		goto err_free_kfifo;
 	}
 	if (minor == -1)
-		pr_info(DRV_PFX "device allocated minor is %d\n",
-		       sonypi_misc_device.minor);
+		pr_info("device allocated minor is %d\n",
+			sonypi_misc_device.minor);
 
 	return 0;
 
@@ -2893,8 +2887,8 @@
 			}
 			for (i = 0; i < p->interrupt_count; i++) {
 				if (!p->interrupts[i]) {
-					pr_warn(DRV_PFX "Invalid IRQ %d\n",
-							p->interrupts[i]);
+					pr_warn("Invalid IRQ %d\n",
+						p->interrupts[i]);
 					continue;
 				}
 				interrupt = kzalloc(sizeof(*interrupt),
@@ -2932,14 +2926,14 @@
 						ioport->io2.address_length);
 			}
 			else {
-				pr_err(DRV_PFX "Unknown SPIC Type, more than 2 IO Ports\n");
+				pr_err("Unknown SPIC Type, more than 2 IO Ports\n");
 				return AE_ERROR;
 			}
 			return AE_OK;
 		}
 	default:
 		dprintk("Resource %d isn't an IRQ nor an IO port\n",
-				resource->type);
+			resource->type);
 
 	case ACPI_RESOURCE_TYPE_END_TAG:
 		return AE_OK;
@@ -2960,7 +2954,7 @@
 	dprintk("Evaluating _STA\n");
 	result = acpi_bus_get_status(device);
 	if (result) {
-		pr_warn(DRV_PFX "Unable to read status\n");
+		pr_warn("Unable to read status\n");
 		goto end;
 	}
 
@@ -2976,8 +2970,7 @@
 	status = acpi_walk_resources(device->handle, METHOD_NAME__PRS,
 			sony_pic_read_possible_resource, &spic_dev);
 	if (ACPI_FAILURE(status)) {
-		pr_warn(DRV_PFX "Failure evaluating %s\n",
-				METHOD_NAME__PRS);
+		pr_warn("Failure evaluating %s\n", METHOD_NAME__PRS);
 		result = -ENODEV;
 	}
 end:
@@ -3090,7 +3083,7 @@
 
 	/* check for total failure */
 	if (ACPI_FAILURE(status)) {
-		pr_err(DRV_PFX "Error evaluating _SRS\n");
+		pr_err("Error evaluating _SRS\n");
 		result = -ENODEV;
 		goto end;
 	}
@@ -3182,7 +3175,7 @@
 	struct sony_pic_irq *irq, *tmp_irq;
 
 	if (sony_pic_disable(device)) {
-		pr_err(DRV_PFX "Couldn't disable device.\n");
+		pr_err("Couldn't disable device\n");
 		return -ENXIO;
 	}
 
@@ -3222,8 +3215,7 @@
 	struct sony_pic_ioport *io, *tmp_io;
 	struct sony_pic_irq *irq, *tmp_irq;
 
-	pr_info(DRV_PFX "%s v%s.\n", SONY_PIC_DRIVER_NAME,
-			SONY_LAPTOP_DRIVER_VERSION);
+	pr_info("%s v%s\n", SONY_PIC_DRIVER_NAME, SONY_LAPTOP_DRIVER_VERSION);
 
 	spic_dev.acpi_dev = device;
 	strcpy(acpi_device_class(device), "sony/hotkey");
@@ -3233,14 +3225,14 @@
 	/* read _PRS resources */
 	result = sony_pic_possible_resources(device);
 	if (result) {
-		pr_err(DRV_PFX "Unable to read possible resources.\n");
+		pr_err("Unable to read possible resources\n");
 		goto err_free_resources;
 	}
 
 	/* setup input devices and helper fifo */
 	result = sony_laptop_setup_input(device);
 	if (result) {
-		pr_err(DRV_PFX "Unable to create input devices.\n");
+		pr_err("Unable to create input devices\n");
 		goto err_free_resources;
 	}
 
@@ -3281,7 +3273,7 @@
 		}
 	}
 	if (!spic_dev.cur_ioport) {
-		pr_err(DRV_PFX "Failed to request_region.\n");
+		pr_err("Failed to request_region\n");
 		result = -ENODEV;
 		goto err_remove_compat;
 	}
@@ -3301,7 +3293,7 @@
 		}
 	}
 	if (!spic_dev.cur_irq) {
-		pr_err(DRV_PFX "Failed to request_irq.\n");
+		pr_err("Failed to request_irq\n");
 		result = -ENODEV;
 		goto err_release_region;
 	}
@@ -3309,7 +3301,7 @@
 	/* set resource status _SRS */
 	result = sony_pic_enable(device, spic_dev.cur_ioport, spic_dev.cur_irq);
 	if (result) {
-		pr_err(DRV_PFX "Couldn't enable device.\n");
+		pr_err("Couldn't enable device\n");
 		goto err_free_irq;
 	}
 
@@ -3418,7 +3410,7 @@
 	if (!no_spic && dmi_check_system(sonypi_dmi_table)) {
 		result = acpi_bus_register_driver(&sony_pic_driver);
 		if (result) {
-			pr_err(DRV_PFX "Unable to register SPIC driver.");
+			pr_err("Unable to register SPIC driver\n");
 			goto out;
 		}
 		spic_drv_registered = 1;
@@ -3426,7 +3418,7 @@
 
 	result = acpi_bus_register_driver(&sony_nc_driver);
 	if (result) {
-		pr_err(DRV_PFX "Unable to register SNC driver.");
+		pr_err("Unable to register SNC driver\n");
 		goto out_unregister_pic;
 	}
 

diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c
index 865ef78..e24f5ae 100644
--- a/drivers/platform/x86/tc1100-wmi.c
+++ b/drivers/platform/x86/tc1100-wmi.c

@@ -25,6 +25,8 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -40,9 +42,6 @@
 #define TC1100_INSTANCE_WIRELESS		1
 #define TC1100_INSTANCE_JOGDIAL		2
 
-#define TC1100_LOGPREFIX "tc1100-wmi: "
-#define TC1100_INFO KERN_INFO TC1100_LOGPREFIX
-
 MODULE_AUTHOR("Jamey Hicks, Carlos Corbacho");
 MODULE_DESCRIPTION("HP Compaq TC1100 Tablet WMI Extras");
 MODULE_LICENSE("GPL");
@@ -264,7 +263,7 @@
 	if (error)
 		goto err_device_del;
 
-	printk(TC1100_INFO "HP Compaq TC1100 Tablet WMI Extras loaded\n");
+	pr_info("HP Compaq TC1100 Tablet WMI Extras loaded\n");
 	return 0;
 
  err_device_del:

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 562fcf0..77f6e70 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c

@@ -21,6 +21,8 @@
  *  02110-1301, USA.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #define TPACPI_VERSION "0.24"
 #define TPACPI_SYSFS_VERSION 0x020700
 
@@ -224,17 +226,6 @@
 
 #define TPACPI_MAX_ACPI_ARGS 3
 
-/* printk headers */
-#define TPACPI_LOG TPACPI_FILE ": "
-#define TPACPI_EMERG	KERN_EMERG	TPACPI_LOG
-#define TPACPI_ALERT	KERN_ALERT	TPACPI_LOG
-#define TPACPI_CRIT	KERN_CRIT	TPACPI_LOG
-#define TPACPI_ERR	KERN_ERR	TPACPI_LOG
-#define TPACPI_WARN	KERN_WARNING	TPACPI_LOG
-#define TPACPI_NOTICE	KERN_NOTICE	TPACPI_LOG
-#define TPACPI_INFO	KERN_INFO	TPACPI_LOG
-#define TPACPI_DEBUG	KERN_DEBUG	TPACPI_LOG
-
 /* Debugging printk groups */
 #define TPACPI_DBG_ALL		0xffff
 #define TPACPI_DBG_DISCLOSETASK	0x8000
@@ -389,34 +380,36 @@
  *  Debugging helpers
  */
 
-#define dbg_printk(a_dbg_level, format, arg...) \
-	do { if (dbg_level & (a_dbg_level)) \
-		printk(TPACPI_DEBUG "%s: " format, __func__ , ## arg); \
-	} while (0)
+#define dbg_printk(a_dbg_level, format, arg...)				\
+do {									\
+	if (dbg_level & (a_dbg_level))					\
+		printk(KERN_DEBUG pr_fmt("%s: " format),		\
+		       __func__, ##arg);				\
+} while (0)
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUG
 #define vdbg_printk dbg_printk
 static const char *str_supported(int is_supported);
 #else
-#define vdbg_printk(a_dbg_level, format, arg...) \
-	do { } while (0)
+static inline const char *str_supported(int is_supported) { return ""; }
+#define vdbg_printk(a_dbg_level, format, arg...)	\
+	no_printk(format, ##arg)
 #endif
 
 static void tpacpi_log_usertask(const char * const what)
 {
-	printk(TPACPI_DEBUG "%s: access by process with PID %d\n",
-		what, task_tgid_vnr(current));
+	printk(KERN_DEBUG pr_fmt("%s: access by process with PID %d\n"),
+	       what, task_tgid_vnr(current));
 }
 
-#define tpacpi_disclose_usertask(what, format, arg...) \
-	do { \
-		if (unlikely( \
-		    (dbg_level & TPACPI_DBG_DISCLOSETASK) && \
-		    (tpacpi_lifecycle == TPACPI_LIFE_RUNNING))) { \
-			printk(TPACPI_DEBUG "%s: PID %d: " format, \
-				what, task_tgid_vnr(current), ## arg); \
-		} \
-	} while (0)
+#define tpacpi_disclose_usertask(what, format, arg...)			\
+do {									\
+	if (unlikely((dbg_level & TPACPI_DBG_DISCLOSETASK) &&		\
+		     (tpacpi_lifecycle == TPACPI_LIFE_RUNNING))) {	\
+		printk(KERN_DEBUG pr_fmt("%s: PID %d: " format),	\
+		       what, task_tgid_vnr(current), ## arg);		\
+	}								\
+} while (0)
 
 /*
  * Quirk handling helpers
@@ -535,15 +528,6 @@
 	   "HKEY",		/* all others */
 	   );			/* 570 */
 
-TPACPI_HANDLE(vid, root, "\\_SB.PCI.AGP.VGA",	/* 570 */
-	   "\\_SB.PCI0.AGP0.VID0",	/* 600e/x, 770x */
-	   "\\_SB.PCI0.VID0",	/* 770e */
-	   "\\_SB.PCI0.VID",	/* A21e, G4x, R50e, X30, X40 */
-	   "\\_SB.PCI0.AGP.VGA",	/* X100e and a few others */
-	   "\\_SB.PCI0.AGP.VID",	/* all others */
-	   );				/* R30, R31 */
-
-
 /*************************************************************************
  * ACPI helpers
  */
@@ -563,7 +547,7 @@
 	int quiet;
 
 	if (!*fmt) {
-		printk(TPACPI_ERR "acpi_evalf() called with empty format\n");
+		pr_err("acpi_evalf() called with empty format\n");
 		return 0;
 	}
 
@@ -588,7 +572,7 @@
 			break;
 			/* add more types as needed */
 		default:
-			printk(TPACPI_ERR "acpi_evalf() called "
+			pr_err("acpi_evalf() called "
 			       "with invalid format character '%c'\n", c);
 			va_end(ap);
 			return 0;
@@ -617,13 +601,13 @@
 		break;
 		/* add more types as needed */
 	default:
-		printk(TPACPI_ERR "acpi_evalf() called "
+		pr_err("acpi_evalf() called "
 		       "with invalid format character '%c'\n", res_type);
 		return 0;
 	}
 
 	if (!success && !quiet)
-		printk(TPACPI_ERR "acpi_evalf(%s, %s, ...) failed: %s\n",
+		pr_err("acpi_evalf(%s, %s, ...) failed: %s\n",
 		       method, fmt0, acpi_format_exception(status));
 
 	return success;
@@ -767,8 +751,7 @@
 
 	rc = acpi_bus_get_device(*ibm->acpi->handle, &ibm->acpi->device);
 	if (rc < 0) {
-		printk(TPACPI_ERR "acpi_bus_get_device(%s) failed: %d\n",
-			ibm->name, rc);
+		pr_err("acpi_bus_get_device(%s) failed: %d\n", ibm->name, rc);
 		return -ENODEV;
 	}
 
@@ -781,12 +764,10 @@
 			ibm->acpi->type, dispatch_acpi_notify, ibm);
 	if (ACPI_FAILURE(status)) {
 		if (status == AE_ALREADY_EXISTS) {
-			printk(TPACPI_NOTICE
-			       "another device driver is already "
-			       "handling %s events\n", ibm->name);
+			pr_notice("another device driver is already "
+				  "handling %s events\n", ibm->name);
 		} else {
-			printk(TPACPI_ERR
-			       "acpi_install_notify_handler(%s) failed: %s\n",
+			pr_err("acpi_install_notify_handler(%s) failed: %s\n",
 			       ibm->name, acpi_format_exception(status));
 		}
 		return -ENODEV;
@@ -811,8 +792,7 @@
 
 	ibm->acpi->driver = kzalloc(sizeof(struct acpi_driver), GFP_KERNEL);
 	if (!ibm->acpi->driver) {
-		printk(TPACPI_ERR
-		       "failed to allocate memory for ibm->acpi->driver\n");
+		pr_err("failed to allocate memory for ibm->acpi->driver\n");
 		return -ENOMEM;
 	}
 
@@ -823,7 +803,7 @@
 
 	rc = acpi_bus_register_driver(ibm->acpi->driver);
 	if (rc < 0) {
-		printk(TPACPI_ERR "acpi_bus_register_driver(%s) failed: %d\n",
+		pr_err("acpi_bus_register_driver(%s) failed: %d\n",
 		       ibm->name, rc);
 		kfree(ibm->acpi->driver);
 		ibm->acpi->driver = NULL;
@@ -1081,15 +1061,14 @@
 static void tpacpi_disable_brightness_delay(void)
 {
 	if (acpi_evalf(hkey_handle, NULL, "PWMS", "qvd", 0))
-		printk(TPACPI_NOTICE
-			"ACPI backlight control delay disabled\n");
+		pr_notice("ACPI backlight control delay disabled\n");
 }
 
 static void printk_deprecated_attribute(const char * const what,
 					const char * const details)
 {
 	tpacpi_log_usertask("deprecated sysfs attribute");
-	printk(TPACPI_WARN "WARNING: sysfs attribute %s is deprecated and "
+	pr_warn("WARNING: sysfs attribute %s is deprecated and "
 		"will be removed. %s\n",
 		what, details);
 }
@@ -1264,8 +1243,7 @@
 						&tpacpi_rfk_rfkill_ops,
 						atp_rfk);
 	if (!atp_rfk || !atp_rfk->rfkill) {
-		printk(TPACPI_ERR
-			"failed to allocate memory for rfkill class\n");
+		pr_err("failed to allocate memory for rfkill class\n");
 		kfree(atp_rfk);
 		return -ENOMEM;
 	}
@@ -1275,9 +1253,8 @@
 
 	sw_status = (tp_rfkops->get_status)();
 	if (sw_status < 0) {
-		printk(TPACPI_ERR
-			"failed to read initial state for %s, error %d\n",
-			name, sw_status);
+		pr_err("failed to read initial state for %s, error %d\n",
+		       name, sw_status);
 	} else {
 		sw_state = (sw_status == TPACPI_RFK_RADIO_OFF);
 		if (set_default) {
@@ -1291,9 +1268,7 @@
 
 	res = rfkill_register(atp_rfk->rfkill);
 	if (res < 0) {
-		printk(TPACPI_ERR
-			"failed to register %s rfkill switch: %d\n",
-			name, res);
+		pr_err("failed to register %s rfkill switch: %d\n", name, res);
 		rfkill_destroy(atp_rfk->rfkill);
 		kfree(atp_rfk);
 		return res;
@@ -1301,7 +1276,7 @@
 
 	tpacpi_rfkill_switches[id] = atp_rfk;
 
-	printk(TPACPI_INFO "rfkill switch %s: radio is %sblocked\n",
+	pr_info("rfkill switch %s: radio is %sblocked\n",
 		name, (sw_state || hw_state) ? "" : "un");
 	return 0;
 }
@@ -1825,10 +1800,8 @@
 		 * broken, or really stable to begin with, so it is
 		 * best if the user upgrades the firmware anyway.
 		 */
-		printk(TPACPI_WARN
-			"WARNING: Outdated ThinkPad BIOS/EC firmware\n");
-		printk(TPACPI_WARN
-			"WARNING: This firmware may be missing critical bug "
+		pr_warn("WARNING: Outdated ThinkPad BIOS/EC firmware\n");
+		pr_warn("WARNING: This firmware may be missing critical bug "
 			"fixes and/or important features\n");
 	}
 }
@@ -2117,9 +2090,7 @@
 		(hotkey_all_mask | TPACPI_HKEY_NVRAM_KNOWN_MASK);
 
 	if (wantedmask)
-		printk(TPACPI_NOTICE
-			"required events 0x%08x not enabled!\n",
-			wantedmask);
+		pr_notice("required events 0x%08x not enabled!\n", wantedmask);
 }
 
 /*
@@ -2157,10 +2128,9 @@
 	 * a given event.
 	 */
 	if (!hotkey_mask_get() && !rc && (fwmask & ~hotkey_acpi_mask)) {
-		printk(TPACPI_NOTICE
-		       "asked for hotkey mask 0x%08x, but "
-		       "firmware forced it to 0x%08x\n",
-		       fwmask, hotkey_acpi_mask);
+		pr_notice("asked for hotkey mask 0x%08x, but "
+			  "firmware forced it to 0x%08x\n",
+			  fwmask, hotkey_acpi_mask);
 	}
 
 	if (tpacpi_lifecycle != TPACPI_LIFE_EXITING)
@@ -2184,13 +2154,11 @@
 	    (mask == 0xffff || mask == 0xffffff ||
 	     mask == 0xffffffff)) {
 		tp_warned.hotkey_mask_ff = 1;
-		printk(TPACPI_NOTICE
-		       "setting the hotkey mask to 0x%08x is likely "
-		       "not the best way to go about it\n", mask);
-		printk(TPACPI_NOTICE
-		       "please consider using the driver defaults, "
-		       "and refer to up-to-date thinkpad-acpi "
-		       "documentation\n");
+		pr_notice("setting the hotkey mask to 0x%08x is likely "
+			  "not the best way to go about it\n", mask);
+		pr_notice("please consider using the driver defaults, "
+			  "and refer to up-to-date thinkpad-acpi "
+			  "documentation\n");
 	}
 
 	/* Try to enable what the user asked for, plus whatever we need.
@@ -2574,8 +2542,7 @@
 					NULL, TPACPI_NVRAM_KTHREAD_NAME);
 			if (IS_ERR(tpacpi_hotkey_task)) {
 				tpacpi_hotkey_task = NULL;
-				printk(TPACPI_ERR
-				       "could not create kernel thread "
+				pr_err("could not create kernel thread "
 				       "for hotkey polling\n");
 			}
 		}
@@ -2583,11 +2550,10 @@
 		hotkey_poll_stop_sync();
 		if (may_warn && (poll_driver_mask || poll_user_mask) &&
 		    hotkey_poll_freq == 0) {
-			printk(TPACPI_NOTICE
-				"hot keys 0x%08x and/or events 0x%08x "
-				"require polling, which is currently "
-				"disabled\n",
-				poll_user_mask, poll_driver_mask);
+			pr_notice("hot keys 0x%08x and/or events 0x%08x "
+				  "require polling, which is currently "
+				  "disabled\n",
+				  poll_user_mask, poll_driver_mask);
 		}
 	}
 }
@@ -2811,13 +2777,13 @@
 	mutex_unlock(&hotkey_mutex);
 
 	if (rc < 0)
-		printk(TPACPI_ERR "hotkey_source_mask: failed to update the"
-			"firmware event mask!\n");
+		pr_err("hotkey_source_mask: "
+		       "failed to update the firmware event mask!\n");
 
 	if (r_ev)
-		printk(TPACPI_NOTICE "hotkey_source_mask: "
-			"some important events were disabled: "
-			"0x%04x\n", r_ev);
+		pr_notice("hotkey_source_mask: "
+			  "some important events were disabled: 0x%04x\n",
+			  r_ev);
 
 	tpacpi_disclose_usertask("hotkey_source_mask", "set to 0x%08lx\n", t);
 
@@ -3048,8 +3014,7 @@
 	if (((tp_features.hotkey_mask &&
 	      hotkey_mask_set(hotkey_orig_mask)) |
 	     hotkey_status_set(false)) != 0)
-		printk(TPACPI_ERR
-		       "failed to restore hot key mask "
+		pr_err("failed to restore hot key mask "
 		       "to BIOS defaults\n");
 }
 
@@ -3288,10 +3253,9 @@
 	   for HKEY interface version 0x100 */
 	if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
 		if ((hkeyv >> 8) != 1) {
-			printk(TPACPI_ERR "unknown version of the "
-			       "HKEY interface: 0x%x\n", hkeyv);
-			printk(TPACPI_ERR "please report this to %s\n",
-			       TPACPI_MAIL);
+			pr_err("unknown version of the HKEY interface: 0x%x\n",
+			       hkeyv);
+			pr_err("please report this to %s\n", TPACPI_MAIL);
 		} else {
 			/*
 			 * MHKV 0x100 in A31, R40, R40e,
@@ -3304,8 +3268,7 @@
 			/* Paranoia check AND init hotkey_all_mask */
 			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
 					"MHKA", "qd")) {
-				printk(TPACPI_ERR
-				       "missing MHKA handler, "
+				pr_err("missing MHKA handler, "
 				       "please report this to %s\n",
 				       TPACPI_MAIL);
 				/* Fallback: pre-init for FN+F3,F4,F12 */
@@ -3343,16 +3306,14 @@
 	if (dbg_wlswemul) {
 		tp_features.hotkey_wlsw = 1;
 		radiosw_state = !!tpacpi_wlsw_emulstate;
-		printk(TPACPI_INFO
-			"radio switch emulation enabled\n");
+		pr_info("radio switch emulation enabled\n");
 	} else
 #endif
 	/* Not all thinkpads have a hardware radio switch */
 	if (acpi_evalf(hkey_handle, &status, "WLSW", "qd")) {
 		tp_features.hotkey_wlsw = 1;
 		radiosw_state = !!status;
-		printk(TPACPI_INFO
-			"radio switch found; radios are %s\n",
+		pr_info("radio switch found; radios are %s\n",
 			enabled(status, 0));
 	}
 	if (tp_features.hotkey_wlsw)
@@ -3363,8 +3324,7 @@
 	if (!res && acpi_evalf(hkey_handle, &status, "MHKG", "qd")) {
 		tp_features.hotkey_tablet = 1;
 		tabletsw_state = !!(status & TP_HOTKEY_TABLET_MASK);
-		printk(TPACPI_INFO
-			"possible tablet mode switch found; "
+		pr_info("possible tablet mode switch found; "
 			"ThinkPad in %s mode\n",
 			(tabletsw_state) ? "tablet" : "laptop");
 		res = add_to_attr_set(hotkey_dev_attributes,
@@ -3382,8 +3342,7 @@
 	hotkey_keycode_map = kmalloc(TPACPI_HOTKEY_MAP_SIZE,
 					GFP_KERNEL);
 	if (!hotkey_keycode_map) {
-		printk(TPACPI_ERR
-			"failed to allocate memory for key map\n");
+		pr_err("failed to allocate memory for key map\n");
 		res = -ENOMEM;
 		goto err_exit;
 	}
@@ -3426,13 +3385,11 @@
 	 * userspace. tpacpi_detect_brightness_capabilities() must have
 	 * been called before this point  */
 	if (tp_features.bright_acpimode && acpi_video_backlight_support()) {
-		printk(TPACPI_INFO
-		       "This ThinkPad has standard ACPI backlight "
-		       "brightness control, supported by the ACPI "
-		       "video driver\n");
-		printk(TPACPI_NOTICE
-		       "Disabling thinkpad-acpi brightness events "
-		       "by default...\n");
+		pr_info("This ThinkPad has standard ACPI backlight "
+			"brightness control, supported by the ACPI "
+			"video driver\n");
+		pr_notice("Disabling thinkpad-acpi brightness events "
+			  "by default...\n");
 
 		/* Disable brightness up/down on Lenovo thinkpads when
 		 * ACPI is handling them, otherwise it is plain impossible
@@ -3539,8 +3496,7 @@
 
 	case TP_HKEY_EV_WKUP_S3_BATLOW: /* Battery on critical low level/S3 */
 	case TP_HKEY_EV_WKUP_S4_BATLOW: /* Battery on critical low level/S4 */
-		printk(TPACPI_ALERT
-			"EMERGENCY WAKEUP: battery almost empty\n");
+		pr_alert("EMERGENCY WAKEUP: battery almost empty\n");
 		/* how to auto-heal: */
 		/* 2313: woke up from S3, go to S4/S5 */
 		/* 2413: woke up from S4, go to S5 */
@@ -3551,9 +3507,7 @@
 	}
 
 	if (hotkey_wakeup_reason != TP_ACPI_WAKEUP_NONE) {
-		printk(TPACPI_INFO
-		       "woke up due to a hot-unplug "
-		       "request...\n");
+		pr_info("woke up due to a hot-unplug request...\n");
 		hotkey_wakeup_reason_notify_change();
 	}
 	return true;
@@ -3605,37 +3559,31 @@
 
 	switch (hkey) {
 	case TP_HKEY_EV_THM_TABLE_CHANGED:
-		printk(TPACPI_INFO
-			"EC reports that Thermal Table has changed\n");
+		pr_info("EC reports that Thermal Table has changed\n");
 		/* recommended action: do nothing, we don't have
 		 * Lenovo ATM information */
 		return true;
 	case TP_HKEY_EV_ALARM_BAT_HOT:
-		printk(TPACPI_CRIT
-			"THERMAL ALARM: battery is too hot!\n");
+		pr_crit("THERMAL ALARM: battery is too hot!\n");
 		/* recommended action: warn user through gui */
 		break;
 	case TP_HKEY_EV_ALARM_BAT_XHOT:
-		printk(TPACPI_ALERT
-			"THERMAL EMERGENCY: battery is extremely hot!\n");
+		pr_alert("THERMAL EMERGENCY: battery is extremely hot!\n");
 		/* recommended action: immediate sleep/hibernate */
 		break;
 	case TP_HKEY_EV_ALARM_SENSOR_HOT:
-		printk(TPACPI_CRIT
-			"THERMAL ALARM: "
+		pr_crit("THERMAL ALARM: "
 			"a sensor reports something is too hot!\n");
 		/* recommended action: warn user through gui, that */
 		/* some internal component is too hot */
 		break;
 	case TP_HKEY_EV_ALARM_SENSOR_XHOT:
-		printk(TPACPI_ALERT
-			"THERMAL EMERGENCY: "
-			"a sensor reports something is extremely hot!\n");
+		pr_alert("THERMAL EMERGENCY: "
+			 "a sensor reports something is extremely hot!\n");
 		/* recommended action: immediate sleep/hibernate */
 		break;
 	default:
-		printk(TPACPI_ALERT
-			 "THERMAL ALERT: unknown thermal alarm received\n");
+		pr_alert("THERMAL ALERT: unknown thermal alarm received\n");
 		known = false;
 	}
 
@@ -3652,8 +3600,7 @@
 	bool known_ev;
 
 	if (event != 0x80) {
-		printk(TPACPI_ERR
-		       "unknown HKEY notification event %d\n", event);
+		pr_err("unknown HKEY notification event %d\n", event);
 		/* forward it to userspace, maybe it knows how to handle it */
 		acpi_bus_generate_netlink_event(
 					ibm->acpi->device->pnp.device_class,
@@ -3664,7 +3611,7 @@
 
 	while (1) {
 		if (!acpi_evalf(hkey_handle, &hkey, "MHKP", "d")) {
-			printk(TPACPI_ERR "failed to retrieve HKEY event\n");
+			pr_err("failed to retrieve HKEY event\n");
 			return;
 		}
 
@@ -3692,8 +3639,7 @@
 			switch (hkey) {
 			case TP_HKEY_EV_BAYEJ_ACK:
 				hotkey_autosleep_ack = 1;
-				printk(TPACPI_INFO
-				       "bay ejected\n");
+				pr_info("bay ejected\n");
 				hotkey_wakeup_hotunplug_complete_notify_change();
 				known_ev = true;
 				break;
@@ -3709,8 +3655,7 @@
 			/* 0x4000-0x4FFF: dock-related wakeups */
 			if (hkey == TP_HKEY_EV_UNDOCK_ACK) {
 				hotkey_autosleep_ack = 1;
-				printk(TPACPI_INFO
-				       "undocked\n");
+				pr_info("undocked\n");
 				hotkey_wakeup_hotunplug_complete_notify_change();
 				known_ev = true;
 			} else {
@@ -3741,11 +3686,9 @@
 			known_ev = false;
 		}
 		if (!known_ev) {
-			printk(TPACPI_NOTICE
-			       "unhandled HKEY event 0x%04x\n", hkey);
-			printk(TPACPI_NOTICE
-			       "please report the conditions when this "
-			       "event happened to %s\n", TPACPI_MAIL);
+			pr_notice("unhandled HKEY event 0x%04x\n", hkey);
+			pr_notice("please report the conditions when this "
+				  "event happened to %s\n", TPACPI_MAIL);
 		}
 
 		/* Legacy events */
@@ -3778,8 +3721,7 @@
 
 	if (hotkey_status_set(true) < 0 ||
 	    hotkey_mask_set(hotkey_acpi_mask) < 0)
-		printk(TPACPI_ERR
-		       "error while attempting to reset the event "
+		pr_err("error while attempting to reset the event "
 		       "firmware interface\n");
 
 	tpacpi_send_radiosw_update();
@@ -3824,14 +3766,12 @@
 {
 	tpacpi_log_usertask("procfs hotkey enable/disable");
 	if (!WARN((tpacpi_lifecycle == TPACPI_LIFE_RUNNING || !enable),
-			TPACPI_WARN
-			"hotkey enable/disable functionality has been "
-			"removed from the driver.  Hotkeys are always "
-			"enabled\n"))
-		printk(TPACPI_ERR
-			"Please remove the hotkey=enable module "
-			"parameter, it is deprecated.  Hotkeys are always "
-			"enabled\n");
+		  pr_fmt("hotkey enable/disable functionality has been "
+			 "removed from the driver.  "
+			 "Hotkeys are always enabled.\n")))
+		pr_err("Please remove the hotkey=enable module "
+		       "parameter, it is deprecated.  "
+		       "Hotkeys are always enabled.\n");
 }
 
 static int hotkey_write(char *buf)
@@ -4011,8 +3951,7 @@
 	/* Order firmware to save current state to NVRAM */
 	if (!acpi_evalf(NULL, NULL, "\\BLTH", "vd",
 			TP_ACPI_BLTH_SAVE_STATE))
-		printk(TPACPI_NOTICE
-			"failed to save bluetooth state to NVRAM\n");
+		pr_notice("failed to save bluetooth state to NVRAM\n");
 	else
 		vdbg_printk(TPACPI_DBG_RFKILL,
 			"bluestooth state saved to NVRAM\n");
@@ -4051,8 +3990,7 @@
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_bluetoothemul) {
 		tp_features.bluetooth = 1;
-		printk(TPACPI_INFO
-			"bluetooth switch emulation enabled\n");
+		pr_info("bluetooth switch emulation enabled\n");
 	} else
 #endif
 	if (tp_features.bluetooth &&
@@ -4203,8 +4141,7 @@
 	/* Order firmware to save current state to NVRAM */
 	if (!acpi_evalf(NULL, NULL, "\\WGSV", "vd",
 			TP_ACPI_WGSV_SAVE_STATE))
-		printk(TPACPI_NOTICE
-			"failed to save WWAN state to NVRAM\n");
+		pr_notice("failed to save WWAN state to NVRAM\n");
 	else
 		vdbg_printk(TPACPI_DBG_RFKILL,
 			"WWAN state saved to NVRAM\n");
@@ -4241,8 +4178,7 @@
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_wwanemul) {
 		tp_features.wan = 1;
-		printk(TPACPI_INFO
-			"wwan switch emulation enabled\n");
+		pr_info("wwan switch emulation enabled\n");
 	} else
 #endif
 	if (tp_features.wan &&
@@ -4382,8 +4318,7 @@
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_uwbemul) {
 		tp_features.uwb = 1;
-		printk(TPACPI_INFO
-			"uwb switch emulation enabled\n");
+		pr_info("uwb switch emulation enabled\n");
 	} else
 #endif
 	if (tp_features.uwb &&
@@ -4444,6 +4379,15 @@
 static int video_autosw_get(void);
 static int video_autosw_set(int enable);
 
+TPACPI_HANDLE(vid, root,
+	      "\\_SB.PCI.AGP.VGA",	/* 570 */
+	      "\\_SB.PCI0.AGP0.VID0",	/* 600e/x, 770x */
+	      "\\_SB.PCI0.VID0",	/* 770e */
+	      "\\_SB.PCI0.VID",		/* A21e, G4x, R50e, X30, X40 */
+	      "\\_SB.PCI0.AGP.VGA",	/* X100e and a few others */
+	      "\\_SB.PCI0.AGP.VID",	/* all others */
+	);				/* R30, R31 */
+
 TPACPI_HANDLE(vid2, root, "\\_SB.PCI0.AGPB.VID");	/* G41 */
 
 static int __init video_init(struct ibm_init_struct *iibm)
@@ -4487,7 +4431,7 @@
 	dbg_printk(TPACPI_DBG_EXIT,
 		   "restoring original video autoswitch mode\n");
 	if (video_autosw_set(video_orig_autosw))
-		printk(TPACPI_ERR "error while trying to restore original "
+		pr_err("error while trying to restore original "
 			"video autoswitch mode\n");
 }
 
@@ -4560,8 +4504,7 @@
 		res = acpi_evalf(vid_handle, NULL,
 				 "ASWT", "vdd", status * 0x100, 0);
 		if (!autosw && video_autosw_set(autosw)) {
-			printk(TPACPI_ERR
-			       "video auto-switch left enabled due to error\n");
+			pr_err("video auto-switch left enabled due to error\n");
 			return -EIO;
 		}
 		break;
@@ -4630,8 +4573,7 @@
 		return -ENOSYS;
 	}
 	if (!autosw && video_autosw_set(autosw)) {
-		printk(TPACPI_ERR
-		       "video auto-switch left enabled due to error\n");
+		pr_err("video auto-switch left enabled due to error\n");
 		return -EIO;
 	}
 
@@ -5348,7 +5290,7 @@
 	tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS,
 			      GFP_KERNEL);
 	if (!tpacpi_leds) {
-		printk(TPACPI_ERR "Out of memory for LED data\n");
+		pr_err("Out of memory for LED data\n");
 		return -ENOMEM;
 	}
 
@@ -5367,9 +5309,8 @@
 	}
 
 #ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS
-	printk(TPACPI_NOTICE
-		"warning: userspace override of important "
-		"firmware LEDs is enabled\n");
+	pr_notice("warning: userspace override of important "
+		  "firmware LEDs is enabled\n");
 #endif
 	return 0;
 }
@@ -5639,17 +5580,16 @@
 	if (n <= 0)
 		return;
 
-	printk(TPACPI_NOTICE
-		"temperatures (Celsius):");
+	pr_notice("temperatures (Celsius):");
 
 	for (i = 0; i < n; i++) {
 		if (t.temp[i] != TPACPI_THERMAL_SENSOR_NA)
-			printk(KERN_CONT " %d", (int)(t.temp[i] / 1000));
+			pr_cont(" %d", (int)(t.temp[i] / 1000));
 		else
-			printk(KERN_CONT " N/A");
+			pr_cont(" N/A");
 	}
 
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 }
 
 /* sysfs temp##_input -------------------------------------------------- */
@@ -5769,14 +5709,12 @@
 		if (ta1 == 0) {
 			/* This is sheer paranoia, but we handle it anyway */
 			if (acpi_tmp7) {
-				printk(TPACPI_ERR
-				       "ThinkPad ACPI EC access misbehaving, "
+				pr_err("ThinkPad ACPI EC access misbehaving, "
 				       "falling back to ACPI TMPx access "
 				       "mode\n");
 				thermal_read_mode = TPACPI_THERMAL_ACPI_TMP07;
 			} else {
-				printk(TPACPI_ERR
-				       "ThinkPad ACPI EC access misbehaving, "
+				pr_err("ThinkPad ACPI EC access misbehaving, "
 				       "disabling thermal sensors access\n");
 				thermal_read_mode = TPACPI_THERMAL_NONE;
 			}
@@ -6129,8 +6067,8 @@
 	if (ACPI_SUCCESS(acpi_evaluate_object(handle, "_BCL", NULL, &buffer))) {
 		obj = (union acpi_object *)buffer.pointer;
 		if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {
-			printk(TPACPI_ERR "Unknown _BCL data, "
-			       "please report this to %s\n", TPACPI_MAIL);
+			pr_err("Unknown _BCL data, please report this to %s\n",
+			       TPACPI_MAIL);
 			rc = 0;
 		} else {
 			rc = obj->package.count;
@@ -6214,18 +6152,15 @@
 	switch (b) {
 	case 16:
 		bright_maxlvl = 15;
-		printk(TPACPI_INFO
-		       "detected a 16-level brightness capable ThinkPad\n");
+		pr_info("detected a 16-level brightness capable ThinkPad\n");
 		break;
 	case 8:
 	case 0:
 		bright_maxlvl = 7;
-		printk(TPACPI_INFO
-		       "detected a 8-level brightness capable ThinkPad\n");
+		pr_info("detected a 8-level brightness capable ThinkPad\n");
 		break;
 	default:
-		printk(TPACPI_ERR
-		       "Unsupported brightness interface, "
+		pr_err("Unsupported brightness interface, "
 		       "please contact %s\n", TPACPI_MAIL);
 		tp_features.bright_unkfw = 1;
 		bright_maxlvl = b - 1;
@@ -6260,22 +6195,19 @@
 
 	if (acpi_video_backlight_support()) {
 		if (brightness_enable > 1) {
-			printk(TPACPI_INFO
-			       "Standard ACPI backlight interface "
-			       "available, not loading native one.\n");
+			pr_info("Standard ACPI backlight interface "
+				"available, not loading native one\n");
 			return 1;
 		} else if (brightness_enable == 1) {
-			printk(TPACPI_WARN
-				"Cannot enable backlight brightness support, "
+			pr_warn("Cannot enable backlight brightness support, "
 				"ACPI is already handling it.  Refer to the "
-				"acpi_backlight kernel parameter\n");
+				"acpi_backlight kernel parameter.\n");
 			return 1;
 		}
 	} else if (tp_features.bright_acpimode && brightness_enable > 1) {
-		printk(TPACPI_NOTICE
-			"Standard ACPI backlight interface not "
-			"available, thinkpad_acpi native "
-			"brightness control enabled\n");
+		pr_notice("Standard ACPI backlight interface not "
+			  "available, thinkpad_acpi native "
+			  "brightness control enabled\n");
 	}
 
 	/*
@@ -6319,19 +6251,17 @@
 	if (IS_ERR(ibm_backlight_device)) {
 		int rc = PTR_ERR(ibm_backlight_device);
 		ibm_backlight_device = NULL;
-		printk(TPACPI_ERR "Could not register backlight device\n");
+		pr_err("Could not register backlight device\n");
 		return rc;
 	}
 	vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT,
 			"brightness is supported\n");
 
 	if (quirks & TPACPI_BRGHT_Q_ASK) {
-		printk(TPACPI_NOTICE
-			"brightness: will use unverified default: "
-			"brightness_mode=%d\n", brightness_mode);
-		printk(TPACPI_NOTICE
-			"brightness: please report to %s whether it works well "
-			"or not on your ThinkPad\n", TPACPI_MAIL);
+		pr_notice("brightness: will use unverified default: "
+			  "brightness_mode=%d\n", brightness_mode);
+		pr_notice("brightness: please report to %s whether it works well "
+			  "or not on your ThinkPad\n", TPACPI_MAIL);
 	}
 
 	/* Added by mistake in early 2007.  Probably useless, but it could
@@ -6804,8 +6734,7 @@
 	rc = snd_card_create(alsa_index, alsa_id, THIS_MODULE,
 			    sizeof(struct tpacpi_alsa_data), &card);
 	if (rc < 0 || !card) {
-		printk(TPACPI_ERR
-			"Failed to create ALSA card structures: %d\n", rc);
+		pr_err("Failed to create ALSA card structures: %d\n", rc);
 		return 1;
 	}
 
@@ -6839,9 +6768,8 @@
 		ctl_vol = snd_ctl_new1(&volume_alsa_control_vol, NULL);
 		rc = snd_ctl_add(card, ctl_vol);
 		if (rc < 0) {
-			printk(TPACPI_ERR
-				"Failed to create ALSA volume control: %d\n",
-				rc);
+			pr_err("Failed to create ALSA volume control: %d\n",
+			       rc);
 			goto err_exit;
 		}
 		data->ctl_vol_id = &ctl_vol->id;
@@ -6850,8 +6778,7 @@
 	ctl_mute = snd_ctl_new1(&volume_alsa_control_mute, NULL);
 	rc = snd_ctl_add(card, ctl_mute);
 	if (rc < 0) {
-		printk(TPACPI_ERR "Failed to create ALSA mute control: %d\n",
-			rc);
+		pr_err("Failed to create ALSA mute control: %d\n", rc);
 		goto err_exit;
 	}
 	data->ctl_mute_id = &ctl_mute->id;
@@ -6859,7 +6786,7 @@
 	snd_card_set_dev(card, &tpacpi_pdev->dev);
 	rc = snd_card_register(card);
 	if (rc < 0) {
-		printk(TPACPI_ERR "Failed to register ALSA card: %d\n", rc);
+		pr_err("Failed to register ALSA card: %d\n", rc);
 		goto err_exit;
 	}
 
@@ -6915,9 +6842,8 @@
 		return -EINVAL;
 
 	if (volume_mode == TPACPI_VOL_MODE_UCMS_STEP) {
-		printk(TPACPI_ERR
-			"UCMS step volume mode not implemented, "
-			"please contact %s\n", TPACPI_MAIL);
+		pr_err("UCMS step volume mode not implemented, "
+		       "please contact %s\n", TPACPI_MAIL);
 		return 1;
 	}
 
@@ -6981,13 +6907,11 @@
 
 	rc = volume_create_alsa_mixer();
 	if (rc) {
-		printk(TPACPI_ERR
-			"Could not create the ALSA mixer interface\n");
+		pr_err("Could not create the ALSA mixer interface\n");
 		return rc;
 	}
 
-	printk(TPACPI_INFO
-		"Console audio control enabled, mode: %s\n",
+	pr_info("Console audio control enabled, mode: %s\n",
 		(volume_control_allowed) ?
 			"override (read/write)" :
 			"monitor (read only)");
@@ -7049,12 +6973,10 @@
 	if (!volume_control_allowed && tpacpi_lifecycle != TPACPI_LIFE_INIT) {
 		if (unlikely(!tp_warned.volume_ctrl_forbidden)) {
 			tp_warned.volume_ctrl_forbidden = 1;
-			printk(TPACPI_NOTICE
-				"Console audio control in monitor mode, "
-				"changes are not allowed.\n");
-			printk(TPACPI_NOTICE
-				"Use the volume_control=1 module parameter "
-				"to enable volume control\n");
+			pr_notice("Console audio control in monitor mode, "
+				  "changes are not allowed\n");
+			pr_notice("Use the volume_control=1 module parameter "
+				  "to enable volume control\n");
 		}
 		return -EPERM;
 	}
@@ -7129,8 +7051,7 @@
 
 static int __init volume_init(struct ibm_init_struct *iibm)
 {
-	printk(TPACPI_INFO
-		"volume: disabled as there is no ALSA support in this kernel\n");
+	pr_info("volume: disabled as there is no ALSA support in this kernel\n");
 
 	return 1;
 }
@@ -7337,9 +7258,8 @@
 static void fan_quirk1_setup(void)
 {
 	if (fan_control_initial_status == 0x07) {
-		printk(TPACPI_NOTICE
-		       "fan_init: initial fan status is unknown, "
-		       "assuming it is in auto mode\n");
+		pr_notice("fan_init: initial fan status is unknown, "
+			  "assuming it is in auto mode\n");
 		tp_features.fan_ctrl_status_undef = 1;
 	}
 }
@@ -7726,8 +7646,7 @@
 		if (!queue_delayed_work(tpacpi_wq, &fan_watchdog_task,
 				msecs_to_jiffies(fan_watchdog_maxinterval
 						 * 1000))) {
-			printk(TPACPI_ERR
-			       "failed to queue the fan watchdog, "
+			pr_err("failed to queue the fan watchdog, "
 			       "watchdog will not trigger\n");
 		}
 	} else
@@ -7741,11 +7660,11 @@
 	if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING)
 		return;
 
-	printk(TPACPI_NOTICE "fan watchdog: enabling fan\n");
+	pr_notice("fan watchdog: enabling fan\n");
 	rc = fan_set_enable();
 	if (rc < 0) {
-		printk(TPACPI_ERR "fan watchdog: error %d while enabling fan, "
-			"will try again later...\n", -rc);
+		pr_err("fan watchdog: error %d while enabling fan, "
+		       "will try again later...\n", -rc);
 		/* reschedule for later */
 		fan_watchdog_reset();
 	}
@@ -8049,8 +7968,7 @@
 					"secondary fan support enabled\n");
 			}
 		} else {
-			printk(TPACPI_ERR
-			       "ThinkPad ACPI EC access misbehaving, "
+			pr_err("ThinkPad ACPI EC access misbehaving, "
 			       "fan status and control unavailable\n");
 			return 1;
 		}
@@ -8150,9 +8068,8 @@
 	fan_control_resume_level = 0;
 	rc = fan_get_status_safe(&fan_control_resume_level);
 	if (rc < 0)
-		printk(TPACPI_NOTICE
-			"failed to read fan level for later "
-			"restore during resume: %d\n", rc);
+		pr_notice("failed to read fan level for later "
+			  "restore during resume: %d\n", rc);
 
 	/* if it is undefined, don't attempt to restore it.
 	 * KEEP THIS LAST */
@@ -8207,13 +8124,11 @@
 		return;
 	}
 	if (do_set) {
-		printk(TPACPI_NOTICE
-			"restoring fan level to 0x%02x\n",
-			fan_control_resume_level);
+		pr_notice("restoring fan level to 0x%02x\n",
+			  fan_control_resume_level);
 		rc = fan_set_level_safe(fan_control_resume_level);
 		if (rc < 0)
-			printk(TPACPI_NOTICE
-				"failed to restore fan level: %d\n", rc);
+			pr_notice("failed to restore fan level: %d\n", rc);
 	}
 }
 
@@ -8305,8 +8220,8 @@
 
 	*rc = fan_set_level_safe(level);
 	if (*rc == -ENXIO)
-		printk(TPACPI_ERR "level command accepted for unsupported "
-		       "access mode %d", fan_control_access_mode);
+		pr_err("level command accepted for unsupported access mode %d\n",
+		       fan_control_access_mode);
 	else if (!*rc)
 		tpacpi_disclose_usertask("procfs fan",
 			"set level to %d\n", level);
@@ -8321,8 +8236,8 @@
 
 	*rc = fan_set_enable();
 	if (*rc == -ENXIO)
-		printk(TPACPI_ERR "enable command accepted for unsupported "
-		       "access mode %d", fan_control_access_mode);
+		pr_err("enable command accepted for unsupported access mode %d\n",
+		       fan_control_access_mode);
 	else if (!*rc)
 		tpacpi_disclose_usertask("procfs fan", "enable\n");
 
@@ -8336,8 +8251,8 @@
 
 	*rc = fan_set_disable();
 	if (*rc == -ENXIO)
-		printk(TPACPI_ERR "disable command accepted for unsupported "
-		       "access mode %d", fan_control_access_mode);
+		pr_err("disable command accepted for unsupported access mode %d\n",
+		       fan_control_access_mode);
 	else if (!*rc)
 		tpacpi_disclose_usertask("procfs fan", "disable\n");
 
@@ -8356,8 +8271,8 @@
 
 	*rc = fan_set_speed(speed);
 	if (*rc == -ENXIO)
-		printk(TPACPI_ERR "speed command accepted for unsupported "
-		       "access mode %d", fan_control_access_mode);
+		pr_err("speed command accepted for unsupported access mode %d\n",
+		       fan_control_access_mode);
 	else if (!*rc)
 		tpacpi_disclose_usertask("procfs fan",
 			"set speed to %d\n", speed);
@@ -8560,8 +8475,8 @@
 		if (ibm->acpi->notify) {
 			ret = setup_acpi_notify(ibm);
 			if (ret == -ENODEV) {
-				printk(TPACPI_NOTICE "disabling subdriver %s\n",
-					ibm->name);
+				pr_notice("disabling subdriver %s\n",
+					  ibm->name);
 				ret = 0;
 				goto err_out;
 			}
@@ -8583,8 +8498,7 @@
 		entry = proc_create_data(ibm->name, mode, proc_dir,
 					 &dispatch_proc_fops, ibm);
 		if (!entry) {
-			printk(TPACPI_ERR "unable to create proc entry %s\n",
-			       ibm->name);
+			pr_err("unable to create proc entry %s\n", ibm->name);
 			ret = -ENODEV;
 			goto err_out;
 		}
@@ -8683,13 +8597,11 @@
 				tp->ec_release = (ec_fw_string[4] << 8)
 						| ec_fw_string[5];
 			} else {
-				printk(TPACPI_NOTICE
-					"ThinkPad firmware release %s "
-					"doesn't match the known patterns\n",
-					ec_fw_string);
-				printk(TPACPI_NOTICE
-					"please report this to %s\n",
-					TPACPI_MAIL);
+				pr_notice("ThinkPad firmware release %s "
+					  "doesn't match the known patterns\n",
+					  ec_fw_string);
+				pr_notice("please report this to %s\n",
+					  TPACPI_MAIL);
 			}
 			break;
 		}
@@ -8733,8 +8645,7 @@
 	tpacpi_acpi_handle_locate("ec", TPACPI_ACPI_EC_HID, &ec_handle);
 	if (!ec_handle) {
 		if (is_thinkpad)
-			printk(TPACPI_ERR
-				"Not yet supported ThinkPad detected!\n");
+			pr_err("Not yet supported ThinkPad detected!\n");
 		return -ENODEV;
 	}
 
@@ -8746,10 +8657,10 @@
 
 static void __init thinkpad_acpi_init_banner(void)
 {
-	printk(TPACPI_INFO "%s v%s\n", TPACPI_DESC, TPACPI_VERSION);
-	printk(TPACPI_INFO "%s\n", TPACPI_URL);
+	pr_info("%s v%s\n", TPACPI_DESC, TPACPI_VERSION);
+	pr_info("%s\n", TPACPI_URL);
 
-	printk(TPACPI_INFO "ThinkPad BIOS %s, EC %s\n",
+	pr_info("ThinkPad BIOS %s, EC %s\n",
 		(thinkpad_id.bios_version_str) ?
 			thinkpad_id.bios_version_str : "unknown",
 		(thinkpad_id.ec_version_str) ?
@@ -8758,7 +8669,7 @@
 	BUG_ON(!thinkpad_id.vendor);
 
 	if (thinkpad_id.model_str)
-		printk(TPACPI_INFO "%s %s, model %s\n",
+		pr_info("%s %s, model %s\n",
 			(thinkpad_id.vendor == PCI_VENDOR_ID_IBM) ?
 				"IBM" : ((thinkpad_id.vendor ==
 						PCI_VENDOR_ID_LENOVO) ?
@@ -9024,8 +8935,7 @@
 
 	ret = get_thinkpad_model_data(&thinkpad_id);
 	if (ret) {
-		printk(TPACPI_ERR
-			"unable to get DMI data: %d\n", ret);
+		pr_err("unable to get DMI data: %d\n", ret);
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
@@ -9051,16 +8961,14 @@
 
 	proc_dir = proc_mkdir(TPACPI_PROC_DIR, acpi_root_dir);
 	if (!proc_dir) {
-		printk(TPACPI_ERR
-		       "unable to create proc dir " TPACPI_PROC_DIR);
+		pr_err("unable to create proc dir " TPACPI_PROC_DIR "\n");
 		thinkpad_acpi_module_exit();
 		return -ENODEV;
 	}
 
 	ret = platform_driver_register(&tpacpi_pdriver);
 	if (ret) {
-		printk(TPACPI_ERR
-		       "unable to register main platform driver\n");
+		pr_err("unable to register main platform driver\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
@@ -9068,8 +8976,7 @@
 
 	ret = platform_driver_register(&tpacpi_hwmon_pdriver);
 	if (ret) {
-		printk(TPACPI_ERR
-		       "unable to register hwmon platform driver\n");
+		pr_err("unable to register hwmon platform driver\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
@@ -9082,8 +8989,7 @@
 					&tpacpi_hwmon_pdriver.driver);
 	}
 	if (ret) {
-		printk(TPACPI_ERR
-		       "unable to create sysfs driver attributes\n");
+		pr_err("unable to create sysfs driver attributes\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
@@ -9096,7 +9002,7 @@
 	if (IS_ERR(tpacpi_pdev)) {
 		ret = PTR_ERR(tpacpi_pdev);
 		tpacpi_pdev = NULL;
-		printk(TPACPI_ERR "unable to register platform device\n");
+		pr_err("unable to register platform device\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
@@ -9106,16 +9012,14 @@
 	if (IS_ERR(tpacpi_sensors_pdev)) {
 		ret = PTR_ERR(tpacpi_sensors_pdev);
 		tpacpi_sensors_pdev = NULL;
-		printk(TPACPI_ERR
-		       "unable to register hwmon platform device\n");
+		pr_err("unable to register hwmon platform device\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
 	ret = device_create_file(&tpacpi_sensors_pdev->dev,
 				 &dev_attr_thinkpad_acpi_pdev_name);
 	if (ret) {
-		printk(TPACPI_ERR
-		       "unable to create sysfs hwmon device attributes\n");
+		pr_err("unable to create sysfs hwmon device attributes\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
@@ -9124,14 +9028,14 @@
 	if (IS_ERR(tpacpi_hwmon)) {
 		ret = PTR_ERR(tpacpi_hwmon);
 		tpacpi_hwmon = NULL;
-		printk(TPACPI_ERR "unable to register hwmon device\n");
+		pr_err("unable to register hwmon device\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
 	mutex_init(&tpacpi_inputdev_send_mutex);
 	tpacpi_inputdev = input_allocate_device();
 	if (!tpacpi_inputdev) {
-		printk(TPACPI_ERR "unable to allocate input device\n");
+		pr_err("unable to allocate input device\n");
 		thinkpad_acpi_module_exit();
 		return -ENOMEM;
 	} else {
@@ -9163,7 +9067,7 @@
 
 	ret = input_register_device(tpacpi_inputdev);
 	if (ret < 0) {
-		printk(TPACPI_ERR "unable to register input device\n");
+		pr_err("unable to register input device\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	} else {

diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c
index 1d07d6d..4c20447 100644
--- a/drivers/platform/x86/topstar-laptop.c
+++ b/drivers/platform/x86/topstar-laptop.c

@@ -194,7 +194,7 @@
 	if (ret < 0)
 		return ret;
 
-	printk(KERN_INFO "Topstar Laptop ACPI extras driver loaded\n");
+	pr_info("ACPI extras driver loaded\n");
 
 	return 0;
 }

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 63f42a2..cb009b2 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c

@@ -35,6 +35,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #define TOSHIBA_ACPI_VERSION	"0.19"
 #define PROC_INTERFACE_VERSION	1
 
@@ -60,11 +62,6 @@
 MODULE_DESCRIPTION("Toshiba Laptop ACPI Extras Driver");
 MODULE_LICENSE("GPL");
 
-#define MY_LOGPREFIX "toshiba_acpi: "
-#define MY_ERR KERN_ERR MY_LOGPREFIX
-#define MY_NOTICE KERN_NOTICE MY_LOGPREFIX
-#define MY_INFO KERN_INFO MY_LOGPREFIX
-
 /* Toshiba ACPI method paths */
 #define METHOD_LCD_BRIGHTNESS	"\\_SB_.PCI0.VGA_.LCD_._BCM"
 #define TOSH_INTERFACE_1	"\\_SB_.VALD"
@@ -301,7 +298,7 @@
 	in[0] = 0xf100;
 	status = hci_raw(in, out);
 	if (ACPI_FAILURE(status)) {
-		printk(MY_INFO "Illumination device not available\n");
+		pr_info("Illumination device not available\n");
 		return 0;
 	}
 	in[0] = 0xf400;
@@ -320,7 +317,7 @@
 	in[0] = 0xf100;
 	status = hci_raw(in, out);
 	if (ACPI_FAILURE(status)) {
-		printk(MY_INFO "Illumination device not available\n");
+		pr_info("Illumination device not available\n");
 		return;
 	}
 
@@ -331,7 +328,7 @@
 		in[2] = 1;
 		status = hci_raw(in, out);
 		if (ACPI_FAILURE(status)) {
-			printk(MY_INFO "ACPI call for illumination failed.\n");
+			pr_info("ACPI call for illumination failed\n");
 			return;
 		}
 	} else {
@@ -341,7 +338,7 @@
 		in[2] = 0;
 		status = hci_raw(in, out);
 		if (ACPI_FAILURE(status)) {
-			printk(MY_INFO "ACPI call for illumination failed.\n");
+			pr_info("ACPI call for illumination failed.\n");
 			return;
 		}
 	}
@@ -364,7 +361,7 @@
 	in[0] = 0xf100;
 	status = hci_raw(in, out);
 	if (ACPI_FAILURE(status)) {
-		printk(MY_INFO "Illumination device not available\n");
+		pr_info("Illumination device not available\n");
 		return LED_OFF;
 	}
 
@@ -373,7 +370,7 @@
 	in[1] = 0x14e;
 	status = hci_raw(in, out);
 	if (ACPI_FAILURE(status)) {
-		printk(MY_INFO "ACPI call for illumination failed.\n");
+		pr_info("ACPI call for illumination failed.\n");
 		return LED_OFF;
 	}
 
@@ -517,7 +514,7 @@
 		seq_printf(m, "brightness_levels:       %d\n",
 			     HCI_LCD_BRIGHTNESS_LEVELS);
 	} else {
-		printk(MY_ERR "Error reading LCD brightness\n");
+		pr_err("Error reading LCD brightness\n");
 	}
 
 	return 0;
@@ -592,7 +589,7 @@
 		seq_printf(m, "crt_out:                 %d\n", is_crt);
 		seq_printf(m, "tv_out:                  %d\n", is_tv);
 	} else {
-		printk(MY_ERR "Error reading video out status\n");
+		pr_err("Error reading video out status\n");
 	}
 
 	return 0;
@@ -686,7 +683,7 @@
 		seq_printf(m, "running:                 %d\n", (value > 0));
 		seq_printf(m, "force_on:                %d\n", force_fan);
 	} else {
-		printk(MY_ERR "Error reading fan status\n");
+		pr_err("Error reading fan status\n");
 	}
 
 	return 0;
@@ -750,9 +747,9 @@
 			 * some machines where system events sporadically
 			 * become disabled. */
 			hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result);
-			printk(MY_NOTICE "Re-enabled hotkeys\n");
+			pr_notice("Re-enabled hotkeys\n");
 		} else {
-			printk(MY_ERR "Error reading hotkey status\n");
+			pr_err("Error reading hotkey status\n");
 			goto end;
 		}
 	}
@@ -863,7 +860,7 @@
 
 			if (!sparse_keymap_report_event(toshiba_acpi.hotkey_dev,
 							value, 1, true)) {
-				printk(MY_INFO "Unknown key %x\n",
+				pr_info("Unknown key %x\n",
 				       value);
 			}
 		} else if (hci_result == HCI_NOT_SUPPORTED) {
@@ -871,7 +868,7 @@
 			 * some machines where system events sporadically
 			 * become disabled. */
 			hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result);
-			printk(MY_NOTICE "Re-enabled hotkeys\n");
+			pr_notice("Re-enabled hotkeys\n");
 		}
 	} while (hci_result != HCI_EMPTY);
 }
@@ -883,13 +880,13 @@
 
 	status = acpi_get_handle(NULL, device, &toshiba_acpi.handle);
 	if (ACPI_FAILURE(status)) {
-		printk(MY_INFO "Unable to get notification device\n");
+		pr_info("Unable to get notification device\n");
 		return -ENODEV;
 	}
 
 	toshiba_acpi.hotkey_dev = input_allocate_device();
 	if (!toshiba_acpi.hotkey_dev) {
-		printk(MY_INFO "Unable to register input device\n");
+		pr_info("Unable to register input device\n");
 		return -ENOMEM;
 	}
 
@@ -905,21 +902,21 @@
 	status = acpi_install_notify_handler(toshiba_acpi.handle,
 				ACPI_DEVICE_NOTIFY, toshiba_acpi_notify, NULL);
 	if (ACPI_FAILURE(status)) {
-		printk(MY_INFO "Unable to install hotkey notification\n");
+		pr_info("Unable to install hotkey notification\n");
 		error = -ENODEV;
 		goto err_free_keymap;
 	}
 
 	status = acpi_evaluate_object(toshiba_acpi.handle, "ENAB", NULL, NULL);
 	if (ACPI_FAILURE(status)) {
-		printk(MY_INFO "Unable to enable hotkeys\n");
+		pr_info("Unable to enable hotkeys\n");
 		error = -ENODEV;
 		goto err_remove_notify;
 	}
 
 	error = input_register_device(toshiba_acpi.hotkey_dev);
 	if (error) {
-		printk(MY_INFO "Unable to register input device\n");
+		pr_info("Unable to register input device\n");
 		goto err_remove_notify;
 	}
 
@@ -980,17 +977,17 @@
 	if (is_valid_acpi_path(TOSH_INTERFACE_1 GHCI_METHOD)) {
 		method_hci = TOSH_INTERFACE_1 GHCI_METHOD;
 		if (toshiba_acpi_setup_keyboard(TOSH_INTERFACE_1))
-			printk(MY_INFO "Unable to activate hotkeys\n");
+			pr_info("Unable to activate hotkeys\n");
 	} else if (is_valid_acpi_path(TOSH_INTERFACE_2 GHCI_METHOD)) {
 		method_hci = TOSH_INTERFACE_2 GHCI_METHOD;
 		if (toshiba_acpi_setup_keyboard(TOSH_INTERFACE_2))
-			printk(MY_INFO "Unable to activate hotkeys\n");
+			pr_info("Unable to activate hotkeys\n");
 	} else
 		return -ENODEV;
 
-	printk(MY_INFO "Toshiba Laptop ACPI Extras version %s\n",
+	pr_info("Toshiba Laptop ACPI Extras version %s\n",
 	       TOSHIBA_ACPI_VERSION);
-	printk(MY_INFO "    HCI method: %s\n", method_hci);
+	pr_info("    HCI method: %s\n", method_hci);
 
 	mutex_init(&toshiba_acpi.mutex);
 
@@ -998,7 +995,7 @@
 							      -1, NULL, 0);
 	if (IS_ERR(toshiba_acpi.p_dev)) {
 		ret = PTR_ERR(toshiba_acpi.p_dev);
-		printk(MY_ERR "unable to register platform device\n");
+		pr_err("unable to register platform device\n");
 		toshiba_acpi.p_dev = NULL;
 		toshiba_acpi_exit();
 		return ret;
@@ -1028,7 +1025,7 @@
         if (IS_ERR(toshiba_backlight_device)) {
 		ret = PTR_ERR(toshiba_backlight_device);
 
-		printk(KERN_ERR "Could not register toshiba backlight device\n");
+		pr_err("Could not register toshiba backlight device\n");
 		toshiba_backlight_device = NULL;
 		toshiba_acpi_exit();
 		return ret;
@@ -1042,14 +1039,14 @@
 						   &toshiba_rfk_ops,
 						   &toshiba_acpi);
 		if (!toshiba_acpi.bt_rfk) {
-			printk(MY_ERR "unable to allocate rfkill device\n");
+			pr_err("unable to allocate rfkill device\n");
 			toshiba_acpi_exit();
 			return -ENOMEM;
 		}
 
 		ret = rfkill_register(toshiba_acpi.bt_rfk);
 		if (ret) {
-			printk(MY_ERR "unable to register rfkill device\n");
+			pr_err("unable to register rfkill device\n");
 			rfkill_destroy(toshiba_acpi.bt_rfk);
 			toshiba_acpi_exit();
 			return ret;

diff --git a/drivers/platform/x86/toshiba_bluetooth.c b/drivers/platform/x86/toshiba_bluetooth.c
index 9440686..5fb7186 100644
--- a/drivers/platform/x86/toshiba_bluetooth.c
+++ b/drivers/platform/x86/toshiba_bluetooth.c

@@ -17,6 +17,8 @@
  * delivered.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -70,14 +72,13 @@
 	if (!(result & 0x01))
 		return 0;
 
-	printk(KERN_INFO "toshiba_bluetooth: Re-enabling Toshiba Bluetooth\n");
+	pr_info("Re-enabling Toshiba Bluetooth\n");
 	res1 = acpi_evaluate_object(handle, "AUSB", NULL, NULL);
 	res2 = acpi_evaluate_object(handle, "BTPO", NULL, NULL);
 	if (!ACPI_FAILURE(res1) || !ACPI_FAILURE(res2))
 		return 0;
 
-	printk(KERN_WARNING "toshiba_bluetooth: Failed to re-enable "
-	       "Toshiba Bluetooth\n");
+	pr_warn("Failed to re-enable Toshiba Bluetooth\n");
 
 	return -ENODEV;
 }
@@ -107,8 +108,8 @@
 				       &bt_present);
 
 	if (!ACPI_FAILURE(status) && bt_present) {
-		printk(KERN_INFO "Detected Toshiba ACPI Bluetooth device - "
-		      "installing RFKill handler\n");
+		pr_info("Detected Toshiba ACPI Bluetooth device - "
+			"installing RFKill handler\n");
 		result = toshiba_bluetooth_enable(device->handle);
 	}
 

diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 05cc796..f23d5a8 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c

@@ -486,16 +486,16 @@
 	pr_info("\tnotify_id: %02X\n", g->notify_id);
 	pr_info("\treserved: %02X\n", g->reserved);
 	pr_info("\tinstance_count: %d\n", g->instance_count);
-	pr_info("\tflags: %#x ", g->flags);
+	pr_info("\tflags: %#x", g->flags);
 	if (g->flags) {
 		if (g->flags & ACPI_WMI_EXPENSIVE)
-			pr_cont("ACPI_WMI_EXPENSIVE ");
+			pr_cont(" ACPI_WMI_EXPENSIVE");
 		if (g->flags & ACPI_WMI_METHOD)
-			pr_cont("ACPI_WMI_METHOD ");
+			pr_cont(" ACPI_WMI_METHOD");
 		if (g->flags & ACPI_WMI_STRING)
-			pr_cont("ACPI_WMI_STRING ");
+			pr_cont(" ACPI_WMI_STRING");
 		if (g->flags & ACPI_WMI_EVENT)
-			pr_cont("ACPI_WMI_EVENT ");
+			pr_cont(" ACPI_WMI_EVENT");
 	}
 	pr_cont("\n");
 

diff --git a/drivers/platform/x86/xo15-ebook.c b/drivers/platform/x86/xo15-ebook.c
index c1372ed..fad153d 100644
--- a/drivers/platform/x86/xo15-ebook.c
+++ b/drivers/platform/x86/xo15-ebook.c

@@ -11,6 +11,8 @@
  *  your option) any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -20,7 +22,6 @@
 #include <acpi/acpi_drivers.h>
 
 #define MODULE_NAME "xo15-ebook"
-#define PREFIX MODULE_NAME ": "
 
 #define XO15_EBOOK_CLASS		MODULE_NAME
 #define XO15_EBOOK_TYPE_UNKNOWN	0x00
@@ -105,7 +106,7 @@
 	class = acpi_device_class(device);
 
 	if (strcmp(hid, XO15_EBOOK_HID)) {
-		printk(KERN_ERR PREFIX "Unsupported hid [%s]\n", hid);
+		pr_err("Unsupported hid [%s]\n", hid);
 		error = -ENODEV;
 		goto err_free_input;
 	}

diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index f46855c..ad747dc 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c

@@ -381,11 +381,6 @@
 	return err;
 }
 
-/**
- * proc_scsi_show - show contents of /proc/scsi/scsi (attached devices)
- * @s: output goes here
- * @p: not used
- */
 static int always_match(struct device *dev, void *data)
 {
 	return 1;

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index fbd96b2..de35c3a 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig

@@ -80,6 +80,15 @@
 	help
 	  This is the SPI controller master driver for Blackfin 5xx processor.
 
+config SPI_BFIN_SPORT
+	tristate "SPI bus via Blackfin SPORT"
+	depends on BLACKFIN
+	help
+	  Enable support for a SPI bus via the Blackfin SPORT peripheral.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called spi_bfin_sport.
+
 config SPI_AU1550
 	tristate "Au1550/Au12x0 SPI Controller"
 	depends on (SOC_AU1550 || SOC_AU1200) && EXPERIMENTAL

diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index fd2fc5f..0f8c69b 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile

@@ -13,6 +13,7 @@
 obj-$(CONFIG_SPI_ATMEL)			+= atmel_spi.o
 obj-$(CONFIG_SPI_ATH79)			+= ath79_spi.o
 obj-$(CONFIG_SPI_BFIN)			+= spi_bfin5xx.o
+obj-$(CONFIG_SPI_BFIN_SPORT)		+= spi_bfin_sport.o
 obj-$(CONFIG_SPI_BITBANG)		+= spi_bitbang.o
 obj-$(CONFIG_SPI_AU1550)		+= au1550_spi.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi_butterfly.o

diff --git a/drivers/spi/spi_bfin_sport.c b/drivers/spi/spi_bfin_sport.c
new file mode 100644
index 0000000..e557ff6
--- /dev/null
+++ b/drivers/spi/spi_bfin_sport.c

@@ -0,0 +1,952 @@
+/*
+ * SPI bus via the Blackfin SPORT peripheral
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright 2009-2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+
+#include <asm/portmux.h>
+#include <asm/bfin5xx_spi.h>
+#include <asm/blackfin.h>
+#include <asm/bfin_sport.h>
+#include <asm/cacheflush.h>
+
+#define DRV_NAME	"bfin-sport-spi"
+#define DRV_DESC	"SPI bus via the Blackfin SPORT"
+
+MODULE_AUTHOR("Cliff Cai");
+MODULE_DESCRIPTION(DRV_DESC);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bfin-sport-spi");
+
+enum bfin_sport_spi_state {
+	START_STATE,
+	RUNNING_STATE,
+	DONE_STATE,
+	ERROR_STATE,
+};
+
+struct bfin_sport_spi_master_data;
+
+struct bfin_sport_transfer_ops {
+	void (*write) (struct bfin_sport_spi_master_data *);
+	void (*read) (struct bfin_sport_spi_master_data *);
+	void (*duplex) (struct bfin_sport_spi_master_data *);
+};
+
+struct bfin_sport_spi_master_data {
+	/* Driver model hookup */
+	struct device *dev;
+
+	/* SPI framework hookup */
+	struct spi_master *master;
+
+	/* Regs base of SPI controller */
+	struct sport_register __iomem *regs;
+	int err_irq;
+
+	/* Pin request list */
+	u16 *pin_req;
+
+	/* Driver message queue */
+	struct workqueue_struct *workqueue;
+	struct work_struct pump_messages;
+	spinlock_t lock;
+	struct list_head queue;
+	int busy;
+	bool run;
+
+	/* Message Transfer pump */
+	struct tasklet_struct pump_transfers;
+
+	/* Current message transfer state info */
+	enum bfin_sport_spi_state state;
+	struct spi_message *cur_msg;
+	struct spi_transfer *cur_transfer;
+	struct bfin_sport_spi_slave_data *cur_chip;
+	union {
+		void *tx;
+		u8 *tx8;
+		u16 *tx16;
+	};
+	void *tx_end;
+	union {
+		void *rx;
+		u8 *rx8;
+		u16 *rx16;
+	};
+	void *rx_end;
+
+	int cs_change;
+	struct bfin_sport_transfer_ops *ops;
+};
+
+struct bfin_sport_spi_slave_data {
+	u16 ctl_reg;
+	u16 baud;
+	u16 cs_chg_udelay;	/* Some devices require > 255usec delay */
+	u32 cs_gpio;
+	u16 idle_tx_val;
+	struct bfin_sport_transfer_ops *ops;
+};
+
+static void
+bfin_sport_spi_enable(struct bfin_sport_spi_master_data *drv_data)
+{
+	bfin_write_or(&drv_data->regs->tcr1, TSPEN);
+	bfin_write_or(&drv_data->regs->rcr1, TSPEN);
+	SSYNC();
+}
+
+static void
+bfin_sport_spi_disable(struct bfin_sport_spi_master_data *drv_data)
+{
+	bfin_write_and(&drv_data->regs->tcr1, ~TSPEN);
+	bfin_write_and(&drv_data->regs->rcr1, ~TSPEN);
+	SSYNC();
+}
+
+/* Caculate the SPI_BAUD register value based on input HZ */
+static u16
+bfin_sport_hz_to_spi_baud(u32 speed_hz)
+{
+	u_long clk, sclk = get_sclk();
+	int div = (sclk / (2 * speed_hz)) - 1;
+
+	if (div < 0)
+		div = 0;
+
+	clk = sclk / (2 * (div + 1));
+
+	if (clk > speed_hz)
+		div++;
+
+	return div;
+}
+
+/* Chip select operation functions for cs_change flag */
+static void
+bfin_sport_spi_cs_active(struct bfin_sport_spi_slave_data *chip)
+{
+	gpio_direction_output(chip->cs_gpio, 0);
+}
+
+static void
+bfin_sport_spi_cs_deactive(struct bfin_sport_spi_slave_data *chip)
+{
+	gpio_direction_output(chip->cs_gpio, 1);
+	/* Move delay here for consistency */
+	if (chip->cs_chg_udelay)
+		udelay(chip->cs_chg_udelay);
+}
+
+static void
+bfin_sport_spi_stat_poll_complete(struct bfin_sport_spi_master_data *drv_data)
+{
+	unsigned long timeout = jiffies + HZ;
+	while (!(bfin_read(&drv_data->regs->stat) & RXNE)) {
+		if (!time_before(jiffies, timeout))
+			break;
+	}
+}
+
+static void
+bfin_sport_spi_u8_writer(struct bfin_sport_spi_master_data *drv_data)
+{
+	u16 dummy;
+
+	while (drv_data->tx < drv_data->tx_end) {
+		bfin_write(&drv_data->regs->tx16, *drv_data->tx8++);
+		bfin_sport_spi_stat_poll_complete(drv_data);
+		dummy = bfin_read(&drv_data->regs->rx16);
+	}
+}
+
+static void
+bfin_sport_spi_u8_reader(struct bfin_sport_spi_master_data *drv_data)
+{
+	u16 tx_val = drv_data->cur_chip->idle_tx_val;
+
+	while (drv_data->rx < drv_data->rx_end) {
+		bfin_write(&drv_data->regs->tx16, tx_val);
+		bfin_sport_spi_stat_poll_complete(drv_data);
+		*drv_data->rx8++ = bfin_read(&drv_data->regs->rx16);
+	}
+}
+
+static void
+bfin_sport_spi_u8_duplex(struct bfin_sport_spi_master_data *drv_data)
+{
+	while (drv_data->rx < drv_data->rx_end) {
+		bfin_write(&drv_data->regs->tx16, *drv_data->tx8++);
+		bfin_sport_spi_stat_poll_complete(drv_data);
+		*drv_data->rx8++ = bfin_read(&drv_data->regs->rx16);
+	}
+}
+
+static struct bfin_sport_transfer_ops bfin_sport_transfer_ops_u8 = {
+	.write  = bfin_sport_spi_u8_writer,
+	.read   = bfin_sport_spi_u8_reader,
+	.duplex = bfin_sport_spi_u8_duplex,
+};
+
+static void
+bfin_sport_spi_u16_writer(struct bfin_sport_spi_master_data *drv_data)
+{
+	u16 dummy;
+
+	while (drv_data->tx < drv_data->tx_end) {
+		bfin_write(&drv_data->regs->tx16, *drv_data->tx16++);
+		bfin_sport_spi_stat_poll_complete(drv_data);
+		dummy = bfin_read(&drv_data->regs->rx16);
+	}
+}
+
+static void
+bfin_sport_spi_u16_reader(struct bfin_sport_spi_master_data *drv_data)
+{
+	u16 tx_val = drv_data->cur_chip->idle_tx_val;
+
+	while (drv_data->rx < drv_data->rx_end) {
+		bfin_write(&drv_data->regs->tx16, tx_val);
+		bfin_sport_spi_stat_poll_complete(drv_data);
+		*drv_data->rx16++ = bfin_read(&drv_data->regs->rx16);
+	}
+}
+
+static void
+bfin_sport_spi_u16_duplex(struct bfin_sport_spi_master_data *drv_data)
+{
+	while (drv_data->rx < drv_data->rx_end) {
+		bfin_write(&drv_data->regs->tx16, *drv_data->tx16++);
+		bfin_sport_spi_stat_poll_complete(drv_data);
+		*drv_data->rx16++ = bfin_read(&drv_data->regs->rx16);
+	}
+}
+
+static struct bfin_sport_transfer_ops bfin_sport_transfer_ops_u16 = {
+	.write  = bfin_sport_spi_u16_writer,
+	.read   = bfin_sport_spi_u16_reader,
+	.duplex = bfin_sport_spi_u16_duplex,
+};
+
+/* stop controller and re-config current chip */
+static void
+bfin_sport_spi_restore_state(struct bfin_sport_spi_master_data *drv_data)
+{
+	struct bfin_sport_spi_slave_data *chip = drv_data->cur_chip;
+	unsigned int bits = (drv_data->ops == &bfin_sport_transfer_ops_u8 ? 7 : 15);
+
+	bfin_sport_spi_disable(drv_data);
+	dev_dbg(drv_data->dev, "restoring spi ctl state\n");
+
+	bfin_write(&drv_data->regs->tcr1, chip->ctl_reg);
+	bfin_write(&drv_data->regs->tcr2, bits);
+	bfin_write(&drv_data->regs->tclkdiv, chip->baud);
+	bfin_write(&drv_data->regs->tfsdiv, bits);
+	SSYNC();
+
+	bfin_write(&drv_data->regs->rcr1, chip->ctl_reg & ~(ITCLK | ITFS));
+	bfin_write(&drv_data->regs->rcr2, bits);
+	SSYNC();
+
+	bfin_sport_spi_cs_active(chip);
+}
+
+/* test if there is more transfer to be done */
+static enum bfin_sport_spi_state
+bfin_sport_spi_next_transfer(struct bfin_sport_spi_master_data *drv_data)
+{
+	struct spi_message *msg = drv_data->cur_msg;
+	struct spi_transfer *trans = drv_data->cur_transfer;
+
+	/* Move to next transfer */
+	if (trans->transfer_list.next != &msg->transfers) {
+		drv_data->cur_transfer =
+		    list_entry(trans->transfer_list.next,
+			       struct spi_transfer, transfer_list);
+		return RUNNING_STATE;
+	}
+
+	return DONE_STATE;
+}
+
+/*
+ * caller already set message->status;
+ * dma and pio irqs are blocked give finished message back
+ */
+static void
+bfin_sport_spi_giveback(struct bfin_sport_spi_master_data *drv_data)
+{
+	struct bfin_sport_spi_slave_data *chip = drv_data->cur_chip;
+	unsigned long flags;
+	struct spi_message *msg;
+
+	spin_lock_irqsave(&drv_data->lock, flags);
+	msg = drv_data->cur_msg;
+	drv_data->state = START_STATE;
+	drv_data->cur_msg = NULL;
+	drv_data->cur_transfer = NULL;
+	drv_data->cur_chip = NULL;
+	queue_work(drv_data->workqueue, &drv_data->pump_messages);
+	spin_unlock_irqrestore(&drv_data->lock, flags);
+
+	if (!drv_data->cs_change)
+		bfin_sport_spi_cs_deactive(chip);
+
+	if (msg->complete)
+		msg->complete(msg->context);
+}
+
+static irqreturn_t
+sport_err_handler(int irq, void *dev_id)
+{
+	struct bfin_sport_spi_master_data *drv_data = dev_id;
+	u16 status;
+
+	dev_dbg(drv_data->dev, "%s enter\n", __func__);
+	status = bfin_read(&drv_data->regs->stat) & (TOVF | TUVF | ROVF | RUVF);
+
+	if (status) {
+		bfin_write(&drv_data->regs->stat, status);
+		SSYNC();
+
+		bfin_sport_spi_disable(drv_data);
+		dev_err(drv_data->dev, "status error:%s%s%s%s\n",
+			status & TOVF ? " TOVF" : "",
+			status & TUVF ? " TUVF" : "",
+			status & ROVF ? " ROVF" : "",
+			status & RUVF ? " RUVF" : "");
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void
+bfin_sport_spi_pump_transfers(unsigned long data)
+{
+	struct bfin_sport_spi_master_data *drv_data = (void *)data;
+	struct spi_message *message = NULL;
+	struct spi_transfer *transfer = NULL;
+	struct spi_transfer *previous = NULL;
+	struct bfin_sport_spi_slave_data *chip = NULL;
+	unsigned int bits_per_word;
+	u32 tranf_success = 1;
+	u32 transfer_speed;
+	u8 full_duplex = 0;
+
+	/* Get current state information */
+	message = drv_data->cur_msg;
+	transfer = drv_data->cur_transfer;
+	chip = drv_data->cur_chip;
+
+	if (transfer->speed_hz)
+		transfer_speed = bfin_sport_hz_to_spi_baud(transfer->speed_hz);
+	else
+		transfer_speed = chip->baud;
+	bfin_write(&drv_data->regs->tclkdiv, transfer_speed);
+	SSYNC();
+
+	/*
+	 * if msg is error or done, report it back using complete() callback
+	 */
+
+	 /* Handle for abort */
+	if (drv_data->state == ERROR_STATE) {
+		dev_dbg(drv_data->dev, "transfer: we've hit an error\n");
+		message->status = -EIO;
+		bfin_sport_spi_giveback(drv_data);
+		return;
+	}
+
+	/* Handle end of message */
+	if (drv_data->state == DONE_STATE) {
+		dev_dbg(drv_data->dev, "transfer: all done!\n");
+		message->status = 0;
+		bfin_sport_spi_giveback(drv_data);
+		return;
+	}
+
+	/* Delay if requested at end of transfer */
+	if (drv_data->state == RUNNING_STATE) {
+		dev_dbg(drv_data->dev, "transfer: still running ...\n");
+		previous = list_entry(transfer->transfer_list.prev,
+				      struct spi_transfer, transfer_list);
+		if (previous->delay_usecs)
+			udelay(previous->delay_usecs);
+	}
+
+	if (transfer->len == 0) {
+		/* Move to next transfer of this msg */
+		drv_data->state = bfin_sport_spi_next_transfer(drv_data);
+		/* Schedule next transfer tasklet */
+		tasklet_schedule(&drv_data->pump_transfers);
+	}
+
+	if (transfer->tx_buf != NULL) {
+		drv_data->tx = (void *)transfer->tx_buf;
+		drv_data->tx_end = drv_data->tx + transfer->len;
+		dev_dbg(drv_data->dev, "tx_buf is %p, tx_end is %p\n",
+			transfer->tx_buf, drv_data->tx_end);
+	} else
+		drv_data->tx = NULL;
+
+	if (transfer->rx_buf != NULL) {
+		full_duplex = transfer->tx_buf != NULL;
+		drv_data->rx = transfer->rx_buf;
+		drv_data->rx_end = drv_data->rx + transfer->len;
+		dev_dbg(drv_data->dev, "rx_buf is %p, rx_end is %p\n",
+			transfer->rx_buf, drv_data->rx_end);
+	} else
+		drv_data->rx = NULL;
+
+	drv_data->cs_change = transfer->cs_change;
+
+	/* Bits per word setup */
+	bits_per_word = transfer->bits_per_word ? : message->spi->bits_per_word;
+	if (bits_per_word == 8)
+		drv_data->ops = &bfin_sport_transfer_ops_u8;
+	else
+		drv_data->ops = &bfin_sport_transfer_ops_u16;
+
+	drv_data->state = RUNNING_STATE;
+
+	if (drv_data->cs_change)
+		bfin_sport_spi_cs_active(chip);
+
+	dev_dbg(drv_data->dev,
+		"now pumping a transfer: width is %d, len is %d\n",
+		bits_per_word, transfer->len);
+
+	/* PIO mode write then read */
+	dev_dbg(drv_data->dev, "doing IO transfer\n");
+
+	bfin_sport_spi_enable(drv_data);
+	if (full_duplex) {
+		/* full duplex mode */
+		BUG_ON((drv_data->tx_end - drv_data->tx) !=
+		       (drv_data->rx_end - drv_data->rx));
+		drv_data->ops->duplex(drv_data);
+
+		if (drv_data->tx != drv_data->tx_end)
+			tranf_success = 0;
+	} else if (drv_data->tx != NULL) {
+		/* write only half duplex */
+
+		drv_data->ops->write(drv_data);
+
+		if (drv_data->tx != drv_data->tx_end)
+			tranf_success = 0;
+	} else if (drv_data->rx != NULL) {
+		/* read only half duplex */
+
+		drv_data->ops->read(drv_data);
+		if (drv_data->rx != drv_data->rx_end)
+			tranf_success = 0;
+	}
+	bfin_sport_spi_disable(drv_data);
+
+	if (!tranf_success) {
+		dev_dbg(drv_data->dev, "IO write error!\n");
+		drv_data->state = ERROR_STATE;
+	} else {
+		/* Update total byte transfered */
+		message->actual_length += transfer->len;
+		/* Move to next transfer of this msg */
+		drv_data->state = bfin_sport_spi_next_transfer(drv_data);
+		if (drv_data->cs_change)
+			bfin_sport_spi_cs_deactive(chip);
+	}
+
+	/* Schedule next transfer tasklet */
+	tasklet_schedule(&drv_data->pump_transfers);
+}
+
+/* pop a msg from queue and kick off real transfer */
+static void
+bfin_sport_spi_pump_messages(struct work_struct *work)
+{
+	struct bfin_sport_spi_master_data *drv_data;
+	unsigned long flags;
+	struct spi_message *next_msg;
+
+	drv_data = container_of(work, struct bfin_sport_spi_master_data, pump_messages);
+
+	/* Lock queue and check for queue work */
+	spin_lock_irqsave(&drv_data->lock, flags);
+	if (list_empty(&drv_data->queue) || !drv_data->run) {
+		/* pumper kicked off but no work to do */
+		drv_data->busy = 0;
+		spin_unlock_irqrestore(&drv_data->lock, flags);
+		return;
+	}
+
+	/* Make sure we are not already running a message */
+	if (drv_data->cur_msg) {
+		spin_unlock_irqrestore(&drv_data->lock, flags);
+		return;
+	}
+
+	/* Extract head of queue */
+	next_msg = list_entry(drv_data->queue.next,
+		struct spi_message, queue);
+
+	drv_data->cur_msg = next_msg;
+
+	/* Setup the SSP using the per chip configuration */
+	drv_data->cur_chip = spi_get_ctldata(drv_data->cur_msg->spi);
+
+	list_del_init(&drv_data->cur_msg->queue);
+
+	/* Initialize message state */
+	drv_data->cur_msg->state = START_STATE;
+	drv_data->cur_transfer = list_entry(drv_data->cur_msg->transfers.next,
+					    struct spi_transfer, transfer_list);
+	bfin_sport_spi_restore_state(drv_data);
+	dev_dbg(drv_data->dev, "got a message to pump, "
+		"state is set to: baud %d, cs_gpio %i, ctl 0x%x\n",
+		drv_data->cur_chip->baud, drv_data->cur_chip->cs_gpio,
+		drv_data->cur_chip->ctl_reg);
+
+	dev_dbg(drv_data->dev,
+		"the first transfer len is %d\n",
+		drv_data->cur_transfer->len);
+
+	/* Mark as busy and launch transfers */
+	tasklet_schedule(&drv_data->pump_transfers);
+
+	drv_data->busy = 1;
+	spin_unlock_irqrestore(&drv_data->lock, flags);
+}
+
+/*
+ * got a msg to transfer, queue it in drv_data->queue.
+ * And kick off message pumper
+ */
+static int
+bfin_sport_spi_transfer(struct spi_device *spi, struct spi_message *msg)
+{
+	struct bfin_sport_spi_master_data *drv_data = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	spin_lock_irqsave(&drv_data->lock, flags);
+
+	if (!drv_data->run) {
+		spin_unlock_irqrestore(&drv_data->lock, flags);
+		return -ESHUTDOWN;
+	}
+
+	msg->actual_length = 0;
+	msg->status = -EINPROGRESS;
+	msg->state = START_STATE;
+
+	dev_dbg(&spi->dev, "adding an msg in transfer()\n");
+	list_add_tail(&msg->queue, &drv_data->queue);
+
+	if (drv_data->run && !drv_data->busy)
+		queue_work(drv_data->workqueue, &drv_data->pump_messages);
+
+	spin_unlock_irqrestore(&drv_data->lock, flags);
+
+	return 0;
+}
+
+/* Called every time common spi devices change state */
+static int
+bfin_sport_spi_setup(struct spi_device *spi)
+{
+	struct bfin_sport_spi_slave_data *chip, *first = NULL;
+	int ret;
+
+	/* Only alloc (or use chip_info) on first setup */
+	chip = spi_get_ctldata(spi);
+	if (chip == NULL) {
+		struct bfin5xx_spi_chip *chip_info;
+
+		chip = first = kzalloc(sizeof(*chip), GFP_KERNEL);
+		if (!chip)
+			return -ENOMEM;
+
+		/* platform chip_info isn't required */
+		chip_info = spi->controller_data;
+		if (chip_info) {
+			/*
+			 * DITFS and TDTYPE are only thing we don't set, but
+			 * they probably shouldn't be changed by people.
+			 */
+			if (chip_info->ctl_reg || chip_info->enable_dma) {
+				ret = -EINVAL;
+				dev_err(&spi->dev, "don't set ctl_reg/enable_dma fields");
+				goto error;
+			}
+			chip->cs_chg_udelay = chip_info->cs_chg_udelay;
+			chip->idle_tx_val = chip_info->idle_tx_val;
+			spi->bits_per_word = chip_info->bits_per_word;
+		}
+	}
+
+	if (spi->bits_per_word != 8 && spi->bits_per_word != 16) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	/* translate common spi framework into our register
+	 * following configure contents are same for tx and rx.
+	 */
+
+	if (spi->mode & SPI_CPHA)
+		chip->ctl_reg &= ~TCKFE;
+	else
+		chip->ctl_reg |= TCKFE;
+
+	if (spi->mode & SPI_LSB_FIRST)
+		chip->ctl_reg |= TLSBIT;
+	else
+		chip->ctl_reg &= ~TLSBIT;
+
+	/* Sport in master mode */
+	chip->ctl_reg |= ITCLK | ITFS | TFSR | LATFS | LTFS;
+
+	chip->baud = bfin_sport_hz_to_spi_baud(spi->max_speed_hz);
+
+	chip->cs_gpio = spi->chip_select;
+	ret = gpio_request(chip->cs_gpio, spi->modalias);
+	if (ret)
+		goto error;
+
+	dev_dbg(&spi->dev, "setup spi chip %s, width is %d\n",
+			spi->modalias, spi->bits_per_word);
+	dev_dbg(&spi->dev, "ctl_reg is 0x%x, GPIO is %i\n",
+			chip->ctl_reg, spi->chip_select);
+
+	spi_set_ctldata(spi, chip);
+
+	bfin_sport_spi_cs_deactive(chip);
+
+	return ret;
+
+ error:
+	kfree(first);
+	return ret;
+}
+
+/*
+ * callback for spi framework.
+ * clean driver specific data
+ */
+static void
+bfin_sport_spi_cleanup(struct spi_device *spi)
+{
+	struct bfin_sport_spi_slave_data *chip = spi_get_ctldata(spi);
+
+	if (!chip)
+		return;
+
+	gpio_free(chip->cs_gpio);
+
+	kfree(chip);
+}
+
+static int
+bfin_sport_spi_init_queue(struct bfin_sport_spi_master_data *drv_data)
+{
+	INIT_LIST_HEAD(&drv_data->queue);
+	spin_lock_init(&drv_data->lock);
+
+	drv_data->run = false;
+	drv_data->busy = 0;
+
+	/* init transfer tasklet */
+	tasklet_init(&drv_data->pump_transfers,
+		     bfin_sport_spi_pump_transfers, (unsigned long)drv_data);
+
+	/* init messages workqueue */
+	INIT_WORK(&drv_data->pump_messages, bfin_sport_spi_pump_messages);
+	drv_data->workqueue =
+	    create_singlethread_workqueue(dev_name(drv_data->master->dev.parent));
+	if (drv_data->workqueue == NULL)
+		return -EBUSY;
+
+	return 0;
+}
+
+static int
+bfin_sport_spi_start_queue(struct bfin_sport_spi_master_data *drv_data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&drv_data->lock, flags);
+
+	if (drv_data->run || drv_data->busy) {
+		spin_unlock_irqrestore(&drv_data->lock, flags);
+		return -EBUSY;
+	}
+
+	drv_data->run = true;
+	drv_data->cur_msg = NULL;
+	drv_data->cur_transfer = NULL;
+	drv_data->cur_chip = NULL;
+	spin_unlock_irqrestore(&drv_data->lock, flags);
+
+	queue_work(drv_data->workqueue, &drv_data->pump_messages);
+
+	return 0;
+}
+
+static inline int
+bfin_sport_spi_stop_queue(struct bfin_sport_spi_master_data *drv_data)
+{
+	unsigned long flags;
+	unsigned limit = 500;
+	int status = 0;
+
+	spin_lock_irqsave(&drv_data->lock, flags);
+
+	/*
+	 * This is a bit lame, but is optimized for the common execution path.
+	 * A wait_queue on the drv_data->busy could be used, but then the common
+	 * execution path (pump_messages) would be required to call wake_up or
+	 * friends on every SPI message. Do this instead
+	 */
+	drv_data->run = false;
+	while (!list_empty(&drv_data->queue) && drv_data->busy && limit--) {
+		spin_unlock_irqrestore(&drv_data->lock, flags);
+		msleep(10);
+		spin_lock_irqsave(&drv_data->lock, flags);
+	}
+
+	if (!list_empty(&drv_data->queue) || drv_data->busy)
+		status = -EBUSY;
+
+	spin_unlock_irqrestore(&drv_data->lock, flags);
+
+	return status;
+}
+
+static inline int
+bfin_sport_spi_destroy_queue(struct bfin_sport_spi_master_data *drv_data)
+{
+	int status;
+
+	status = bfin_sport_spi_stop_queue(drv_data);
+	if (status)
+		return status;
+
+	destroy_workqueue(drv_data->workqueue);
+
+	return 0;
+}
+
+static int __devinit
+bfin_sport_spi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct bfin5xx_spi_master *platform_info;
+	struct spi_master *master;
+	struct resource *res, *ires;
+	struct bfin_sport_spi_master_data *drv_data;
+	int status;
+
+	platform_info = dev->platform_data;
+
+	/* Allocate master with space for drv_data */
+	master = spi_alloc_master(dev, sizeof(*master) + 16);
+	if (!master) {
+		dev_err(dev, "cannot alloc spi_master\n");
+		return -ENOMEM;
+	}
+
+	drv_data = spi_master_get_devdata(master);
+	drv_data->master = master;
+	drv_data->dev = dev;
+	drv_data->pin_req = platform_info->pin_req;
+
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
+	master->bus_num = pdev->id;
+	master->num_chipselect = platform_info->num_chipselect;
+	master->cleanup = bfin_sport_spi_cleanup;
+	master->setup = bfin_sport_spi_setup;
+	master->transfer = bfin_sport_spi_transfer;
+
+	/* Find and map our resources */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		dev_err(dev, "cannot get IORESOURCE_MEM\n");
+		status = -ENOENT;
+		goto out_error_get_res;
+	}
+
+	drv_data->regs = ioremap(res->start, resource_size(res));
+	if (drv_data->regs == NULL) {
+		dev_err(dev, "cannot map registers\n");
+		status = -ENXIO;
+		goto out_error_ioremap;
+	}
+
+	ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!ires) {
+		dev_err(dev, "cannot get IORESOURCE_IRQ\n");
+		status = -ENODEV;
+		goto out_error_get_ires;
+	}
+	drv_data->err_irq = ires->start;
+
+	/* Initial and start queue */
+	status = bfin_sport_spi_init_queue(drv_data);
+	if (status) {
+		dev_err(dev, "problem initializing queue\n");
+		goto out_error_queue_alloc;
+	}
+
+	status = bfin_sport_spi_start_queue(drv_data);
+	if (status) {
+		dev_err(dev, "problem starting queue\n");
+		goto out_error_queue_alloc;
+	}
+
+	status = request_irq(drv_data->err_irq, sport_err_handler,
+		0, "sport_spi_err", drv_data);
+	if (status) {
+		dev_err(dev, "unable to request sport err irq\n");
+		goto out_error_irq;
+	}
+
+	status = peripheral_request_list(drv_data->pin_req, DRV_NAME);
+	if (status) {
+		dev_err(dev, "requesting peripherals failed\n");
+		goto out_error_peripheral;
+	}
+
+	/* Register with the SPI framework */
+	platform_set_drvdata(pdev, drv_data);
+	status = spi_register_master(master);
+	if (status) {
+		dev_err(dev, "problem registering spi master\n");
+		goto out_error_master;
+	}
+
+	dev_info(dev, "%s, regs_base@%p\n", DRV_DESC, drv_data->regs);
+	return 0;
+
+ out_error_master:
+	peripheral_free_list(drv_data->pin_req);
+ out_error_peripheral:
+	free_irq(drv_data->err_irq, drv_data);
+ out_error_irq:
+ out_error_queue_alloc:
+	bfin_sport_spi_destroy_queue(drv_data);
+ out_error_get_ires:
+	iounmap(drv_data->regs);
+ out_error_ioremap:
+ out_error_get_res:
+	spi_master_put(master);
+
+	return status;
+}
+
+/* stop hardware and remove the driver */
+static int __devexit
+bfin_sport_spi_remove(struct platform_device *pdev)
+{
+	struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev);
+	int status = 0;
+
+	if (!drv_data)
+		return 0;
+
+	/* Remove the queue */
+	status = bfin_sport_spi_destroy_queue(drv_data);
+	if (status)
+		return status;
+
+	/* Disable the SSP at the peripheral and SOC level */
+	bfin_sport_spi_disable(drv_data);
+
+	/* Disconnect from the SPI framework */
+	spi_unregister_master(drv_data->master);
+
+	peripheral_free_list(drv_data->pin_req);
+
+	/* Prevent double remove */
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int
+bfin_sport_spi_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev);
+	int status;
+
+	status = bfin_sport_spi_stop_queue(drv_data);
+	if (status)
+		return status;
+
+	/* stop hardware */
+	bfin_sport_spi_disable(drv_data);
+
+	return status;
+}
+
+static int
+bfin_sport_spi_resume(struct platform_device *pdev)
+{
+	struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev);
+	int status;
+
+	/* Enable the SPI interface */
+	bfin_sport_spi_enable(drv_data);
+
+	/* Start the queue running */
+	status = bfin_sport_spi_start_queue(drv_data);
+	if (status)
+		dev_err(drv_data->dev, "problem resuming queue\n");
+
+	return status;
+}
+#else
+# define bfin_sport_spi_suspend NULL
+# define bfin_sport_spi_resume  NULL
+#endif
+
+static struct platform_driver bfin_sport_spi_driver = {
+	.driver	= {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+	},
+	.probe   = bfin_sport_spi_probe,
+	.remove  = __devexit_p(bfin_sport_spi_remove),
+	.suspend = bfin_sport_spi_suspend,
+	.resume  = bfin_sport_spi_resume,
+};
+
+static int __init bfin_sport_spi_init(void)
+{
+	return platform_driver_register(&bfin_sport_spi_driver);
+}
+module_init(bfin_sport_spi_init);
+
+static void __exit bfin_sport_spi_exit(void)
+{
+	platform_driver_unregister(&bfin_sport_spi_driver);
+}
+module_exit(bfin_sport_spi_exit);

diff --git a/drivers/spi/tle62x0.c b/drivers/spi/tle62x0.c
index a393895..32a4087 100644
--- a/drivers/spi/tle62x0.c
+++ b/drivers/spi/tle62x0.c

@@ -283,7 +283,7 @@
 	return 0;
 
  err_gpios:
-	for (; ptr > 0; ptr--)
+	while (--ptr >= 0)
 		device_remove_file(&spi->dev, gpio_attrs[ptr]);
 
 	device_remove_file(&spi->dev, &dev_attr_status_show);
@@ -301,6 +301,7 @@
 	for (ptr = 0; ptr < st->nr_gpio; ptr++)
 		device_remove_file(&spi->dev, gpio_attrs[ptr]);
 
+	device_remove_file(&spi->dev, &dev_attr_status_show);
 	kfree(st);
 	return 0;
 }

diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index fc6f2a5..0b1c82a 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c

@@ -499,7 +499,7 @@
 	dev_set_drvdata(hwmon->device, hwmon);
 	result = device_create_file(hwmon->device, &dev_attr_name);
 	if (result)
-		goto unregister_hwmon_device;
+		goto free_mem;
 
  register_sys_interface:
 	tz->hwmon = hwmon;
@@ -513,7 +513,7 @@
 	sysfs_attr_init(&tz->temp_input.attr.attr);
 	result = device_create_file(hwmon->device, &tz->temp_input.attr);
 	if (result)
-		goto unregister_hwmon_device;
+		goto unregister_name;
 
 	if (tz->ops->get_crit_temp) {
 		unsigned long temperature;
@@ -527,7 +527,7 @@
 			result = device_create_file(hwmon->device,
 						    &tz->temp_crit.attr);
 			if (result)
-				goto unregister_hwmon_device;
+				goto unregister_input;
 		}
 	}
 
@@ -539,9 +539,9 @@
 
 	return 0;
 
- unregister_hwmon_device:
-	device_remove_file(hwmon->device, &tz->temp_crit.attr);
+ unregister_input:
 	device_remove_file(hwmon->device, &tz->temp_input.attr);
+ unregister_name:
 	if (new_hwmon_device) {
 		device_remove_file(hwmon->device, &dev_attr_name);
 		hwmon_device_unregister(hwmon->device);

diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index 660b80a..1102ce6 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c

@@ -348,11 +348,50 @@
 	return rc;
 }
 
+static bool usb_is_intel_switchable_ehci(struct pci_dev *pdev)
+{
+	return pdev->class == PCI_CLASS_SERIAL_USB_EHCI &&
+		pdev->vendor == PCI_VENDOR_ID_INTEL &&
+		pdev->device == 0x1E26;
+}
+
+static void ehci_enable_xhci_companion(void)
+{
+	struct pci_dev		*companion = NULL;
+
+	/* The xHCI and EHCI controllers are not on the same PCI slot */
+	for_each_pci_dev(companion) {
+		if (!usb_is_intel_switchable_xhci(companion))
+			continue;
+		usb_enable_xhci_ports(companion);
+		return;
+	}
+}
+
 static int ehci_pci_resume(struct usb_hcd *hcd, bool hibernated)
 {
 	struct ehci_hcd		*ehci = hcd_to_ehci(hcd);
 	struct pci_dev		*pdev = to_pci_dev(hcd->self.controller);
 
+	/* The BIOS on systems with the Intel Panther Point chipset may or may
+	 * not support xHCI natively.  That means that during system resume, it
+	 * may switch the ports back to EHCI so that users can use their
+	 * keyboard to select a kernel from GRUB after resume from hibernate.
+	 *
+	 * The BIOS is supposed to remember whether the OS had xHCI ports
+	 * enabled before resume, and switch the ports back to xHCI when the
+	 * BIOS/OS semaphore is written, but we all know we can't trust BIOS
+	 * writers.
+	 *
+	 * Unconditionally switch the ports back to xHCI after a system resume.
+	 * We can't tell whether the EHCI or xHCI controller will be resumed
+	 * first, so we have to do the port switchover in both drivers.  Writing
+	 * a '1' to the port switchover registers should have no effect if the
+	 * port was already switched over.
+	 */
+	if (usb_is_intel_switchable_ehci(pdev))
+		ehci_enable_xhci_companion();
+
 	// maybe restore FLADJ
 
 	if (time_before(jiffies, ehci->next_statechange))

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index f16c59d..fd93061 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c

@@ -69,6 +69,9 @@
 #define	NB_PIF0_PWRDOWN_0	0x01100012
 #define	NB_PIF0_PWRDOWN_1	0x01100013
 
+#define USB_INTEL_XUSB2PR      0xD0
+#define USB_INTEL_USB3_PSSEN   0xD8
+
 static struct amd_chipset_info {
 	struct pci_dev	*nb_dev;
 	struct pci_dev	*smbus_dev;
@@ -673,6 +676,64 @@
 	return -ETIMEDOUT;
 }
 
+bool usb_is_intel_switchable_xhci(struct pci_dev *pdev)
+{
+	return pdev->class == PCI_CLASS_SERIAL_USB_XHCI &&
+		pdev->vendor == PCI_VENDOR_ID_INTEL &&
+		pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI;
+}
+EXPORT_SYMBOL_GPL(usb_is_intel_switchable_xhci);
+
+/*
+ * Intel's Panther Point chipset has two host controllers (EHCI and xHCI) that
+ * share some number of ports.  These ports can be switched between either
+ * controller.  Not all of the ports under the EHCI host controller may be
+ * switchable.
+ *
+ * The ports should be switched over to xHCI before PCI probes for any device
+ * start.  This avoids active devices under EHCI being disconnected during the
+ * port switchover, which could cause loss of data on USB storage devices, or
+ * failed boot when the root file system is on a USB mass storage device and is
+ * enumerated under EHCI first.
+ *
+ * We write into the xHC's PCI configuration space in some Intel-specific
+ * registers to switch the ports over.  The USB 3.0 terminations and the USB
+ * 2.0 data wires are switched separately.  We want to enable the SuperSpeed
+ * terminations before switching the USB 2.0 wires over, so that USB 3.0
+ * devices connect at SuperSpeed, rather than at USB 2.0 speeds.
+ */
+void usb_enable_xhci_ports(struct pci_dev *xhci_pdev)
+{
+	u32		ports_available;
+
+	ports_available = 0xffffffff;
+	/* Write USB3_PSSEN, the USB 3.0 Port SuperSpeed Enable
+	 * Register, to turn on SuperSpeed terminations for all
+	 * available ports.
+	 */
+	pci_write_config_dword(xhci_pdev, USB_INTEL_USB3_PSSEN,
+			cpu_to_le32(ports_available));
+
+	pci_read_config_dword(xhci_pdev, USB_INTEL_USB3_PSSEN,
+			&ports_available);
+	dev_dbg(&xhci_pdev->dev, "USB 3.0 ports that are now enabled "
+			"under xHCI: 0x%x\n", ports_available);
+
+	ports_available = 0xffffffff;
+	/* Write XUSB2PR, the xHC USB 2.0 Port Routing Register, to
+	 * switch the USB 2.0 power and data lines over to the xHCI
+	 * host.
+	 */
+	pci_write_config_dword(xhci_pdev, USB_INTEL_XUSB2PR,
+			cpu_to_le32(ports_available));
+
+	pci_read_config_dword(xhci_pdev, USB_INTEL_XUSB2PR,
+			&ports_available);
+	dev_dbg(&xhci_pdev->dev, "USB 2.0 ports that are now switched over "
+			"to xHCI: 0x%x\n", ports_available);
+}
+EXPORT_SYMBOL_GPL(usb_enable_xhci_ports);
+
 /**
  * PCI Quirks for xHCI.
  *
@@ -732,6 +793,8 @@
 	writel(XHCI_LEGACY_DISABLE_SMI,
 			base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
 
+	if (usb_is_intel_switchable_xhci(pdev))
+		usb_enable_xhci_ports(pdev);
 hc_init:
 	op_reg_base = base + XHCI_HC_LENGTH(readl(base));
 

diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h
index 6ae9f78..b1002a8 100644
--- a/drivers/usb/host/pci-quirks.h
+++ b/drivers/usb/host/pci-quirks.h

@@ -8,6 +8,8 @@
 void usb_amd_dev_put(void);
 void usb_amd_quirk_pll_disable(void);
 void usb_amd_quirk_pll_enable(void);
+bool usb_is_intel_switchable_xhci(struct pci_dev *pdev);
+void usb_enable_xhci_ports(struct pci_dev *xhci_pdev);
 #else
 static inline void usb_amd_quirk_pll_disable(void) {}
 static inline void usb_amd_quirk_pll_enable(void) {}

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index cbc4d49..c408e9f 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c

@@ -118,6 +118,12 @@
 	/* AMD PLL quirk */
 	if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
 		xhci->quirks |= XHCI_AMD_PLL_FIX;
+	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+			pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) {
+		xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
+		xhci->quirks |= XHCI_EP_LIMIT_QUIRK;
+		xhci->limit_active_eps = 64;
+	}
 
 	/* Make sure the HC is halted. */
 	retval = xhci_halt(xhci);
@@ -242,8 +248,28 @@
 static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
 {
 	struct xhci_hcd		*xhci = hcd_to_xhci(hcd);
+	struct pci_dev		*pdev = to_pci_dev(hcd->self.controller);
 	int			retval = 0;
 
+	/* The BIOS on systems with the Intel Panther Point chipset may or may
+	 * not support xHCI natively.  That means that during system resume, it
+	 * may switch the ports back to EHCI so that users can use their
+	 * keyboard to select a kernel from GRUB after resume from hibernate.
+	 *
+	 * The BIOS is supposed to remember whether the OS had xHCI ports
+	 * enabled before resume, and switch the ports back to xHCI when the
+	 * BIOS/OS semaphore is written, but we all know we can't trust BIOS
+	 * writers.
+	 *
+	 * Unconditionally switch the ports back to xHCI after a system resume.
+	 * We can't tell whether the EHCI or xHCI controller will be resumed
+	 * first, so we have to do the port switchover in both drivers.  Writing
+	 * a '1' to the port switchover registers should have no effect if the
+	 * port was already switched over.
+	 */
+	if (usb_is_intel_switchable_xhci(pdev))
+		usb_enable_xhci_ports(pdev);
+
 	retval = xhci_resume(xhci, hibernated);
 	return retval;
 }

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 237a765..cc1485b 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c

@@ -167,12 +167,6 @@
 		next = ring->dequeue;
 	}
 	addr = (unsigned long long) xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue);
-	if (ring == xhci->event_ring)
-		xhci_dbg(xhci, "Event ring deq = 0x%llx (DMA)\n", addr);
-	else if (ring == xhci->cmd_ring)
-		xhci_dbg(xhci, "Command ring deq = 0x%llx (DMA)\n", addr);
-	else
-		xhci_dbg(xhci, "Ring deq = 0x%llx (DMA)\n", addr);
 }
 
 /*
@@ -248,12 +242,6 @@
 		next = ring->enqueue;
 	}
 	addr = (unsigned long long) xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue);
-	if (ring == xhci->event_ring)
-		xhci_dbg(xhci, "Event ring enq = 0x%llx (DMA)\n", addr);
-	else if (ring == xhci->cmd_ring)
-		xhci_dbg(xhci, "Command ring enq = 0x%llx (DMA)\n", addr);
-	else
-		xhci_dbg(xhci, "Ring enq = 0x%llx (DMA)\n", addr);
 }
 
 /*
@@ -636,13 +624,11 @@
 			}
 		}
 		usb_hcd_unlink_urb_from_ep(hcd, urb);
-		xhci_dbg(xhci, "Giveback %s URB %p\n", adjective, urb);
 
 		spin_unlock(&xhci->lock);
 		usb_hcd_giveback_urb(hcd, urb, status);
 		xhci_urb_free_priv(xhci, urb_priv);
 		spin_lock(&xhci->lock);
-		xhci_dbg(xhci, "%s URB given back\n", adjective);
 	}
 }
 
@@ -692,6 +678,8 @@
 
 	if (list_empty(&ep->cancelled_td_list)) {
 		xhci_stop_watchdog_timer_in_irq(xhci, ep);
+		ep->stopped_td = NULL;
+		ep->stopped_trb = NULL;
 		ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 		return;
 	}
@@ -1093,8 +1081,13 @@
 		complete(&xhci->addr_dev);
 		break;
 	case TRB_TYPE(TRB_DISABLE_SLOT):
-		if (xhci->devs[slot_id])
+		if (xhci->devs[slot_id]) {
+			if (xhci->quirks & XHCI_EP_LIMIT_QUIRK)
+				/* Delete default control endpoint resources */
+				xhci_free_device_endpoint_resources(xhci,
+						xhci->devs[slot_id], true);
 			xhci_free_virt_device(xhci, slot_id);
+		}
 		break;
 	case TRB_TYPE(TRB_CONFIG_EP):
 		virt_dev = xhci->devs[slot_id];
@@ -1630,7 +1623,6 @@
 					"without IOC set??\n");
 			*status = -ESHUTDOWN;
 		} else {
-			xhci_dbg(xhci, "Successful control transfer!\n");
 			*status = 0;
 		}
 		break;
@@ -1727,7 +1719,6 @@
 	switch (trb_comp_code) {
 	case COMP_SUCCESS:
 		frame->status = 0;
-		xhci_dbg(xhci, "Successful isoc transfer!\n");
 		break;
 	case COMP_SHORT_TX:
 		frame->status = td->urb->transfer_flags & URB_SHORT_NOT_OK ?
@@ -1837,12 +1828,6 @@
 			else
 				*status = 0;
 		} else {
-			if (usb_endpoint_xfer_bulk(&td->urb->ep->desc))
-				xhci_dbg(xhci, "Successful bulk "
-						"transfer!\n");
-			else
-				xhci_dbg(xhci, "Successful interrupt "
-						"transfer!\n");
 			*status = 0;
 		}
 		break;
@@ -1856,11 +1841,12 @@
 		/* Others already handled above */
 		break;
 	}
-	xhci_dbg(xhci, "ep %#x - asked for %d bytes, "
-			"%d bytes untransferred\n",
-			td->urb->ep->desc.bEndpointAddress,
-			td->urb->transfer_buffer_length,
-		 TRB_LEN(le32_to_cpu(event->transfer_len)));
+	if (trb_comp_code == COMP_SHORT_TX)
+		xhci_dbg(xhci, "ep %#x - asked for %d bytes, "
+				"%d bytes untransferred\n",
+				td->urb->ep->desc.bEndpointAddress,
+				td->urb->transfer_buffer_length,
+				TRB_LEN(le32_to_cpu(event->transfer_len)));
 	/* Fast path - was this the last TRB in the TD for this URB? */
 	if (event_trb == td->last_trb) {
 		if (TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) {
@@ -1954,7 +1940,6 @@
 
 	/* Endpoint ID is 1 based, our index is zero based */
 	ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
-	xhci_dbg(xhci, "%s - ep index = %d\n", __func__, ep_index);
 	ep = &xdev->eps[ep_index];
 	ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
 	ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
@@ -2081,6 +2066,16 @@
 		if (!event_seg) {
 			if (!ep->skip ||
 			    !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+				/* Some host controllers give a spurious
+				 * successful event after a short transfer.
+				 * Ignore it.
+				 */
+				if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && 
+						ep_ring->last_td_was_short) {
+					ep_ring->last_td_was_short = false;
+					ret = 0;
+					goto cleanup;
+				}
 				/* HC is busted, give up! */
 				xhci_err(xhci,
 					"ERROR Transfer event TRB DMA ptr not "
@@ -2091,6 +2086,10 @@
 			ret = skip_isoc_td(xhci, td, event, ep, &status);
 			goto cleanup;
 		}
+		if (trb_comp_code == COMP_SHORT_TX)
+			ep_ring->last_td_was_short = true;
+		else
+			ep_ring->last_td_was_short = false;
 
 		if (ep->skip) {
 			xhci_dbg(xhci, "Found td. Clear skip flag.\n");
@@ -2149,9 +2148,15 @@
 				xhci_urb_free_priv(xhci, urb_priv);
 
 			usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb);
-			xhci_dbg(xhci, "Giveback URB %p, len = %d, "
-					"status = %d\n",
-					urb, urb->actual_length, status);
+			if ((urb->actual_length != urb->transfer_buffer_length &&
+						(urb->transfer_flags &
+						 URB_SHORT_NOT_OK)) ||
+					status != 0)
+				xhci_dbg(xhci, "Giveback URB %p, len = %d, "
+						"expected = %x, status = %d\n",
+						urb, urb->actual_length,
+						urb->transfer_buffer_length,
+						status);
 			spin_unlock(&xhci->lock);
 			usb_hcd_giveback_urb(bus_to_hcd(urb->dev->bus), urb, status);
 			spin_lock(&xhci->lock);
@@ -2180,7 +2185,6 @@
 	int update_ptrs = 1;
 	int ret;
 
-	xhci_dbg(xhci, "In %s\n", __func__);
 	if (!xhci->event_ring || !xhci->event_ring->dequeue) {
 		xhci->error_bitmask |= 1 << 1;
 		return 0;
@@ -2193,7 +2197,6 @@
 		xhci->error_bitmask |= 1 << 2;
 		return 0;
 	}
-	xhci_dbg(xhci, "%s - OS owns TRB\n", __func__);
 
 	/*
 	 * Barrier between reading the TRB_CYCLE (valid) flag above and any
@@ -2203,20 +2206,14 @@
 	/* FIXME: Handle more event types. */
 	switch ((le32_to_cpu(event->event_cmd.flags) & TRB_TYPE_BITMASK)) {
 	case TRB_TYPE(TRB_COMPLETION):
-		xhci_dbg(xhci, "%s - calling handle_cmd_completion\n", __func__);
 		handle_cmd_completion(xhci, &event->event_cmd);
-		xhci_dbg(xhci, "%s - returned from handle_cmd_completion\n", __func__);
 		break;
 	case TRB_TYPE(TRB_PORT_STATUS):
-		xhci_dbg(xhci, "%s - calling handle_port_status\n", __func__);
 		handle_port_status(xhci, event);
-		xhci_dbg(xhci, "%s - returned from handle_port_status\n", __func__);
 		update_ptrs = 0;
 		break;
 	case TRB_TYPE(TRB_TRANSFER):
-		xhci_dbg(xhci, "%s - calling handle_tx_event\n", __func__);
 		ret = handle_tx_event(xhci, &event->trans_event);
-		xhci_dbg(xhci, "%s - returned from handle_tx_event\n", __func__);
 		if (ret < 0)
 			xhci->error_bitmask |= 1 << 9;
 		else
@@ -2273,16 +2270,6 @@
 		spin_unlock(&xhci->lock);
 		return IRQ_NONE;
 	}
-	xhci_dbg(xhci, "op reg status = %08x\n", status);
-	xhci_dbg(xhci, "Event ring dequeue ptr:\n");
-	xhci_dbg(xhci, "@%llx %08x %08x %08x %08x\n",
-		 (unsigned long long)
-		 xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, trb),
-		 lower_32_bits(le64_to_cpu(trb->link.segment_ptr)),
-		 upper_32_bits(le64_to_cpu(trb->link.segment_ptr)),
-		 (unsigned int) le32_to_cpu(trb->link.intr_target),
-		 (unsigned int) le32_to_cpu(trb->link.control));
-
 	if (status & STS_FATAL) {
 		xhci_warn(xhci, "WARNING: Host System Error\n");
 		xhci_halt(xhci);
@@ -2397,7 +2384,6 @@
 		u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
 {
 	/* Make sure the endpoint has been added to xHC schedule */
-	xhci_dbg(xhci, "Endpoint state = 0x%x\n", ep_state);
 	switch (ep_state) {
 	case EP_STATE_DISABLED:
 		/*
@@ -2434,7 +2420,6 @@
 		struct xhci_ring *ring = ep_ring;
 		union xhci_trb *next;
 
-		xhci_dbg(xhci, "prepare_ring: pointing to link trb\n");
 		next = ring->enqueue;
 
 		while (last_trb(xhci, ring, ring->enq_seg, next)) {

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 8f2a56e..d9660eb 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c

@@ -1314,8 +1314,10 @@
 	if (ret <= 0)
 		return ret;
 	xhci = hcd_to_xhci(hcd);
-	xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev);
+	if (xhci->xhc_state & XHCI_STATE_DYING)
+		return -ENODEV;
 
+	xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev);
 	drop_flag = xhci_get_endpoint_flag(&ep->desc);
 	if (drop_flag == SLOT_FLAG || drop_flag == EP0_FLAG) {
 		xhci_dbg(xhci, "xHCI %s - can't drop slot or ep 0 %#x\n",
@@ -1401,6 +1403,8 @@
 		return ret;
 	}
 	xhci = hcd_to_xhci(hcd);
+	if (xhci->xhc_state & XHCI_STATE_DYING)
+		return -ENODEV;
 
 	added_ctxs = xhci_get_endpoint_flag(&ep->desc);
 	last_ctx = xhci_last_valid_endpoint(added_ctxs);
@@ -1578,6 +1582,113 @@
 	return ret;
 }
 
+static u32 xhci_count_num_new_endpoints(struct xhci_hcd *xhci,
+		struct xhci_container_ctx *in_ctx)
+{
+	struct xhci_input_control_ctx *ctrl_ctx;
+	u32 valid_add_flags;
+	u32 valid_drop_flags;
+
+	ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx);
+	/* Ignore the slot flag (bit 0), and the default control endpoint flag
+	 * (bit 1).  The default control endpoint is added during the Address
+	 * Device command and is never removed until the slot is disabled.
+	 */
+	valid_add_flags = ctrl_ctx->add_flags >> 2;
+	valid_drop_flags = ctrl_ctx->drop_flags >> 2;
+
+	/* Use hweight32 to count the number of ones in the add flags, or
+	 * number of endpoints added.  Don't count endpoints that are changed
+	 * (both added and dropped).
+	 */
+	return hweight32(valid_add_flags) -
+		hweight32(valid_add_flags & valid_drop_flags);
+}
+
+static unsigned int xhci_count_num_dropped_endpoints(struct xhci_hcd *xhci,
+		struct xhci_container_ctx *in_ctx)
+{
+	struct xhci_input_control_ctx *ctrl_ctx;
+	u32 valid_add_flags;
+	u32 valid_drop_flags;
+
+	ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx);
+	valid_add_flags = ctrl_ctx->add_flags >> 2;
+	valid_drop_flags = ctrl_ctx->drop_flags >> 2;
+
+	return hweight32(valid_drop_flags) -
+		hweight32(valid_add_flags & valid_drop_flags);
+}
+
+/*
+ * We need to reserve the new number of endpoints before the configure endpoint
+ * command completes.  We can't subtract the dropped endpoints from the number
+ * of active endpoints until the command completes because we can oversubscribe
+ * the host in this case:
+ *
+ *  - the first configure endpoint command drops more endpoints than it adds
+ *  - a second configure endpoint command that adds more endpoints is queued
+ *  - the first configure endpoint command fails, so the config is unchanged
+ *  - the second command may succeed, even though there isn't enough resources
+ *
+ * Must be called with xhci->lock held.
+ */
+static int xhci_reserve_host_resources(struct xhci_hcd *xhci,
+		struct xhci_container_ctx *in_ctx)
+{
+	u32 added_eps;
+
+	added_eps = xhci_count_num_new_endpoints(xhci, in_ctx);
+	if (xhci->num_active_eps + added_eps > xhci->limit_active_eps) {
+		xhci_dbg(xhci, "Not enough ep ctxs: "
+				"%u active, need to add %u, limit is %u.\n",
+				xhci->num_active_eps, added_eps,
+				xhci->limit_active_eps);
+		return -ENOMEM;
+	}
+	xhci->num_active_eps += added_eps;
+	xhci_dbg(xhci, "Adding %u ep ctxs, %u now active.\n", added_eps,
+			xhci->num_active_eps);
+	return 0;
+}
+
+/*
+ * The configure endpoint was failed by the xHC for some other reason, so we
+ * need to revert the resources that failed configuration would have used.
+ *
+ * Must be called with xhci->lock held.
+ */
+static void xhci_free_host_resources(struct xhci_hcd *xhci,
+		struct xhci_container_ctx *in_ctx)
+{
+	u32 num_failed_eps;
+
+	num_failed_eps = xhci_count_num_new_endpoints(xhci, in_ctx);
+	xhci->num_active_eps -= num_failed_eps;
+	xhci_dbg(xhci, "Removing %u failed ep ctxs, %u now active.\n",
+			num_failed_eps,
+			xhci->num_active_eps);
+}
+
+/*
+ * Now that the command has completed, clean up the active endpoint count by
+ * subtracting out the endpoints that were dropped (but not changed).
+ *
+ * Must be called with xhci->lock held.
+ */
+static void xhci_finish_resource_reservation(struct xhci_hcd *xhci,
+		struct xhci_container_ctx *in_ctx)
+{
+	u32 num_dropped_eps;
+
+	num_dropped_eps = xhci_count_num_dropped_endpoints(xhci, in_ctx);
+	xhci->num_active_eps -= num_dropped_eps;
+	if (num_dropped_eps)
+		xhci_dbg(xhci, "Removing %u dropped ep ctxs, %u now active.\n",
+				num_dropped_eps,
+				xhci->num_active_eps);
+}
+
 /* Issue a configure endpoint command or evaluate context command
  * and wait for it to finish.
  */
@@ -1598,6 +1709,15 @@
 	virt_dev = xhci->devs[udev->slot_id];
 	if (command) {
 		in_ctx = command->in_ctx;
+		if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK) &&
+				xhci_reserve_host_resources(xhci, in_ctx)) {
+			spin_unlock_irqrestore(&xhci->lock, flags);
+			xhci_warn(xhci, "Not enough host resources, "
+					"active endpoint contexts = %u\n",
+					xhci->num_active_eps);
+			return -ENOMEM;
+		}
+
 		cmd_completion = command->completion;
 		cmd_status = &command->status;
 		command->command_trb = xhci->cmd_ring->enqueue;
@@ -1613,6 +1733,14 @@
 		list_add_tail(&command->cmd_list, &virt_dev->cmd_list);
 	} else {
 		in_ctx = virt_dev->in_ctx;
+		if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK) &&
+				xhci_reserve_host_resources(xhci, in_ctx)) {
+			spin_unlock_irqrestore(&xhci->lock, flags);
+			xhci_warn(xhci, "Not enough host resources, "
+					"active endpoint contexts = %u\n",
+					xhci->num_active_eps);
+			return -ENOMEM;
+		}
 		cmd_completion = &virt_dev->cmd_completion;
 		cmd_status = &virt_dev->cmd_status;
 	}
@@ -1627,6 +1755,8 @@
 	if (ret < 0) {
 		if (command)
 			list_del(&command->cmd_list);
+		if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK))
+			xhci_free_host_resources(xhci, in_ctx);
 		spin_unlock_irqrestore(&xhci->lock, flags);
 		xhci_dbg(xhci, "FIXME allocate a new ring segment\n");
 		return -ENOMEM;
@@ -1649,8 +1779,22 @@
 	}
 
 	if (!ctx_change)
-		return xhci_configure_endpoint_result(xhci, udev, cmd_status);
-	return xhci_evaluate_context_result(xhci, udev, cmd_status);
+		ret = xhci_configure_endpoint_result(xhci, udev, cmd_status);
+	else
+		ret = xhci_evaluate_context_result(xhci, udev, cmd_status);
+
+	if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) {
+		spin_lock_irqsave(&xhci->lock, flags);
+		/* If the command failed, remove the reserved resources.
+		 * Otherwise, clean up the estimate to include dropped eps.
+		 */
+		if (ret)
+			xhci_free_host_resources(xhci, in_ctx);
+		else
+			xhci_finish_resource_reservation(xhci, in_ctx);
+		spin_unlock_irqrestore(&xhci->lock, flags);
+	}
+	return ret;
 }
 
 /* Called after one or more calls to xhci_add_endpoint() or
@@ -1676,6 +1820,8 @@
 	if (ret <= 0)
 		return ret;
 	xhci = hcd_to_xhci(hcd);
+	if (xhci->xhc_state & XHCI_STATE_DYING)
+		return -ENODEV;
 
 	xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev);
 	virt_dev = xhci->devs[udev->slot_id];
@@ -2266,6 +2412,34 @@
 }
 
 /*
+ * Deletes endpoint resources for endpoints that were active before a Reset
+ * Device command, or a Disable Slot command.  The Reset Device command leaves
+ * the control endpoint intact, whereas the Disable Slot command deletes it.
+ *
+ * Must be called with xhci->lock held.
+ */
+void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci,
+	struct xhci_virt_device *virt_dev, bool drop_control_ep)
+{
+	int i;
+	unsigned int num_dropped_eps = 0;
+	unsigned int drop_flags = 0;
+
+	for (i = (drop_control_ep ? 0 : 1); i < 31; i++) {
+		if (virt_dev->eps[i].ring) {
+			drop_flags |= 1 << i;
+			num_dropped_eps++;
+		}
+	}
+	xhci->num_active_eps -= num_dropped_eps;
+	if (num_dropped_eps)
+		xhci_dbg(xhci, "Dropped %u ep ctxs, flags = 0x%x, "
+				"%u now active.\n",
+				num_dropped_eps, drop_flags,
+				xhci->num_active_eps);
+}
+
+/*
  * This submits a Reset Device Command, which will set the device state to 0,
  * set the device address to 0, and disable all the endpoints except the default
  * control endpoint.  The USB core should come back and call
@@ -2406,6 +2580,14 @@
 		goto command_cleanup;
 	}
 
+	/* Free up host controller endpoint resources */
+	if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) {
+		spin_lock_irqsave(&xhci->lock, flags);
+		/* Don't delete the default control endpoint resources */
+		xhci_free_device_endpoint_resources(xhci, virt_dev, false);
+		spin_unlock_irqrestore(&xhci->lock, flags);
+	}
+
 	/* Everything but endpoint 0 is disabled, so free or cache the rings. */
 	last_freed_endpoint = 1;
 	for (i = 1; i < 31; ++i) {
@@ -2479,6 +2661,27 @@
 }
 
 /*
+ * Checks if we have enough host controller resources for the default control
+ * endpoint.
+ *
+ * Must be called with xhci->lock held.
+ */
+static int xhci_reserve_host_control_ep_resources(struct xhci_hcd *xhci)
+{
+	if (xhci->num_active_eps + 1 > xhci->limit_active_eps) {
+		xhci_dbg(xhci, "Not enough ep ctxs: "
+				"%u active, need to add 1, limit is %u.\n",
+				xhci->num_active_eps, xhci->limit_active_eps);
+		return -ENOMEM;
+	}
+	xhci->num_active_eps += 1;
+	xhci_dbg(xhci, "Adding 1 ep ctx, %u now active.\n",
+			xhci->num_active_eps);
+	return 0;
+}
+
+
+/*
  * Returns 0 if the xHC ran out of device slots, the Enable Slot command
  * timed out, or allocating memory failed.  Returns 1 on success.
  */
@@ -2513,24 +2716,39 @@
 		xhci_err(xhci, "Error while assigning device slot ID\n");
 		return 0;
 	}
-	/* xhci_alloc_virt_device() does not touch rings; no need to lock.
-	 * Use GFP_NOIO, since this function can be called from
+
+	if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) {
+		spin_lock_irqsave(&xhci->lock, flags);
+		ret = xhci_reserve_host_control_ep_resources(xhci);
+		if (ret) {
+			spin_unlock_irqrestore(&xhci->lock, flags);
+			xhci_warn(xhci, "Not enough host resources, "
+					"active endpoint contexts = %u\n",
+					xhci->num_active_eps);
+			goto disable_slot;
+		}
+		spin_unlock_irqrestore(&xhci->lock, flags);
+	}
+	/* Use GFP_NOIO, since this function can be called from
 	 * xhci_discover_or_reset_device(), which may be called as part of
 	 * mass storage driver error handling.
 	 */
 	if (!xhci_alloc_virt_device(xhci, xhci->slot_id, udev, GFP_NOIO)) {
-		/* Disable slot, if we can do it without mem alloc */
 		xhci_warn(xhci, "Could not allocate xHCI USB device data structures\n");
-		spin_lock_irqsave(&xhci->lock, flags);
-		if (!xhci_queue_slot_control(xhci, TRB_DISABLE_SLOT, udev->slot_id))
-			xhci_ring_cmd_db(xhci);
-		spin_unlock_irqrestore(&xhci->lock, flags);
-		return 0;
+		goto disable_slot;
 	}
 	udev->slot_id = xhci->slot_id;
 	/* Is this a LS or FS device under a HS hub? */
 	/* Hub or peripherial? */
 	return 1;
+
+disable_slot:
+	/* Disable slot, if we can do it without mem alloc */
+	spin_lock_irqsave(&xhci->lock, flags);
+	if (!xhci_queue_slot_control(xhci, TRB_DISABLE_SLOT, udev->slot_id))
+		xhci_ring_cmd_db(xhci);
+	spin_unlock_irqrestore(&xhci->lock, flags);
+	return 0;
 }
 
 /*

diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index e12db7cf..ac0196e 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h

@@ -1123,6 +1123,7 @@
 	 */
 	u32			cycle_state;
 	unsigned int		stream_id;
+	bool			last_td_was_short;
 };
 
 struct xhci_erst_entry {
@@ -1290,6 +1291,19 @@
 #define XHCI_RESET_EP_QUIRK	(1 << 1)
 #define XHCI_NEC_HOST		(1 << 2)
 #define XHCI_AMD_PLL_FIX	(1 << 3)
+#define XHCI_SPURIOUS_SUCCESS	(1 << 4)
+/*
+ * Certain Intel host controllers have a limit to the number of endpoint
+ * contexts they can handle.  Ideally, they would signal that they can't handle
+ * anymore endpoint contexts by returning a Resource Error for the Configure
+ * Endpoint command, but they don't.  Instead they expect software to keep track
+ * of the number of active endpoints for them, across configure endpoint
+ * commands, reset device commands, disable slot commands, and address device
+ * commands.
+ */
+#define XHCI_EP_LIMIT_QUIRK	(1 << 5)
+	unsigned int		num_active_eps;
+	unsigned int		limit_active_eps;
 	/* There are two roothubs to keep track of bus suspend info for */
 	struct xhci_bus_state   bus_state[2];
 	/* Is each xHCI roothub port a USB 3.0, USB 2.0, or USB 1.1 port? */
@@ -1338,9 +1352,6 @@
 static inline void xhci_writel(struct xhci_hcd *xhci,
 		const unsigned int val, __le32 __iomem *regs)
 {
-	xhci_dbg(xhci,
-			"`MEM_WRITE_DWORD(3'b000, 32'h%p, 32'h%0x, 4'hf);\n",
-			regs, val);
 	writel(val, regs);
 }
 
@@ -1368,9 +1379,6 @@
 	u32 val_lo = lower_32_bits(val);
 	u32 val_hi = upper_32_bits(val);
 
-	xhci_dbg(xhci,
-			"`MEM_WRITE_DWORD(3'b000, 64'h%p, 64'h%0lx, 4'hf);\n",
-			regs, (long unsigned int) val);
 	writel(val_lo, ptr);
 	writel(val_hi, ptr + 1);
 }
@@ -1439,6 +1447,8 @@
 void xhci_setup_no_streams_ep_input_ctx(struct xhci_hcd *xhci,
 		struct xhci_ep_ctx *ep_ctx,
 		struct xhci_virt_ep *ep);
+void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci,
+	struct xhci_virt_device *virt_dev, bool drop_control_ep);
 struct xhci_ring *xhci_dma_to_transfer_ring(
 		struct xhci_virt_ep *ep,
 		u64 address);

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 2f7c76a..e224a92 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c

@@ -144,7 +144,7 @@
 	}
 
 	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
+	vhost_disable_notify(&net->dev, vq);
 
 	if (wmem < sock->sk->sk_sndbuf / 2)
 		tx_poll_stop(net);
@@ -166,8 +166,8 @@
 				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
 				break;
 			}
-			if (unlikely(vhost_enable_notify(vq))) {
-				vhost_disable_notify(vq);
+			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+				vhost_disable_notify(&net->dev, vq);
 				continue;
 			}
 			break;
@@ -315,7 +315,7 @@
 		return;
 
 	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
+	vhost_disable_notify(&net->dev, vq);
 	vhost_hlen = vq->vhost_hlen;
 	sock_hlen = vq->sock_hlen;
 
@@ -334,10 +334,10 @@
 			break;
 		/* OK, now we need to know about added descriptors. */
 		if (!headcount) {
-			if (unlikely(vhost_enable_notify(vq))) {
+			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				/* They have slipped one in as we were
 				 * doing that: check again. */
-				vhost_disable_notify(vq);
+				vhost_disable_notify(&net->dev, vq);
 				continue;
 			}
 			/* Nothing new?  Wait for eventfd to tell us

diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 099f302..734e1d7 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c

@@ -49,7 +49,7 @@
 		return;
 
 	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
+	vhost_disable_notify(&n->dev, vq);
 
 	for (;;) {
 		head = vhost_get_vq_desc(&n->dev, vq, vq->iov,
@@ -61,8 +61,8 @@
 			break;
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
-			if (unlikely(vhost_enable_notify(vq))) {
-				vhost_disable_notify(vq);
+			if (unlikely(vhost_enable_notify(&n->dev, vq))) {
+				vhost_disable_notify(&n->dev, vq);
 				continue;
 			}
 			break;

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 7aa4eea..ea966b3 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c

@@ -37,6 +37,9 @@
 	VHOST_MEMORY_F_LOG = 0x1,
 };
 
+#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
+#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
+
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
@@ -161,6 +164,8 @@
 	vq->last_avail_idx = 0;
 	vq->avail_idx = 0;
 	vq->last_used_idx = 0;
+	vq->signalled_used = 0;
+	vq->signalled_used_valid = false;
 	vq->used_flags = 0;
 	vq->log_used = false;
 	vq->log_addr = -1ull;
@@ -489,16 +494,17 @@
 	return 1;
 }
 
-static int vq_access_ok(unsigned int num,
+static int vq_access_ok(struct vhost_dev *d, unsigned int num,
 			struct vring_desc __user *desc,
 			struct vring_avail __user *avail,
 			struct vring_used __user *used)
 {
+	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 	return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
 	       access_ok(VERIFY_READ, avail,
-			 sizeof *avail + num * sizeof *avail->ring) &&
+			 sizeof *avail + num * sizeof *avail->ring + s) &&
 	       access_ok(VERIFY_WRITE, used,
-			sizeof *used + num * sizeof *used->ring);
+			sizeof *used + num * sizeof *used->ring + s);
 }
 
 /* Can we log writes? */
@@ -514,9 +520,11 @@
 
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
+static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
+			    void __user *log_base)
 {
 	struct vhost_memory *mp;
+	size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 
 	mp = rcu_dereference_protected(vq->dev->memory,
 				       lockdep_is_held(&vq->mutex));
@@ -524,15 +532,15 @@
 			    vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
 		(!vq->log_used || log_access_ok(log_base, vq->log_addr,
 					sizeof *vq->used +
-					vq->num * sizeof *vq->used->ring));
+					vq->num * sizeof *vq->used->ring + s));
 }
 
 /* Can we start vq? */
 /* Caller should have vq mutex and device mutex */
 int vhost_vq_access_ok(struct vhost_virtqueue *vq)
 {
-	return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) &&
-		vq_log_access_ok(vq, vq->log_base);
+	return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
+		vq_log_access_ok(vq->dev, vq, vq->log_base);
 }
 
 static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
@@ -577,6 +585,7 @@
 
 	if (r)
 		return r;
+	vq->signalled_used_valid = false;
 	return get_user(vq->last_used_idx, &used->idx);
 }
 
@@ -674,7 +683,7 @@
 		 * If it is not, we don't as size might not have been setup.
 		 * We will verify when backend is configured. */
 		if (vq->private_data) {
-			if (!vq_access_ok(vq->num,
+			if (!vq_access_ok(d, vq->num,
 				(void __user *)(unsigned long)a.desc_user_addr,
 				(void __user *)(unsigned long)a.avail_user_addr,
 				(void __user *)(unsigned long)a.used_user_addr)) {
@@ -818,7 +827,7 @@
 			vq = d->vqs + i;
 			mutex_lock(&vq->mutex);
 			/* If ring is inactive, will check when it's enabled. */
-			if (vq->private_data && !vq_log_access_ok(vq, base))
+			if (vq->private_data && !vq_log_access_ok(d, vq, base))
 				r = -EFAULT;
 			else
 				vq->log_base = base;
@@ -1219,6 +1228,10 @@
 
 	/* On success, increment avail index. */
 	vq->last_avail_idx++;
+
+	/* Assume notifications from guest are disabled at this point,
+	 * if they aren't we would need to update avail_event index. */
+	BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
 	return head;
 }
 
@@ -1267,6 +1280,12 @@
 			eventfd_signal(vq->log_ctx, 1);
 	}
 	vq->last_used_idx++;
+	/* If the driver never bothers to signal in a very long while,
+	 * used index might wrap around. If that happens, invalidate
+	 * signalled_used index we stored. TODO: make sure driver
+	 * signals at least once in 2^16 and remove this. */
+	if (unlikely(vq->last_used_idx == vq->signalled_used))
+		vq->signalled_used_valid = false;
 	return 0;
 }
 
@@ -1275,6 +1294,7 @@
 			    unsigned count)
 {
 	struct vring_used_elem __user *used;
+	u16 old, new;
 	int start;
 
 	start = vq->last_used_idx % vq->num;
@@ -1292,7 +1312,14 @@
 			   ((void __user *)used - (void __user *)vq->used),
 			  count * sizeof *used);
 	}
-	vq->last_used_idx += count;
+	old = vq->last_used_idx;
+	new = (vq->last_used_idx += count);
+	/* If the driver never bothers to signal in a very long while,
+	 * used index might wrap around. If that happens, invalidate
+	 * signalled_used index we stored. TODO: make sure driver
+	 * signals at least once in 2^16 and remove this. */
+	if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
+		vq->signalled_used_valid = false;
 	return 0;
 }
 
@@ -1331,29 +1358,47 @@
 	return r;
 }
 
-/* This actually signals the guest, using eventfd. */
-void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
-	__u16 flags;
-
+	__u16 old, new, event;
+	bool v;
 	/* Flush out used index updates. This is paired
 	 * with the barrier that the Guest executes when enabling
 	 * interrupts. */
 	smp_mb();
 
-	if (__get_user(flags, &vq->avail->flags)) {
-		vq_err(vq, "Failed to get flags");
-		return;
+	if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+	    unlikely(vq->avail_idx == vq->last_avail_idx))
+		return true;
+
+	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+		__u16 flags;
+		if (__get_user(flags, &vq->avail->flags)) {
+			vq_err(vq, "Failed to get flags");
+			return true;
+		}
+		return !(flags & VRING_AVAIL_F_NO_INTERRUPT);
 	}
+	old = vq->signalled_used;
+	v = vq->signalled_used_valid;
+	new = vq->signalled_used = vq->last_used_idx;
+	vq->signalled_used_valid = true;
 
-	/* If they don't want an interrupt, don't signal, unless empty. */
-	if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
-	    (vq->avail_idx != vq->last_avail_idx ||
-	     !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
-		return;
+	if (unlikely(!v))
+		return true;
 
+	if (get_user(event, vhost_used_event(vq))) {
+		vq_err(vq, "Failed to get used event idx");
+		return true;
+	}
+	return vring_need_event(event, new, old);
+}
+
+/* This actually signals the guest, using eventfd. */
+void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
 	/* Signal the Guest tell them we used something up. */
-	if (vq->call_ctx)
+	if (vq->call_ctx && vhost_notify(dev, vq))
 		eventfd_signal(vq->call_ctx, 1);
 }
 
@@ -1376,7 +1421,7 @@
 }
 
 /* OK, now we need to know about added descriptors. */
-bool vhost_enable_notify(struct vhost_virtqueue *vq)
+bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
 	u16 avail_idx;
 	int r;
@@ -1384,11 +1429,34 @@
 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
 		return false;
 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
-	r = put_user(vq->used_flags, &vq->used->flags);
-	if (r) {
-		vq_err(vq, "Failed to enable notification at %p: %d\n",
-		       &vq->used->flags, r);
-		return false;
+	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+		r = put_user(vq->used_flags, &vq->used->flags);
+		if (r) {
+			vq_err(vq, "Failed to enable notification at %p: %d\n",
+			       &vq->used->flags, r);
+			return false;
+		}
+	} else {
+		r = put_user(vq->avail_idx, vhost_avail_event(vq));
+		if (r) {
+			vq_err(vq, "Failed to update avail event index at %p: %d\n",
+			       vhost_avail_event(vq), r);
+			return false;
+		}
+	}
+	if (unlikely(vq->log_used)) {
+		void __user *used;
+		/* Make sure data is seen before log. */
+		smp_wmb();
+		used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ?
+			&vq->used->flags : vhost_avail_event(vq);
+		/* Log used flags or event index entry write. Both are 16 bit
+		 * fields. */
+		log_write(vq->log_base, vq->log_addr +
+			   (used - (void __user *)vq->used),
+			  sizeof(u16));
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
 	}
 	/* They could have slipped one in as we were doing that: make
 	 * sure it's written, then check again. */
@@ -1404,15 +1472,17 @@
 }
 
 /* We don't need to be notified again. */
-void vhost_disable_notify(struct vhost_virtqueue *vq)
+void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
 	int r;
 
 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
 		return;
 	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
-	r = put_user(vq->used_flags, &vq->used->flags);
-	if (r)
-		vq_err(vq, "Failed to enable notification at %p: %d\n",
-		       &vq->used->flags, r);
+	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+		r = put_user(vq->used_flags, &vq->used->flags);
+		if (r)
+			vq_err(vq, "Failed to enable notification at %p: %d\n",
+			       &vq->used->flags, r);
+	}
 }

diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index b3363ae..8e03379 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h

@@ -84,6 +84,12 @@
 	/* Used flags */
 	u16 used_flags;
 
+	/* Last used index value we have signalled on */
+	u16 signalled_used;
+
+	/* Last used index value we have signalled on */
+	bool signalled_used_valid;
+
 	/* Log writes to used structure. */
 	bool log_used;
 	u64 log_addr;
@@ -149,8 +155,8 @@
 void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
 			       struct vring_used_elem *heads, unsigned count);
 void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
-void vhost_disable_notify(struct vhost_virtqueue *);
-bool vhost_enable_notify(struct vhost_virtqueue *);
+void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
+bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
 		    unsigned int log_num, u64 len);
@@ -162,11 +168,12 @@
 	} while (0)
 
 enum {
-	VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
-			 (1 << VIRTIO_RING_F_INDIRECT_DESC) |
-			 (1 << VHOST_F_LOG_ALL) |
-			 (1 << VHOST_NET_F_VIRTIO_NET_HDR) |
-			 (1 << VIRTIO_NET_F_MRG_RXBUF),
+	VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
+			 (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+			 (1ULL << VIRTIO_RING_F_EVENT_IDX) |
+			 (1ULL << VHOST_F_LOG_ALL) |
+			 (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
+			 (1ULL << VIRTIO_NET_F_MRG_RXBUF),
 };
 
 static inline int vhost_has_feature(struct vhost_dev *dev, int bit)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 0f1da45..e058ace 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c

@@ -40,9 +40,6 @@
 	/* Waiting for host to ack the pages we released. */
 	struct completion acked;
 
-	/* Do we have to tell Host *before* we reuse pages? */
-	bool tell_host_first;
-
 	/* The pages we've told the Host we're not using. */
 	unsigned int num_pages;
 	struct list_head pages;
@@ -151,13 +148,14 @@
 		vb->num_pages--;
 	}
 
-	if (vb->tell_host_first) {
-		tell_host(vb, vb->deflate_vq);
-		release_pages_by_pfn(vb->pfns, vb->num_pfns);
-	} else {
-		release_pages_by_pfn(vb->pfns, vb->num_pfns);
-		tell_host(vb, vb->deflate_vq);
-	}
+
+	/*
+	 * Note that if
+	 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
+	 * is true, we *have* to do it in this order
+	 */
+	tell_host(vb, vb->deflate_vq);
+	release_pages_by_pfn(vb->pfns, vb->num_pfns);
 }
 
 static inline void update_stat(struct virtio_balloon *vb, int idx,
@@ -325,9 +323,6 @@
 		goto out_del_vqs;
 	}
 
-	vb->tell_host_first
-		= virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
-
 	return 0;
 
 out_del_vqs:

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b0043fb..68b9136 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c

@@ -82,6 +82,9 @@
 	/* Host supports indirect buffers */
 	bool indirect;
 
+	/* Host publishes avail event idx */
+	bool event;
+
 	/* Number of free buffers */
 	unsigned int num_free;
 	/* Head of free buffer list. */
@@ -237,18 +240,22 @@
 void virtqueue_kick(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 new, old;
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
 	virtio_wmb();
 
-	vq->vring.avail->idx += vq->num_added;
+	old = vq->vring.avail->idx;
+	new = vq->vring.avail->idx = old + vq->num_added;
 	vq->num_added = 0;
 
 	/* Need to update avail index before checking if we should notify */
 	virtio_mb();
 
-	if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
+	if (vq->event ?
+	    vring_need_event(vring_avail_event(&vq->vring), new, old) :
+	    !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
 		/* Prod other side to tell it about changes. */
 		vq->notify(&vq->vq);
 
@@ -324,6 +331,14 @@
 	ret = vq->data[i];
 	detach_buf(vq, i);
 	vq->last_used_idx++;
+	/* If we expect an interrupt for the next entry, tell host
+	 * by writing event index and flush out the write before
+	 * the read in the next get_buf call. */
+	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vring_used_event(&vq->vring) = vq->last_used_idx;
+		virtio_mb();
+	}
+
 	END_USE(vq);
 	return ret;
 }
@@ -345,7 +360,11 @@
 
 	/* We optimistically turn back on interrupts, then check if there was
 	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always do both to keep code simple. */
 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+	vring_used_event(&vq->vring) = vq->last_used_idx;
 	virtio_mb();
 	if (unlikely(more_used(vq))) {
 		END_USE(vq);
@@ -357,6 +376,33 @@
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 
+bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 bufs;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always do both to keep code simple. */
+	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+	/* TODO: tune this threshold */
+	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
+	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
+	virtio_mb();
+	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
+		END_USE(vq);
+		return false;
+	}
+
+	END_USE(vq);
+	return true;
+}
+EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
+
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -438,6 +484,7 @@
 #endif
 
 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback)
@@ -472,6 +519,8 @@
 		switch (i) {
 		case VIRTIO_RING_F_INDIRECT_DESC:
 			break;
+		case VIRTIO_RING_F_EVENT_IDX:
+			break;
 		default:
 			/* We don't understand this bit. */
 			clear_bit(i, vdev->features);

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8d7f3e6..7f6c677 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c

@@ -814,7 +814,6 @@
 
 int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
 {
-	dentry_unhash(d);
 	return v9fs_remove(i, d, 1);
 }
 
@@ -840,9 +839,6 @@
 	struct p9_fid *newdirfid;
 	struct p9_wstat wstat;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	P9_DPRINTK(P9_DEBUG_VFS, "\n");
 	retval = 0;
 	old_inode = old_dentry->d_inode;

diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 03330e2..e3e9efc 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c

@@ -320,8 +320,6 @@
 		 dentry->d_inode->i_ino,
 		 (int)dentry->d_name.len, dentry->d_name.name);
 
-	dentry_unhash(dentry);
-
 	return affs_remove_header(dentry);
 }
 
@@ -419,9 +417,6 @@
 	struct buffer_head *bh = NULL;
 	int retval;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n",
 		 (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name,
 		 (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name);

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 2c4e051..20c106f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c

@@ -845,8 +845,6 @@
 	_enter("{%x:%u},{%s}",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
 
-	dentry_unhash(dentry);
-
 	ret = -ENAMETOOLONG;
 	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
@@ -1148,9 +1146,6 @@
 	struct key *key;
 	int ret;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	vnode = AFS_FS_I(old_dentry->d_inode);
 	orig_dvnode = AFS_FS_I(old_dir);
 	new_dvnode = AFS_FS_I(new_dir);

diff --git a/fs/attr.c b/fs/attr.c
index 91dbe2a..caf2aa5 100644
--- a/fs/attr.c
+++ b/fs/attr.c

@@ -175,6 +175,13 @@
 			return -EPERM;
 	}
 
+	if ((ia_valid & ATTR_MODE)) {
+		mode_t amode = attr->ia_mode;
+		/* Flag setting protected by i_mutex */
+		if (is_sxid(amode))
+			inode->i_flags &= ~S_NOSEC;
+	}
+
 	now = current_fs_time(inode->i_sb);
 
 	attr->ia_ctime = now;

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 87d95a8..f55ae23 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c

@@ -583,8 +583,6 @@
 	if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	dentry_unhash(dentry);
-
 	if (atomic_dec_and_test(&ino->count)) {
 		p_ino = autofs4_dentry_ino(dentry->d_parent);
 		if (p_ino && dentry->d_parent != dentry)

diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index c7d1d06..b14cebf 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c

@@ -224,9 +224,6 @@
 	struct bfs_sb_info *info;
 	int error = -ENOENT;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	old_bh = new_bh = NULL;
 	old_inode = old_dentry->d_inode;
 	if (S_ISDIR(old_inode->i_mode))

diff --git a/fs/bio.c b/fs/bio.c
index 840a0d7..9bfade8 100644
--- a/fs/bio.c
+++ b/fs/bio.c

@@ -638,10 +638,11 @@
  *	@offset: vec entry offset
  *
  *	Attempt to add a page to the bio_vec maplist. This can fail for a
- *	number of reasons, such as the bio being full or target block
- *	device limitations. The target block device must allow bio's
- *      smaller than PAGE_SIZE, so it is always possible to add a single
- *      page to an empty bio. This should only be used by REQ_PC bios.
+ *	number of reasons, such as the bio being full or target block device
+ *	limitations. The target block device must allow bio's up to PAGE_SIZE,
+ *	so it is always possible to add a single page to an empty bio.
+ *
+ *	This should only be used by REQ_PC bios.
  */
 int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
 		    unsigned int len, unsigned int offset)
@@ -659,10 +660,9 @@
  *	@offset: vec entry offset
  *
  *	Attempt to add a page to the bio_vec maplist. This can fail for a
- *	number of reasons, such as the bio being full or target block
- *	device limitations. The target block device must allow bio's
- *      smaller than PAGE_SIZE, so it is always possible to add a single
- *      page to an empty bio.
+ *	number of reasons, such as the bio being full or target block device
+ *	limitations. The target block device must allow bio's up to PAGE_SIZE,
+ *	so it is always possible to add a single page to an empty bio.
  */
 int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 		 unsigned int offset)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 332323e..6c093fa9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h

@@ -2524,7 +2524,7 @@
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-void btrfs_dirty_inode(struct inode *inode);
+void btrfs_dirty_inode(struct inode *inode, int flags);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
 int btrfs_drop_inode(struct inode *inode);

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bb51bb1..39a9d57 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c

@@ -4294,7 +4294,7 @@
  * FIXME, needs more benchmarking...there are no reasons other than performance
  * to keep or drop this code.
  */
-void btrfs_dirty_inode(struct inode *inode)
+void btrfs_dirty_inode(struct inode *inode, int flags)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;

diff --git a/fs/buffer.c b/fs/buffer.c
index 698c6b2..49c9aad 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c

@@ -2382,6 +2382,7 @@
 		ret = -EAGAIN;
 		goto out_unlock;
 	}
+	wait_on_page_writeback(page);
 	return 0;
 out_unlock:
 	unlock_page(page);

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 8f17006..21de1d6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c

@@ -74,8 +74,9 @@
  * Run idmap cache shrinker.
  */
 static int
-cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+cifs_idmap_shrinker(struct shrinker *shrink, struct shrink_control *sc)
 {
+	int nr_to_scan = sc->nr_to_scan;
 	int nr_del = 0;
 	int nr_rem = 0;
 	struct rb_root *root;

diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index a46126f..2b8dae4 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c

@@ -336,8 +336,6 @@
 	int len = de->d_name.len;
 	int error;
 
-	dentry_unhash(de);
-
 	error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len);
 	if (!error) {
 		/* VFS may delete the child */
@@ -361,9 +359,6 @@
 	int new_length = new_dentry->d_name.len;
 	int error;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	error = venus_rename(old_dir->i_sb, coda_i2f(old_dir),
 			     coda_i2f(new_dir), old_length, new_length,
 			     (const char *) old_name, (const char *)new_name);

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9d17d35..9a37a9b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c

@@ -1359,8 +1359,6 @@
 	struct module *subsys_owner = NULL, *dead_item_owner = NULL;
 	int ret;
 
-	dentry_unhash(dentry);
-
 	if (dentry->d_parent == configfs_sb->s_root)
 		return -EPERM;
 

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index b8d5c80..58609bd 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c

@@ -1024,25 +1024,25 @@
 }
 
 /**
- * contains_ecryptfs_marker - check for the ecryptfs marker
+ * ecryptfs_validate_marker - check for the ecryptfs marker
  * @data: The data block in which to check
  *
- * Returns one if marker found; zero if not found
+ * Returns zero if marker found; -EINVAL if not found
  */
-static int contains_ecryptfs_marker(char *data)
+static int ecryptfs_validate_marker(char *data)
 {
 	u32 m_1, m_2;
 
 	m_1 = get_unaligned_be32(data);
 	m_2 = get_unaligned_be32(data + 4);
 	if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
-		return 1;
+		return 0;
 	ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
 			"MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2,
 			MAGIC_ECRYPTFS_MARKER);
 	ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = "
 			"[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER));
-	return 0;
+	return -EINVAL;
 }
 
 struct ecryptfs_flag_map_elem {
@@ -1201,27 +1201,19 @@
 	return rc;
 }
 
-int ecryptfs_read_and_validate_header_region(char *data,
-					     struct inode *ecryptfs_inode)
+int ecryptfs_read_and_validate_header_region(struct inode *inode)
 {
-	struct ecryptfs_crypt_stat *crypt_stat =
-		&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
+	u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES];
+	u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES;
 	int rc;
 
-	if (crypt_stat->extent_size == 0)
-		crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
-	rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size,
-				 ecryptfs_inode);
-	if (rc < 0) {
-		printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
-		       __func__, rc);
-		goto out;
-	}
-	if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
-		rc = -EINVAL;
-	} else
-		rc = 0;
-out:
+	rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES,
+				 inode);
+	if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
+		return rc >= 0 ? -EINVAL : rc;
+	rc = ecryptfs_validate_marker(marker);
+	if (!rc)
+		ecryptfs_i_size_init(file_size, inode);
 	return rc;
 }
 
@@ -1242,8 +1234,7 @@
 	(*written) = 6;
 }
 
-struct kmem_cache *ecryptfs_header_cache_1;
-struct kmem_cache *ecryptfs_header_cache_2;
+struct kmem_cache *ecryptfs_header_cache;
 
 /**
  * ecryptfs_write_headers_virt
@@ -1496,11 +1487,9 @@
 	crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private(
 		ecryptfs_dentry->d_sb)->mount_crypt_stat;
 	offset = ECRYPTFS_FILE_SIZE_BYTES;
-	rc = contains_ecryptfs_marker(page_virt + offset);
-	if (rc == 0) {
-		rc = -EINVAL;
+	rc = ecryptfs_validate_marker(page_virt + offset);
+	if (rc)
 		goto out;
-	}
 	if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED))
 		ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
 	offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
@@ -1567,20 +1556,21 @@
 	return rc;
 }
 
-int ecryptfs_read_and_validate_xattr_region(char *page_virt,
-					    struct dentry *ecryptfs_dentry)
+int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry,
+					    struct inode *inode)
 {
+	u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES];
+	u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES;
 	int rc;
 
-	rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry->d_inode);
-	if (rc)
-		goto out;
-	if (!contains_ecryptfs_marker(page_virt	+ ECRYPTFS_FILE_SIZE_BYTES)) {
-		printk(KERN_WARNING "Valid data found in [%s] xattr, but "
-			"the marker is invalid\n", ECRYPTFS_XATTR_NAME);
-		rc = -EINVAL;
-	}
-out:
+	rc = ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry),
+				     ECRYPTFS_XATTR_NAME, file_size,
+				     ECRYPTFS_SIZE_AND_MARKER_BYTES);
+	if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
+		return rc >= 0 ? -EINVAL : rc;
+	rc = ecryptfs_validate_marker(marker);
+	if (!rc)
+		ecryptfs_i_size_init(file_size, inode);
 	return rc;
 }
 
@@ -1610,7 +1600,7 @@
 	ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
 						      mount_crypt_stat);
 	/* Read the first page from the underlying file */
-	page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER);
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache, GFP_USER);
 	if (!page_virt) {
 		rc = -ENOMEM;
 		printk(KERN_ERR "%s: Unable to allocate page_virt\n",
@@ -1655,7 +1645,7 @@
 out:
 	if (page_virt) {
 		memset(page_virt, 0, PAGE_CACHE_SIZE);
-		kmem_cache_free(ecryptfs_header_cache_1, page_virt);
+		kmem_cache_free(ecryptfs_header_cache, page_virt);
 	}
 	return rc;
 }

diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index e702827..43c7c43 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h

@@ -200,6 +200,8 @@
 #define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5
 #define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8	/* 4*2 */
 #define ECRYPTFS_FILE_SIZE_BYTES (sizeof(u64))
+#define ECRYPTFS_SIZE_AND_MARKER_BYTES (ECRYPTFS_FILE_SIZE_BYTES \
+					+ MAGIC_ECRYPTFS_MARKER_SIZE_BYTES)
 #define ECRYPTFS_DEFAULT_CIPHER "aes"
 #define ECRYPTFS_DEFAULT_KEY_BYTES 16
 #define ECRYPTFS_DEFAULT_HASH "md5"
@@ -603,8 +605,7 @@
 extern struct kmem_cache *ecryptfs_dentry_info_cache;
 extern struct kmem_cache *ecryptfs_inode_info_cache;
 extern struct kmem_cache *ecryptfs_sb_info_cache;
-extern struct kmem_cache *ecryptfs_header_cache_1;
-extern struct kmem_cache *ecryptfs_header_cache_2;
+extern struct kmem_cache *ecryptfs_header_cache;
 extern struct kmem_cache *ecryptfs_xattr_cache;
 extern struct kmem_cache *ecryptfs_key_record_cache;
 extern struct kmem_cache *ecryptfs_key_sig_cache;
@@ -625,14 +626,9 @@
 	struct list_head kthread_ctl_list;
 };
 
-#define ECRYPTFS_INTERPOSE_FLAG_D_ADD                 0x00000001
-int ecryptfs_interpose(struct dentry *hidden_dentry,
-		       struct dentry *this_dentry, struct super_block *sb,
-		       u32 flags);
+struct inode *ecryptfs_get_inode(struct inode *lower_inode,
+				 struct super_block *sb);
 void ecryptfs_i_size_init(const char *page_virt, struct inode *inode);
-int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
-					struct dentry *lower_dentry,
-					struct inode *ecryptfs_dir_inode);
 int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
 					 size_t *decrypted_name_size,
 					 struct dentry *ecryptfs_dentry,
@@ -664,10 +660,9 @@
 void ecryptfs_write_crypt_stat_flags(char *page_virt,
 				     struct ecryptfs_crypt_stat *crypt_stat,
 				     size_t *written);
-int ecryptfs_read_and_validate_header_region(char *data,
-					     struct inode *ecryptfs_inode);
-int ecryptfs_read_and_validate_xattr_region(char *page_virt,
-					    struct dentry *ecryptfs_dentry);
+int ecryptfs_read_and_validate_header_region(struct inode *inode);
+int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry,
+					    struct inode *inode);
 u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes);
 int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code);
 void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
@@ -679,9 +674,6 @@
 ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			  unsigned char *src, struct dentry *ecryptfs_dentry);
 int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
-int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
-int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
-void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
 ssize_t
 ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
 			void *value, size_t size);
@@ -761,7 +753,7 @@
 			     struct dentry *lower_dentry,
 			     struct vfsmount *lower_mnt,
 			     const struct cred *cred);
-int ecryptfs_get_lower_file(struct dentry *ecryptfs_dentry);
+int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode);
 void ecryptfs_put_lower_file(struct inode *inode);
 int
 ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 566e547..4ec9eb0 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c

@@ -191,7 +191,7 @@
 				      | ECRYPTFS_ENCRYPTED);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
-	rc = ecryptfs_get_lower_file(ecryptfs_dentry);
+	rc = ecryptfs_get_lower_file(ecryptfs_dentry, inode);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to initialize "
 			"the lower file for the dentry with name "

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index bc116b9..7349ade 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c

@@ -51,6 +51,97 @@
 	dput(dir);
 }
 
+static int ecryptfs_inode_test(struct inode *inode, void *lower_inode)
+{
+	if (ecryptfs_inode_to_lower(inode) == (struct inode *)lower_inode)
+		return 1;
+	return 0;
+}
+
+static int ecryptfs_inode_set(struct inode *inode, void *opaque)
+{
+	struct inode *lower_inode = opaque;
+
+	ecryptfs_set_inode_lower(inode, lower_inode);
+	fsstack_copy_attr_all(inode, lower_inode);
+	/* i_size will be overwritten for encrypted regular files */
+	fsstack_copy_inode_size(inode, lower_inode);
+	inode->i_ino = lower_inode->i_ino;
+	inode->i_version++;
+	inode->i_mapping->a_ops = &ecryptfs_aops;
+
+	if (S_ISLNK(inode->i_mode))
+		inode->i_op = &ecryptfs_symlink_iops;
+	else if (S_ISDIR(inode->i_mode))
+		inode->i_op = &ecryptfs_dir_iops;
+	else
+		inode->i_op = &ecryptfs_main_iops;
+
+	if (S_ISDIR(inode->i_mode))
+		inode->i_fop = &ecryptfs_dir_fops;
+	else if (special_file(inode->i_mode))
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
+	else
+		inode->i_fop = &ecryptfs_main_fops;
+
+	return 0;
+}
+
+static struct inode *__ecryptfs_get_inode(struct inode *lower_inode,
+					  struct super_block *sb)
+{
+	struct inode *inode;
+
+	if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb))
+		return ERR_PTR(-EXDEV);
+	if (!igrab(lower_inode))
+		return ERR_PTR(-ESTALE);
+	inode = iget5_locked(sb, (unsigned long)lower_inode,
+			     ecryptfs_inode_test, ecryptfs_inode_set,
+			     lower_inode);
+	if (!inode) {
+		iput(lower_inode);
+		return ERR_PTR(-EACCES);
+	}
+	if (!(inode->i_state & I_NEW))
+		iput(lower_inode);
+
+	return inode;
+}
+
+struct inode *ecryptfs_get_inode(struct inode *lower_inode,
+				 struct super_block *sb)
+{
+	struct inode *inode = __ecryptfs_get_inode(lower_inode, sb);
+
+	if (!IS_ERR(inode) && (inode->i_state & I_NEW))
+		unlock_new_inode(inode);
+
+	return inode;
+}
+
+/**
+ * ecryptfs_interpose
+ * @lower_dentry: Existing dentry in the lower filesystem
+ * @dentry: ecryptfs' dentry
+ * @sb: ecryptfs's super_block
+ *
+ * Interposes upper and lower dentries.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_interpose(struct dentry *lower_dentry,
+			      struct dentry *dentry, struct super_block *sb)
+{
+	struct inode *inode = ecryptfs_get_inode(lower_dentry->d_inode, sb);
+
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	d_instantiate(dentry, inode);
+
+	return 0;
+}
+
 /**
  * ecryptfs_create_underlying_file
  * @lower_dir_inode: inode of the parent in the lower fs of the new file
@@ -129,7 +220,7 @@
 		goto out_lock;
 	}
 	rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
-				directory_inode->i_sb, 0);
+				directory_inode->i_sb);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
 		goto out_lock;
@@ -168,7 +259,8 @@
 				"context; rc = [%d]\n", rc);
 		goto out;
 	}
-	rc = ecryptfs_get_lower_file(ecryptfs_dentry);
+	rc = ecryptfs_get_lower_file(ecryptfs_dentry,
+				     ecryptfs_dentry->d_inode);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to initialize "
 			"the lower file for the dentry with name "
@@ -215,102 +307,90 @@
 	return rc;
 }
 
-/**
- * ecryptfs_lookup_and_interpose_lower - Perform a lookup
- */
-int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
-					struct dentry *lower_dentry,
-					struct inode *ecryptfs_dir_inode)
+static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode)
 {
-	struct dentry *lower_dir_dentry;
-	struct vfsmount *lower_mnt;
-	struct inode *lower_inode;
 	struct ecryptfs_crypt_stat *crypt_stat;
-	char *page_virt = NULL;
-	int put_lower = 0, rc = 0;
+	int rc;
 
-	lower_dir_dentry = lower_dentry->d_parent;
-	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
-				   ecryptfs_dentry->d_parent));
-	lower_inode = lower_dentry->d_inode;
-	fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
-	BUG_ON(!lower_dentry->d_count);
-	ecryptfs_set_dentry_private(ecryptfs_dentry,
-				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
-						     GFP_KERNEL));
-	if (!ecryptfs_dentry_to_private(ecryptfs_dentry)) {
-		rc = -ENOMEM;
-		printk(KERN_ERR "%s: Out of memory whilst attempting "
-		       "to allocate ecryptfs_dentry_info struct\n",
-			__func__);
-		goto out_put;
-	}
-	ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry);
-	ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt);
-	if (!lower_dentry->d_inode) {
-		/* We want to add because we couldn't find in lower */
-		d_add(ecryptfs_dentry, NULL);
-		goto out;
-	}
-	rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
-				ecryptfs_dir_inode->i_sb,
-				ECRYPTFS_INTERPOSE_FLAG_D_ADD);
-	if (rc) {
-		printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
-		       __func__, rc);
-		goto out;
-	}
-	if (S_ISDIR(lower_inode->i_mode))
-		goto out;
-	if (S_ISLNK(lower_inode->i_mode))
-		goto out;
-	if (special_file(lower_inode->i_mode))
-		goto out;
-	/* Released in this function */
-	page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
-	if (!page_virt) {
-		printk(KERN_ERR "%s: Cannot kmem_cache_zalloc() a page\n",
-		       __func__);
-		rc = -ENOMEM;
-		goto out;
-	}
-	rc = ecryptfs_get_lower_file(ecryptfs_dentry);
+	rc = ecryptfs_get_lower_file(dentry, inode);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to initialize "
 			"the lower file for the dentry with name "
 			"[%s]; rc = [%d]\n", __func__,
-			ecryptfs_dentry->d_name.name, rc);
-		goto out_free_kmem;
+			dentry->d_name.name, rc);
+		return rc;
 	}
-	put_lower = 1;
-	crypt_stat = &ecryptfs_inode_to_private(
-					ecryptfs_dentry->d_inode)->crypt_stat;
+
+	crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
 	/* TODO: lock for crypt_stat comparison */
 	if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
-			ecryptfs_set_default_sizes(crypt_stat);
-	rc = ecryptfs_read_and_validate_header_region(page_virt,
-						      ecryptfs_dentry->d_inode);
+		ecryptfs_set_default_sizes(crypt_stat);
+
+	rc = ecryptfs_read_and_validate_header_region(inode);
+	ecryptfs_put_lower_file(inode);
 	if (rc) {
-		memset(page_virt, 0, PAGE_CACHE_SIZE);
-		rc = ecryptfs_read_and_validate_xattr_region(page_virt,
-							     ecryptfs_dentry);
-		if (rc) {
-			rc = 0;
-			goto out_free_kmem;
-		}
-		crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
+		rc = ecryptfs_read_and_validate_xattr_region(dentry, inode);
+		if (!rc)
+			crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
 	}
-	ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
-out_free_kmem:
-	kmem_cache_free(ecryptfs_header_cache_2, page_virt);
-	goto out;
-out_put:
-	dput(lower_dentry);
-	mntput(lower_mnt);
-	d_drop(ecryptfs_dentry);
-out:
-	if (put_lower)
-		ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
+
+	/* Must return 0 to allow non-eCryptfs files to be looked up, too */
+	return 0;
+}
+
+/**
+ * ecryptfs_lookup_interpose - Dentry interposition for a lookup
+ */
+static int ecryptfs_lookup_interpose(struct dentry *dentry,
+				     struct dentry *lower_dentry,
+				     struct inode *dir_inode)
+{
+	struct inode *inode, *lower_inode = lower_dentry->d_inode;
+	struct ecryptfs_dentry_info *dentry_info;
+	struct vfsmount *lower_mnt;
+	int rc = 0;
+
+	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
+	fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
+	BUG_ON(!lower_dentry->d_count);
+
+	dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
+	ecryptfs_set_dentry_private(dentry, dentry_info);
+	if (!dentry_info) {
+		printk(KERN_ERR "%s: Out of memory whilst attempting "
+		       "to allocate ecryptfs_dentry_info struct\n",
+			__func__);
+		dput(lower_dentry);
+		mntput(lower_mnt);
+		d_drop(dentry);
+		return -ENOMEM;
+	}
+	ecryptfs_set_dentry_lower(dentry, lower_dentry);
+	ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
+
+	if (!lower_dentry->d_inode) {
+		/* We want to add because we couldn't find in lower */
+		d_add(dentry, NULL);
+		return 0;
+	}
+	inode = __ecryptfs_get_inode(lower_inode, dir_inode->i_sb);
+	if (IS_ERR(inode)) {
+		printk(KERN_ERR "%s: Error interposing; rc = [%ld]\n",
+		       __func__, PTR_ERR(inode));
+		return PTR_ERR(inode);
+	}
+	if (S_ISREG(inode->i_mode)) {
+		rc = ecryptfs_i_size_read(dentry, inode);
+		if (rc) {
+			make_bad_inode(inode);
+			return rc;
+		}
+	}
+
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+	d_add(dentry, inode);
+
 	return rc;
 }
 
@@ -353,12 +433,12 @@
 		goto out_d_drop;
 	}
 	if (lower_dentry->d_inode)
-		goto lookup_and_interpose;
+		goto interpose;
 	mount_crypt_stat = &ecryptfs_superblock_to_private(
 				ecryptfs_dentry->d_sb)->mount_crypt_stat;
 	if (!(mount_crypt_stat
 	    && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
-		goto lookup_and_interpose;
+		goto interpose;
 	dput(lower_dentry);
 	rc = ecryptfs_encrypt_and_encode_filename(
 		&encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
@@ -381,9 +461,9 @@
 				encrypted_and_encoded_name);
 		goto out_d_drop;
 	}
-lookup_and_interpose:
-	rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
-						 ecryptfs_dir_inode);
+interpose:
+	rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry,
+				       ecryptfs_dir_inode);
 	goto out;
 out_d_drop:
 	d_drop(ecryptfs_dentry);
@@ -411,7 +491,7 @@
 		      lower_new_dentry);
 	if (rc || !lower_new_dentry->d_inode)
 		goto out_lock;
-	rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
+	rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
 	if (rc)
 		goto out_lock;
 	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
@@ -478,7 +558,7 @@
 	kfree(encoded_symname);
 	if (rc || !lower_dentry->d_inode)
 		goto out_lock;
-	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
+	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
 	if (rc)
 		goto out_lock;
 	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
@@ -502,7 +582,7 @@
 	rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode);
 	if (rc || !lower_dentry->d_inode)
 		goto out;
-	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
+	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
 	if (rc)
 		goto out;
 	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
@@ -521,8 +601,6 @@
 	struct dentry *lower_dir_dentry;
 	int rc;
 
-	dentry_unhash(dentry);
-
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	dget(dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
@@ -552,7 +630,7 @@
 	rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
 	if (rc || !lower_dentry->d_inode)
 		goto out;
-	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
+	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
 	if (rc)
 		goto out;
 	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
@@ -575,9 +653,6 @@
 	struct dentry *lower_new_dir_dentry;
 	struct dentry *trap = NULL;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
 	lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
 	dget(lower_old_dentry);
@@ -755,7 +830,7 @@
 		lower_ia->ia_valid &= ~ATTR_SIZE;
 		return 0;
 	}
-	rc = ecryptfs_get_lower_file(dentry);
+	rc = ecryptfs_get_lower_file(dentry, inode);
 	if (rc)
 		return rc;
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
@@ -911,7 +986,7 @@
 
 		mount_crypt_stat = &ecryptfs_superblock_to_private(
 			dentry->d_sb)->mount_crypt_stat;
-		rc = ecryptfs_get_lower_file(dentry);
+		rc = ecryptfs_get_lower_file(dentry, inode);
 		if (rc) {
 			mutex_unlock(&crypt_stat->cs_mutex);
 			goto out;
@@ -1084,21 +1159,6 @@
 	return rc;
 }
 
-int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode)
-{
-	if ((ecryptfs_inode_to_lower(inode)
-	     == (struct inode *)candidate_lower_inode))
-		return 1;
-	else
-		return 0;
-}
-
-int ecryptfs_inode_set(struct inode *inode, void *lower_inode)
-{
-	ecryptfs_init_inode(inode, (struct inode *)lower_inode);
-	return 0;
-}
-
 const struct inode_operations ecryptfs_symlink_iops = {
 	.readlink = ecryptfs_readlink,
 	.follow_link = ecryptfs_follow_link,

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 89b9338..9f1bb74 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c

@@ -135,12 +135,12 @@
 	return rc;
 }
 
-int ecryptfs_get_lower_file(struct dentry *dentry)
+int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode)
 {
-	struct ecryptfs_inode_info *inode_info =
-		ecryptfs_inode_to_private(dentry->d_inode);
+	struct ecryptfs_inode_info *inode_info;
 	int count, rc = 0;
 
+	inode_info = ecryptfs_inode_to_private(inode);
 	mutex_lock(&inode_info->lower_file_mutex);
 	count = atomic_inc_return(&inode_info->lower_file_count);
 	if (WARN_ON_ONCE(count < 1))
@@ -168,75 +168,6 @@
 	}
 }
 
-static struct inode *ecryptfs_get_inode(struct inode *lower_inode,
-		       struct super_block *sb)
-{
-	struct inode *inode;
-	int rc = 0;
-
-	if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) {
-		rc = -EXDEV;
-		goto out;
-	}
-	if (!igrab(lower_inode)) {
-		rc = -ESTALE;
-		goto out;
-	}
-	inode = iget5_locked(sb, (unsigned long)lower_inode,
-			     ecryptfs_inode_test, ecryptfs_inode_set,
-			     lower_inode);
-	if (!inode) {
-		rc = -EACCES;
-		iput(lower_inode);
-		goto out;
-	}
-	if (inode->i_state & I_NEW)
-		unlock_new_inode(inode);
-	else
-		iput(lower_inode);
-	if (S_ISLNK(lower_inode->i_mode))
-		inode->i_op = &ecryptfs_symlink_iops;
-	else if (S_ISDIR(lower_inode->i_mode))
-		inode->i_op = &ecryptfs_dir_iops;
-	if (S_ISDIR(lower_inode->i_mode))
-		inode->i_fop = &ecryptfs_dir_fops;
-	if (special_file(lower_inode->i_mode))
-		init_special_inode(inode, lower_inode->i_mode,
-				   lower_inode->i_rdev);
-	fsstack_copy_attr_all(inode, lower_inode);
-	/* This size will be overwritten for real files w/ headers and
-	 * other metadata */
-	fsstack_copy_inode_size(inode, lower_inode);
-	return inode;
-out:
-	return ERR_PTR(rc);
-}
-
-/**
- * ecryptfs_interpose
- * @lower_dentry: Existing dentry in the lower filesystem
- * @dentry: ecryptfs' dentry
- * @sb: ecryptfs's super_block
- * @flags: flags to govern behavior of interpose procedure
- *
- * Interposes upper and lower dentries.
- *
- * Returns zero on success; non-zero otherwise
- */
-int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
-		       struct super_block *sb, u32 flags)
-{
-	struct inode *lower_inode = lower_dentry->d_inode;
-	struct inode *inode = ecryptfs_get_inode(lower_inode, sb);
-	if (IS_ERR(inode))
-		return PTR_ERR(inode);
-	if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
-		d_add(dentry, inode);
-	else
-		d_instantiate(dentry, inode);
-	return 0;
-}
-
 enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
        ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher,
        ecryptfs_opt_ecryptfs_key_bytes,
@@ -704,13 +635,8 @@
 		.size = sizeof(struct ecryptfs_sb_info),
 	},
 	{
-		.cache = &ecryptfs_header_cache_1,
-		.name = "ecryptfs_headers_1",
-		.size = PAGE_CACHE_SIZE,
-	},
-	{
-		.cache = &ecryptfs_header_cache_2,
-		.name = "ecryptfs_headers_2",
+		.cache = &ecryptfs_header_cache,
+		.name = "ecryptfs_headers",
 		.size = PAGE_CACHE_SIZE,
 	},
 	{

diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 245b517..dbd52d40 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c

@@ -93,22 +93,6 @@
 }
 
 /**
- * ecryptfs_init_inode
- * @inode: The ecryptfs inode
- *
- * Set up the ecryptfs inode.
- */
-void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
-{
-	ecryptfs_set_inode_lower(inode, lower_inode);
-	inode->i_ino = lower_inode->i_ino;
-	inode->i_version++;
-	inode->i_op = &ecryptfs_main_iops;
-	inode->i_fop = &ecryptfs_main_fops;
-	inode->i_mapping->a_ops = &ecryptfs_aops;
-}
-
-/**
  * ecryptfs_statfs
  * @sb: The ecryptfs super block
  * @buf: The struct kstatfs to fill in with stats

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 68b2e43..3451d23 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c

@@ -3392,7 +3392,7 @@
  * so would cause a commit on atime updates, which we don't bother doing.
  * We handle synchronous inodes at the highest possible level.
  */
-void ext3_dirty_inode(struct inode *inode)
+void ext3_dirty_inode(struct inode *inode, int flags)
 {
 	handle_t *current_handle = ext3_journal_current_handle();
 	handle_t *handle;

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a74b89c..1921392 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h

@@ -1813,7 +1813,7 @@
 extern void ext4_evict_inode(struct inode *);
 extern void ext4_clear_inode(struct inode *);
 extern int  ext4_sync_inode(handle_t *, struct inode *);
-extern void ext4_dirty_inode(struct inode *);
+extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern int ext4_can_truncate(struct inode *inode);

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 50d0e9c..a5763e3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c

@@ -5733,7 +5733,7 @@
  * so would cause a commit on atime updates, which we don't bother doing.
  * We handle synchronous inodes at the highest possible level.
  */
-void ext4_dirty_inode(struct inode *inode)
+void ext4_dirty_inode(struct inode *inode, int flags)
 {
 	handle_t *handle;
 

diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index be15437..3b222da 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c

@@ -326,8 +326,6 @@
 	struct fat_slot_info sinfo;
 	int err;
 
-	dentry_unhash(dentry);
-
 	lock_super(sb);
 	/*
 	 * Check whether the directory is not in use, then check
@@ -459,9 +457,6 @@
 	old_inode = old_dentry->d_inode;
 	new_inode = new_dentry->d_inode;
 
-	if (new_inode && S_ISDIR(new_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	err = fat_scan(old_dir, old_name, &old_sinfo);
 	if (err) {
 		err = -EIO;

diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index c61a678..20b4ea5 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c

@@ -824,8 +824,6 @@
 	struct fat_slot_info sinfo;
 	int err;
 
-	dentry_unhash(dentry);
-
 	lock_super(sb);
 
 	err = fat_dir_empty(inode);
@@ -933,9 +931,6 @@
 	int err, is_dir, update_dotdot, corrupt = 0;
 	struct super_block *sb = old_dir->i_sb;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
 	old_inode = old_dentry->d_inode;
 	new_inode = new_dentry->d_inode;

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 34591ee8..0f015a0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c

@@ -1007,9 +1007,6 @@
  * In short, make sure you hash any inodes _before_ you start marking
  * them dirty.
  *
- * This function *must* be atomic for the I_DIRTY_PAGES case -
- * set_page_dirty() is called under spinlock in several places.
- *
  * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
  * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
  * the kernel-internal blockdev inode represents the dirtying time of the
@@ -1028,7 +1025,7 @@
 	 */
 	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		if (sb->s_op->dirty_inode)
-			sb->s_op->dirty_inode(inode);
+			sb->s_op->dirty_inode(inode, flags);
 	}
 
 	/*

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0d0e3fa..d5016071 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c

@@ -667,8 +667,6 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	dentry_unhash(entry);
-
 	req->in.h.opcode = FUSE_RMDIR;
 	req->in.h.nodeid = get_node_id(dir);
 	req->in.numargs = 1;
@@ -694,9 +692,6 @@
 	struct fuse_conn *fc = get_fuse_conn(olddir);
 	struct fuse_req *req = fuse_get_req(fc);
 
-	if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode))
-		dentry_unhash(newent);
-
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 

diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 1cb70cd..b4d70b1 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c

@@ -253,9 +253,6 @@
 	struct inode *inode = dentry->d_inode;
 	int res;
 
-	if (S_ISDIR(inode->i_mode))
-		dentry_unhash(dentry);
-
 	if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
 		return -ENOTEMPTY;
 	res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
@@ -286,9 +283,6 @@
 
 	/* Unlink destination if it already exists */
 	if (new_dentry->d_inode) {
-		if (S_ISDIR(new_dentry->d_inode->i_mode))
-			dentry_unhash(new_dentry);
-
 		res = hfs_remove(new_dir, new_dentry);
 		if (res)
 			return res;

diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index b288350..4df5059 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c

@@ -370,8 +370,6 @@
 	struct inode *inode = dentry->d_inode;
 	int res;
 
-	dentry_unhash(dentry);
-
 	if (inode->i_size != 2)
 		return -ENOTEMPTY;
 
@@ -469,12 +467,10 @@
 
 	/* Unlink destination if it already exists */
 	if (new_dentry->d_inode) {
-		if (S_ISDIR(new_dentry->d_inode->i_mode)) {
-			dentry_unhash(new_dentry);
+		if (S_ISDIR(new_dentry->d_inode->i_mode))
 			res = hfsplus_rmdir(new_dir, new_dentry);
-		} else {
+		else
 			res = hfsplus_unlink(new_dir, new_dentry);
-		}
 		if (res)
 			return res;
 	}

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index e6816b9..2638c834e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c

@@ -683,8 +683,6 @@
 	char *file;
 	int err;
 
-	dentry_unhash(dentry);
-
 	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = do_rmdir(file);
@@ -738,9 +736,6 @@
 	char *from_name, *to_name;
 	int err;
 
-	if (to->d_inode && S_ISDIR(to->d_inode->i_mode))
-		dentry_unhash(to);
-
 	if ((from_name = dentry_name(from)) == NULL)
 		return -ENOMEM;
 	if ((to_name = dentry_name(to)) == NULL) {

diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index ff0ce21..acf95da 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c

@@ -439,8 +439,6 @@
 	int err;
 	int r;
 
-	dentry_unhash(dentry);
-
 	hpfs_adjust_length(name, &len);
 	hpfs_lock(dir->i_sb);
 	err = -ENOENT;
@@ -535,9 +533,6 @@
 	struct fnode *fnode;
 	int err;
 
-	if (new_inode && S_ISDIR(new_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	if ((err = hpfs_chk_name(new_name, &new_len))) return err;
 	err = 0;
 	hpfs_adjust_length(old_name, &old_len);

diff --git a/fs/inode.c b/fs/inode.c
index 990d284..0f7e88a 100644
--- a/fs/inode.c
+++ b/fs/inode.c

@@ -1,9 +1,7 @@
 /*
- * linux/fs/inode.c
- *
  * (C) 1997 Linus Torvalds
+ * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
  */
-
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/dcache.h>
@@ -27,10 +25,11 @@
 #include <linux/prefetch.h>
 #include <linux/ima.h>
 #include <linux/cred.h>
+#include <linux/buffer_head.h> /* for inode_has_buffers */
 #include "internal.h"
 
 /*
- * inode locking rules.
+ * Inode locking rules:
  *
  * inode->i_lock protects:
  *   inode->i_state, inode->i_hash, __iget()
@@ -60,54 +59,11 @@
  *   inode_hash_lock
  */
 
-/*
- * This is needed for the following functions:
- *  - inode_has_buffers
- *  - invalidate_bdev
- *
- * FIXME: remove all knowledge of the buffer layer from this file
- */
-#include <linux/buffer_head.h>
-
-/*
- * New inode.c implementation.
- *
- * This implementation has the basic premise of trying
- * to be extremely low-overhead and SMP-safe, yet be
- * simple enough to be "obviously correct".
- *
- * Famous last words.
- */
-
-/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */
-
-/* #define INODE_PARANOIA 1 */
-/* #define INODE_DEBUG 1 */
-
-/*
- * Inode lookup is no longer as critical as it used to be:
- * most of the lookups are going to be through the dcache.
- */
-#define I_HASHBITS	i_hash_shift
-#define I_HASHMASK	i_hash_mask
-
 static unsigned int i_hash_mask __read_mostly;
 static unsigned int i_hash_shift __read_mostly;
 static struct hlist_head *inode_hashtable __read_mostly;
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
-/*
- * Each inode can be on two separate lists. One is
- * the hash list of the inode, used for lookups. The
- * other linked list is the "type" list:
- *  "in_use" - valid inode, i_count > 0, i_nlink > 0
- *  "dirty"  - as "in_use" but also dirty
- *  "unused" - valid inode, i_count = 0
- *
- * A "dirty" list is maintained for each super block,
- * allowing for low-overhead inode sync() operations.
- */
-
 static LIST_HEAD(inode_lru);
 static DEFINE_SPINLOCK(inode_lru_lock);
 
@@ -424,8 +380,8 @@
 
 	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
 			L1_CACHE_BYTES;
-	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
-	return tmp & I_HASHMASK;
+	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
+	return tmp & i_hash_mask;
 }
 
 /**

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 9a1e86f..4bca6a2 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c

@@ -605,8 +605,6 @@
 	int ret;
 	uint32_t now = get_seconds();
 
-	dentry_unhash(dentry);
-
 	for (fd = f->dents ; fd; fd = fd->next) {
 		if (fd->ino)
 			return -ENOTEMPTY;
@@ -782,9 +780,6 @@
 	uint8_t type;
 	uint32_t now;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	/* The VFS will check for us and prevent trying to rename a
 	 * file over a directory and vice versa, but if it's a directory,
 	 * the VFS can't check whether the victim is empty. The filesystem

diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index e896e67..46ad619 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c

@@ -357,7 +357,7 @@
 	return ERR_PTR(ret);
 }
 
-void jffs2_dirty_inode(struct inode *inode)
+void jffs2_dirty_inode(struct inode *inode, int flags)
 {
 	struct iattr iattr;
 

diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 00bae7c..65c6c43 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h

@@ -172,7 +172,7 @@
 int jffs2_do_setattr (struct inode *, struct iattr *);
 struct inode *jffs2_iget(struct super_block *, unsigned long);
 void jffs2_evict_inode (struct inode *);
-void jffs2_dirty_inode(struct inode *inode);
+void jffs2_dirty_inode(struct inode *inode, int flags);
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
 			       struct jffs2_raw_inode *ri);
 int jffs2_statfs (struct dentry *, struct kstatfs *);

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index eddbb37..1096559 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c

@@ -173,7 +173,7 @@
 	dquot_drop(inode);
 }
 
-void jfs_dirty_inode(struct inode *inode)
+void jfs_dirty_inode(struct inode *inode, int flags)
 {
 	static int noisy = 5;
 

diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 155e91e..ec2fb8b 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h

@@ -28,7 +28,7 @@
 extern int jfs_commit_inode(struct inode *, int);
 extern int jfs_write_inode(struct inode *, struct writeback_control *);
 extern void jfs_evict_inode(struct inode *);
-extern void jfs_dirty_inode(struct inode *);
+extern void jfs_dirty_inode(struct inode *, int);
 extern void jfs_truncate(struct inode *);
 extern void jfs_truncate_nolock(struct inode *, loff_t);
 extern void jfs_free_zero_link(struct inode *);

diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 865df16..eaaf2b5 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c

@@ -360,8 +360,6 @@
 
 	jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name);
 
-	dentry_unhash(dentry);
-
 	/* Init inode for quota operations. */
 	dquot_initialize(dip);
 	dquot_initialize(ip);
@@ -1097,9 +1095,6 @@
 	jfs_info("jfs_rename: %s %s", old_dentry->d_name.name,
 		 new_dentry->d_name.name);
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	dquot_initialize(old_dir);
 	dquot_initialize(new_dir);
 

diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f34c9cd..9ed89d1 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c

@@ -273,8 +273,6 @@
 {
 	struct inode *inode = dentry->d_inode;
 
-	dentry_unhash(dentry);
-
 	if (!logfs_empty_dir(inode))
 		return -ENOTEMPTY;
 
@@ -624,9 +622,6 @@
 	loff_t pos;
 	int err;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	/* 1. locate source dd */
 	err = logfs_get_dd(old_dir, old_dentry, &dd, &pos);
 	if (err)

diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index f60aed8..6e6777f 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c

@@ -168,8 +168,6 @@
 	struct inode * inode = dentry->d_inode;
 	int err = -ENOTEMPTY;
 
-	dentry_unhash(dentry);
-
 	if (minix_empty_dir(inode)) {
 		err = minix_unlink(dir, dentry);
 		if (!err) {
@@ -192,9 +190,6 @@
 	struct minix_dir_entry * old_de;
 	int err = -ENOENT;
 
-	if (new_inode && S_ISDIR(new_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	old_de = minix_find_entry(old_dentry, &old_page);
 	if (!old_de)
 		goto out;

diff --git a/fs/namei.c b/fs/namei.c
index 2358b32..e2e4e8d 100644
--- a/fs/namei.c
+++ b/fs/namei.c

@@ -919,12 +919,11 @@
 }
 
 /*
- * Skip to top of mountpoint pile in rcuwalk mode.  We abort the rcu-walk if we
- * meet a managed dentry and we're not walking to "..".  True is returned to
- * continue, false to abort.
+ * Try to skip to top of mountpoint pile in rcuwalk mode.  Fail if
+ * we meet a managed dentry that would need blocking.
  */
 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
-			       struct inode **inode, bool reverse_transit)
+			       struct inode **inode)
 {
 	for (;;) {
 		struct vfsmount *mounted;
@@ -933,8 +932,7 @@
 		 * that wants to block transit.
 		 */
 		*inode = path->dentry->d_inode;
-		if (!reverse_transit &&
-		     unlikely(managed_dentry_might_block(path->dentry)))
+		if (unlikely(managed_dentry_might_block(path->dentry)))
 			return false;
 
 		if (!d_mountpoint(path->dentry))
@@ -947,16 +945,24 @@
 		path->dentry = mounted->mnt_root;
 		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
 	}
-
-	if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
-		return reverse_transit;
 	return true;
 }
 
+static void follow_mount_rcu(struct nameidata *nd)
+{
+	while (d_mountpoint(nd->path.dentry)) {
+		struct vfsmount *mounted;
+		mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
+		if (!mounted)
+			break;
+		nd->path.mnt = mounted;
+		nd->path.dentry = mounted->mnt_root;
+		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+	}
+}
+
 static int follow_dotdot_rcu(struct nameidata *nd)
 {
-	struct inode *inode = nd->inode;
-
 	set_root_rcu(nd);
 
 	while (1) {
@@ -972,7 +978,6 @@
 			seq = read_seqcount_begin(&parent->d_seq);
 			if (read_seqcount_retry(&old->d_seq, nd->seq))
 				goto failed;
-			inode = parent->d_inode;
 			nd->path.dentry = parent;
 			nd->seq = seq;
 			break;
@@ -980,10 +985,9 @@
 		if (!follow_up_rcu(&nd->path))
 			break;
 		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
-		inode = nd->path.dentry->d_inode;
 	}
-	__follow_mount_rcu(nd, &nd->path, &inode, true);
-	nd->inode = inode;
+	follow_mount_rcu(nd);
+	nd->inode = nd->path.dentry->d_inode;
 	return 0;
 
 failed:
@@ -1157,8 +1161,11 @@
 		}
 		path->mnt = mnt;
 		path->dentry = dentry;
-		if (likely(__follow_mount_rcu(nd, path, inode, false)))
-			return 0;
+		if (unlikely(!__follow_mount_rcu(nd, path, inode)))
+			goto unlazy;
+		if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+			goto unlazy;
+		return 0;
 unlazy:
 		if (unlazy_walk(nd, dentry))
 			return -ECHILD;
@@ -2572,6 +2579,7 @@
 	if (error)
 		goto out;
 
+	shrink_dcache_parent(dentry);
 	error = dir->i_op->rmdir(dir, dentry);
 	if (error)
 		goto out;
@@ -2986,6 +2994,8 @@
 	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
 		goto out;
 
+	if (target)
+		shrink_dcache_parent(new_dentry);
 	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (error)
 		goto out;

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index e3e646b..9c51f62 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c

@@ -1033,8 +1033,11 @@
 	DPRINTK("ncp_rmdir: removing %s/%s\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
+	/*
+	 * fail with EBUSY if there are still references to this
+	 * directory.
+	 */
 	dentry_unhash(dentry);
-
 	error = -EBUSY;
 	if (!d_unhashed(dentry))
 		goto out;
@@ -1141,8 +1144,16 @@
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		new_dentry->d_parent->d_name.name, new_dentry->d_name.name);
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
+	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) {
+		/*
+		 * fail with EBUSY if there are still references to this
+		 * directory.
+		 */
 		dentry_unhash(new_dentry);
+		error = -EBUSY;
+		if (!d_unhashed(new_dentry))
+			goto out;
+	}
 
 	ncp_age_dentry(server, old_dentry);
 	ncp_age_dentry(server, new_dentry);

diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index ba30665..8151554 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig

@@ -87,6 +87,16 @@
 config PNFS_FILE_LAYOUT
 	tristate
 
+config PNFS_OBJLAYOUT
+	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
+	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
+	help
+	  Say M here if you want your pNFS client to support the Objects Layout Driver.
+	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
+	  upper level driver (SCSI_OSD_ULD).
+
+	  If unsure, say N.
+
 config ROOT_NFS
 	bool "Root file system on NFS"
 	depends on NFS_FS=y && IP_PNP

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 4776ff9..6a34f7d 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile

@@ -15,9 +15,11 @@
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o \
 			   nfs4namespace.o
-nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
+nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o
 nfs-$(CONFIG_SYSCTL) += sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
 
 obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
 nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
+
+obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 46d93ce..b257383 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h

@@ -167,6 +167,23 @@
 
 extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
 extern void nfs4_cb_take_slot(struct nfs_client *clp);
+
+struct cb_devicenotifyitem {
+	uint32_t		cbd_notify_type;
+	uint32_t		cbd_layout_type;
+	struct nfs4_deviceid	cbd_dev_id;
+	uint32_t		cbd_immediate;
+};
+
+struct cb_devicenotifyargs {
+	int				 ndevs;
+	struct cb_devicenotifyitem	 *devs;
+};
+
+extern __be32 nfs4_callback_devicenotify(
+	struct cb_devicenotifyargs *args,
+	void *dummy, struct cb_process_state *cps);
+
 #endif /* CONFIG_NFS_V4_1 */
 extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
 extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2f41dcce..d4d1954 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c

@@ -139,7 +139,7 @@
 	spin_lock(&ino->i_lock);
 	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
 	    mark_matching_lsegs_invalid(lo, &free_me_list,
-					args->cbl_range.iomode))
+					&args->cbl_range))
 		rv = NFS4ERR_DELAY;
 	else
 		rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -184,7 +184,7 @@
 		ino = lo->plh_inode;
 		spin_lock(&ino->i_lock);
 		set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
-		if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
+		if (mark_matching_lsegs_invalid(lo, &free_me_list, &range))
 			rv = NFS4ERR_DELAY;
 		list_del_init(&lo->plh_bulk_recall);
 		spin_unlock(&ino->i_lock);
@@ -241,6 +241,53 @@
 	do_callback_layoutrecall(clp, &args);
 }
 
+__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
+				  void *dummy, struct cb_process_state *cps)
+{
+	int i;
+	__be32 res = 0;
+	struct nfs_client *clp = cps->clp;
+	struct nfs_server *server = NULL;
+
+	dprintk("%s: -->\n", __func__);
+
+	if (!clp) {
+		res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+		goto out;
+	}
+
+	for (i = 0; i < args->ndevs; i++) {
+		struct cb_devicenotifyitem *dev = &args->devs[i];
+
+		if (!server ||
+		    server->pnfs_curr_ld->id != dev->cbd_layout_type) {
+			rcu_read_lock();
+			list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+				if (server->pnfs_curr_ld &&
+				    server->pnfs_curr_ld->id == dev->cbd_layout_type) {
+					rcu_read_unlock();
+					goto found;
+				}
+			rcu_read_unlock();
+			dprintk("%s: layout type %u not found\n",
+				__func__, dev->cbd_layout_type);
+			continue;
+		}
+
+	found:
+		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
+			dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, "
+				"deleting instead\n", __func__);
+		nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id);
+	}
+
+out:
+	kfree(args->devs);
+	dprintk("%s: exit with status = %u\n",
+		__func__, be32_to_cpu(res));
+	return res;
+}
+
 int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
 {
 	if (delegation == NULL)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 00ecf62..c6c86a7 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c

@@ -25,6 +25,7 @@
 
 #if defined(CONFIG_NFS_V4_1)
 #define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+#define CB_OP_DEVICENOTIFY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
 					4 + 1 + 3)
 #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
@@ -284,6 +285,93 @@
 	return status;
 }
 
+static
+__be32 decode_devicenotify_args(struct svc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct cb_devicenotifyargs *args)
+{
+	__be32 *p;
+	__be32 status = 0;
+	u32 tmp;
+	int n, i;
+	args->ndevs = 0;
+
+	/* Num of device notifications */
+	p = read_buf(xdr, sizeof(uint32_t));
+	if (unlikely(p == NULL)) {
+		status = htonl(NFS4ERR_BADXDR);
+		goto out;
+	}
+	n = ntohl(*p++);
+	if (n <= 0)
+		goto out;
+
+	args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL);
+	if (!args->devs) {
+		status = htonl(NFS4ERR_DELAY);
+		goto out;
+	}
+
+	/* Decode each dev notification */
+	for (i = 0; i < n; i++) {
+		struct cb_devicenotifyitem *dev = &args->devs[i];
+
+		p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE);
+		if (unlikely(p == NULL)) {
+			status = htonl(NFS4ERR_BADXDR);
+			goto err;
+		}
+
+		tmp = ntohl(*p++);	/* bitmap size */
+		if (tmp != 1) {
+			status = htonl(NFS4ERR_INVAL);
+			goto err;
+		}
+		dev->cbd_notify_type = ntohl(*p++);
+		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
+		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
+			status = htonl(NFS4ERR_INVAL);
+			goto err;
+		}
+
+		tmp = ntohl(*p++);	/* opaque size */
+		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
+		     (tmp != NFS4_DEVICEID4_SIZE + 8)) ||
+		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
+		     (tmp != NFS4_DEVICEID4_SIZE + 4))) {
+			status = htonl(NFS4ERR_INVAL);
+			goto err;
+		}
+		dev->cbd_layout_type = ntohl(*p++);
+		memcpy(dev->cbd_dev_id.data, p, NFS4_DEVICEID4_SIZE);
+		p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
+
+		if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) {
+			p = read_buf(xdr, sizeof(uint32_t));
+			if (unlikely(p == NULL)) {
+				status = htonl(NFS4ERR_BADXDR);
+				goto err;
+			}
+			dev->cbd_immediate = ntohl(*p++);
+		} else {
+			dev->cbd_immediate = 0;
+		}
+
+		args->ndevs++;
+
+		dprintk("%s: type %d layout 0x%x immediate %d\n",
+			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
+			dev->cbd_immediate);
+	}
+out:
+	dprintk("%s: status %d ndevs %d\n",
+		__func__, ntohl(status), args->ndevs);
+	return status;
+err:
+	kfree(args->devs);
+	goto out;
+}
+
 static __be32 decode_sessionid(struct xdr_stream *xdr,
 				 struct nfs4_sessionid *sid)
 {
@@ -639,10 +727,10 @@
 	case OP_CB_RECALL_ANY:
 	case OP_CB_RECALL_SLOT:
 	case OP_CB_LAYOUTRECALL:
+	case OP_CB_NOTIFY_DEVICEID:
 		*op = &callback_ops[op_nr];
 		break;
 
-	case OP_CB_NOTIFY_DEVICEID:
 	case OP_CB_NOTIFY:
 	case OP_CB_PUSH_DELEG:
 	case OP_CB_RECALLABLE_OBJ_AVAIL:
@@ -849,6 +937,12 @@
 			(callback_decode_arg_t)decode_layoutrecall_args,
 		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
 	},
+	[OP_CB_NOTIFY_DEVICEID] = {
+		.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
+		.decode_args =
+			(callback_decode_arg_t)decode_devicenotify_args,
+		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
+	},
 	[OP_CB_SEQUENCE] = {
 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 139be96..b3dc2b8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c

@@ -290,6 +290,8 @@
 	if (clp->cl_machine_cred != NULL)
 		put_rpccred(clp->cl_machine_cred);
 
+	nfs4_deviceid_purge_client(clp);
+
 	kfree(clp->cl_hostname);
 	kfree(clp);
 

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index bbbc6bf..dd25c2a 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c

@@ -21,25 +21,13 @@
 #include "delegation.h"
 #include "internal.h"
 
-static void nfs_do_free_delegation(struct nfs_delegation *delegation)
-{
-	kfree(delegation);
-}
-
-static void nfs_free_delegation_callback(struct rcu_head *head)
-{
-	struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
-
-	nfs_do_free_delegation(delegation);
-}
-
 static void nfs_free_delegation(struct nfs_delegation *delegation)
 {
 	if (delegation->cred) {
 		put_rpccred(delegation->cred);
 		delegation->cred = NULL;
 	}
-	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+	kfree_rcu(delegation, rcu);
 }
 
 /**

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 424e477..ededdbd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c

@@ -512,12 +512,7 @@
 				struct page **xdr_pages, struct page *page, unsigned int buflen)
 {
 	struct xdr_stream stream;
-	struct xdr_buf buf = {
-		.pages = xdr_pages,
-		.page_len = buflen,
-		.buflen = buflen,
-		.len = buflen,
-	};
+	struct xdr_buf buf;
 	struct page *scratch;
 	struct nfs_cache_array *array;
 	unsigned int count = 0;
@@ -527,7 +522,7 @@
 	if (scratch == NULL)
 		return -ENOMEM;
 
-	xdr_init_decode(&stream, &buf, NULL);
+	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	do {

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 57bb31a..144f2a3 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c

@@ -1298,8 +1298,12 @@
 				i_size_write(inode, new_isize);
 				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 			}
-			dprintk("NFS: isize change on server for file %s/%ld\n",
-					inode->i_sb->s_id, inode->i_ino);
+			dprintk("NFS: isize change on server for file %s/%ld "
+					"(%Ld to %Ld)\n",
+					inode->i_sb->s_id,
+					inode->i_ino,
+					(long long)cur_isize,
+					(long long)new_isize);
 		}
 	} else
 		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
@@ -1424,9 +1428,10 @@
  */
 void nfs4_evict_inode(struct inode *inode)
 {
-	pnfs_destroy_layout(NFS_I(inode));
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
+	pnfs_return_layout(inode);
+	pnfs_destroy_layout(NFS_I(inode));
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation_noreclaim(inode);
 	/* First call standard NFS clear_inode() code */

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2df6ca7..b9056cb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h

@@ -310,6 +310,7 @@
 #endif
 
 /* nfs4proc.c */
+extern void __nfs4_read_done_cb(struct nfs_read_data *);
 extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
 extern int nfs4_init_client(struct nfs_client *clp,
 			    const struct rpc_timeout *timeparms,

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index be79dc9..4269088 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c

@@ -421,6 +421,7 @@
 			struct nfs4_deviceid *id,
 			gfp_t gfp_flags)
 {
+	struct nfs4_deviceid_node *d;
 	struct nfs4_file_layout_dsaddr *dsaddr;
 	int status = -EINVAL;
 	struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
@@ -428,7 +429,7 @@
 	dprintk("--> %s\n", __func__);
 
 	if (fl->pattern_offset > lgr->range.offset) {
-		dprintk("%s pattern_offset %lld to large\n",
+		dprintk("%s pattern_offset %lld too large\n",
 				__func__, fl->pattern_offset);
 		goto out;
 	}
@@ -440,12 +441,14 @@
 	}
 
 	/* find and reference the deviceid */
-	dsaddr = nfs4_fl_find_get_deviceid(id);
-	if (dsaddr == NULL) {
+	d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
+				   NFS_SERVER(lo->plh_inode)->nfs_client, id);
+	if (d == NULL) {
 		dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
 		if (dsaddr == NULL)
 			goto out;
-	}
+	} else
+		dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
 	fl->dsaddr = dsaddr;
 
 	if (fl->first_stripe_index < 0 ||
@@ -507,12 +510,7 @@
 			 gfp_t gfp_flags)
 {
 	struct xdr_stream stream;
-	struct xdr_buf buf = {
-		.pages =  lgr->layoutp->pages,
-		.page_len =  lgr->layoutp->len,
-		.buflen =  lgr->layoutp->len,
-		.len = lgr->layoutp->len,
-	};
+	struct xdr_buf buf;
 	struct page *scratch;
 	__be32 *p;
 	uint32_t nfl_util;
@@ -524,7 +522,7 @@
 	if (!scratch)
 		return -ENOMEM;
 
-	xdr_init_decode(&stream, &buf, NULL);
+	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
@@ -535,7 +533,7 @@
 
 	memcpy(id, p, sizeof(*id));
 	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
-	print_deviceid(id);
+	nfs4_print_deviceid(id);
 
 	nfl_util = be32_to_cpup(p++);
 	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
@@ -653,16 +651,19 @@
 /*
  * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
  *
- * return 1 :  coalesce page
- * return 0 :  don't coalesce page
+ * return true  : coalesce page
+ * return false : don't coalesce page
  */
-int
+bool
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		   struct nfs_page *req)
 {
 	u64 p_stripe, r_stripe;
 	u32 stripe_unit;
 
+	if (!pnfs_generic_pg_test(pgio, prev, req))
+		return 0;
+
 	if (!pgio->pg_lseg)
 		return 1;
 	p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
@@ -860,6 +861,12 @@
 	return -ENOMEM;
 }
 
+static void
+filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
+{
+	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
+}
+
 static struct pnfs_layoutdriver_type filelayout_type = {
 	.id			= LAYOUT_NFSV4_1_FILES,
 	.name			= "LAYOUT_NFSV4_1_FILES",
@@ -872,6 +879,7 @@
 	.commit_pagelist	= filelayout_commit_pagelist,
 	.read_pagelist		= filelayout_read_pagelist,
 	.write_pagelist		= filelayout_write_pagelist,
+	.free_deviceid_node	= filelayout_free_deveiceid_node,
 };
 
 static int __init nfs4filelayout_init(void)

diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 2b461d7..cebe01e 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h

@@ -59,9 +59,7 @@
 #define NFS4_DEVICE_ID_NEG_ENTRY	0x00000001
 
 struct nfs4_file_layout_dsaddr {
-	struct hlist_node		node;
-	struct nfs4_deviceid		deviceid;
-	atomic_t			ref;
+	struct nfs4_deviceid_node	id_node;
 	unsigned long			flags;
 	u32				stripe_count;
 	u8				*stripe_indices;
@@ -95,14 +93,12 @@
 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
 
 extern void print_ds(struct nfs4_pnfs_ds *ds);
-extern void print_deviceid(struct nfs4_deviceid *dev_id);
 u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
 u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
 struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
 					u32 ds_idx);
-extern struct nfs4_file_layout_dsaddr *
-nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
 extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
+extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 struct nfs4_file_layout_dsaddr *
 get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
 

diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index db07c7a..3b7bf13 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c

@@ -37,30 +37,6 @@
 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
 
 /*
- * Device ID RCU cache. A device ID is unique per client ID and layout type.
- */
-#define NFS4_FL_DEVICE_ID_HASH_BITS	5
-#define NFS4_FL_DEVICE_ID_HASH_SIZE	(1 << NFS4_FL_DEVICE_ID_HASH_BITS)
-#define NFS4_FL_DEVICE_ID_HASH_MASK	(NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
-
-static inline u32
-nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
-{
-	unsigned char *cptr = (unsigned char *)id->data;
-	unsigned int nbytes = NFS4_DEVICEID4_SIZE;
-	u32 x = 0;
-
-	while (nbytes--) {
-		x *= 37;
-		x += *cptr++;
-	}
-	return x & NFS4_FL_DEVICE_ID_HASH_MASK;
-}
-
-static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
-static DEFINE_SPINLOCK(filelayout_deviceid_lock);
-
-/*
  * Data server cache
  *
  * Data servers can be mapped to different device ids.
@@ -89,27 +65,6 @@
 		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
 }
 
-void
-print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
-{
-	int i;
-
-	ifdebug(FACILITY) {
-		printk("%s dsaddr->ds_num %d\n", __func__,
-		       dsaddr->ds_num);
-		for (i = 0; i < dsaddr->ds_num; i++)
-			print_ds(dsaddr->ds_list[i]);
-	}
-}
-
-void print_deviceid(struct nfs4_deviceid *id)
-{
-	u32 *p = (u32 *)id;
-
-	dprintk("%s: device id= [%x%x%x%x]\n", __func__,
-		p[0], p[1], p[2], p[3]);
-}
-
 /* nfs4_ds_cache_lock is held */
 static struct nfs4_pnfs_ds *
 _data_server_lookup_locked(u32 ip_addr, u32 port)
@@ -201,13 +156,13 @@
 	kfree(ds);
 }
 
-static void
+void
 nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 {
 	struct nfs4_pnfs_ds *ds;
 	int i;
 
-	print_deviceid(&dsaddr->deviceid);
+	nfs4_print_deviceid(&dsaddr->id_node.deviceid);
 
 	for (i = 0; i < dsaddr->ds_num; i++) {
 		ds = dsaddr->ds_list[i];
@@ -353,12 +308,7 @@
 	u8 max_stripe_index;
 	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
 	struct xdr_stream stream;
-	struct xdr_buf buf = {
-		.pages = pdev->pages,
-		.page_len = pdev->pglen,
-		.buflen = pdev->pglen,
-		.len = pdev->pglen,
-	};
+	struct xdr_buf buf;
 	struct page *scratch;
 
 	/* set up xdr stream */
@@ -366,7 +316,7 @@
 	if (!scratch)
 		goto out_err;
 
-	xdr_init_decode(&stream, &buf, NULL);
+	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	/* Get the stripe count (number of stripe index) */
@@ -431,8 +381,10 @@
 	dsaddr->stripe_indices = stripe_indices;
 	stripe_indices = NULL;
 	dsaddr->ds_num = num;
-
-	memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
+	nfs4_init_deviceid_node(&dsaddr->id_node,
+				NFS_SERVER(ino)->pnfs_curr_ld,
+				NFS_SERVER(ino)->nfs_client,
+				&pdev->dev_id);
 
 	for (i = 0; i < dsaddr->ds_num; i++) {
 		int j;
@@ -505,8 +457,8 @@
 static struct nfs4_file_layout_dsaddr *
 decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
 {
-	struct nfs4_file_layout_dsaddr *d, *new;
-	long hash;
+	struct nfs4_deviceid_node *d;
+	struct nfs4_file_layout_dsaddr *n, *new;
 
 	new = decode_device(inode, dev, gfp_flags);
 	if (!new) {
@@ -515,20 +467,13 @@
 		return NULL;
 	}
 
-	spin_lock(&filelayout_deviceid_lock);
-	d = nfs4_fl_find_get_deviceid(&new->deviceid);
-	if (d) {
-		spin_unlock(&filelayout_deviceid_lock);
+	d = nfs4_insert_deviceid_node(&new->id_node);
+	n = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+	if (n != new) {
 		nfs4_fl_free_deviceid(new);
-		return d;
+		return n;
 	}
 
-	INIT_HLIST_NODE(&new->node);
-	atomic_set(&new->ref, 1);
-	hash = nfs4_fl_deviceid_hash(&new->deviceid);
-	hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
-	spin_unlock(&filelayout_deviceid_lock);
-
 	return new;
 }
 
@@ -600,35 +545,7 @@
 void
 nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 {
-	if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
-		hlist_del_rcu(&dsaddr->node);
-		spin_unlock(&filelayout_deviceid_lock);
-
-		synchronize_rcu();
-		nfs4_fl_free_deviceid(dsaddr);
-	}
-}
-
-struct nfs4_file_layout_dsaddr *
-nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
-{
-	struct nfs4_file_layout_dsaddr *d;
-	struct hlist_node *n;
-	long hash = nfs4_fl_deviceid_hash(id);
-
-
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
-		if (!memcmp(&d->deviceid, id, sizeof(*id))) {
-			if (!atomic_inc_not_zero(&d->ref))
-				goto fail;
-			rcu_read_unlock();
-			return d;
-		}
-	}
-fail:
-	rcu_read_unlock();
-	return NULL;
+	nfs4_put_deviceid_node(&dsaddr->id_node);
 }
 
 /*
@@ -676,15 +593,15 @@
 filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
 			       int err, u32 ds_addr)
 {
-	u32 *p = (u32 *)&dsaddr->deviceid;
+	u32 *p = (u32 *)&dsaddr->id_node.deviceid;
 
 	printk(KERN_ERR "NFS: data server %x connection error %d."
 		" Deviceid [%x%x%x%x] marked out of use.\n",
 		ds_addr, err, p[0], p[1], p[2], p[3]);
 
-	spin_lock(&filelayout_deviceid_lock);
+	spin_lock(&nfs4_ds_cache_lock);
 	dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
-	spin_unlock(&filelayout_deviceid_lock);
+	spin_unlock(&nfs4_ds_cache_lock);
 }
 
 struct nfs4_pnfs_ds *

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf1b339..d2c4b59 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c

@@ -267,9 +267,11 @@
 				break;
 			nfs4_schedule_stateid_recovery(server, state);
 			goto wait_on_recovery;
+		case -NFS4ERR_EXPIRED:
+			if (state != NULL)
+				nfs4_schedule_stateid_recovery(server, state);
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_STALE_CLIENTID:
-		case -NFS4ERR_EXPIRED:
 			nfs4_schedule_lease_recovery(clp);
 			goto wait_on_recovery;
 #if defined(CONFIG_NFS_V4_1)
@@ -2361,6 +2363,9 @@
 	struct nfs4_state *state = NULL;
 	int status;
 
+	if (pnfs_ld_layoutret_on_setattr(inode))
+		pnfs_return_layout(inode);
+
 	nfs_fattr_init(fattr);
 	
 	/* Search for an existing open(O_WRITE) file */
@@ -3175,6 +3180,11 @@
 	return err;
 }
 
+void __nfs4_read_done_cb(struct nfs_read_data *data)
+{
+	nfs_invalidate_atime(data->inode);
+}
+
 static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
 {
 	struct nfs_server *server = NFS_SERVER(data->inode);
@@ -3184,7 +3194,7 @@
 		return -EAGAIN;
 	}
 
-	nfs_invalidate_atime(data->inode);
+	__nfs4_read_done_cb(data);
 	if (task->tk_status > 0)
 		renew_lease(server, data->timestamp);
 	return 0;
@@ -3198,7 +3208,8 @@
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return -EAGAIN;
 
-	return data->read_done_cb(task, data);
+	return data->read_done_cb ? data->read_done_cb(task, data) :
+				    nfs4_read_done_cb(task, data);
 }
 
 static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
@@ -3243,7 +3254,8 @@
 {
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return -EAGAIN;
-	return data->write_done_cb(task, data);
+	return data->write_done_cb ? data->write_done_cb(task, data) :
+		nfs4_write_done_cb(task, data);
 }
 
 /* Reset the the nfs_write_data to send the write to the MDS. */
@@ -3670,9 +3682,11 @@
 				break;
 			nfs4_schedule_stateid_recovery(server, state);
 			goto wait_on_recovery;
+		case -NFS4ERR_EXPIRED:
+			if (state != NULL)
+				nfs4_schedule_stateid_recovery(server, state);
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_STALE_CLIENTID:
-		case -NFS4ERR_EXPIRED:
 			nfs4_schedule_lease_recovery(clp);
 			goto wait_on_recovery;
 #if defined(CONFIG_NFS_V4_1)
@@ -4543,6 +4557,7 @@
 			case -ESTALE:
 				goto out;
 			case -NFS4ERR_EXPIRED:
+				nfs4_schedule_stateid_recovery(server, state);
 			case -NFS4ERR_STALE_CLIENTID:
 			case -NFS4ERR_STALE_STATEID:
 				nfs4_schedule_lease_recovery(server->nfs_client);
@@ -5666,6 +5681,88 @@
 	return status;
 }
 
+static void
+nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutreturn *lrp = calldata;
+
+	dprintk("--> %s\n", __func__);
+	if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
+				&lrp->res.seq_res, 0, task))
+		return;
+	rpc_call_start(task);
+}
+
+static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutreturn *lrp = calldata;
+	struct nfs_server *server;
+
+	dprintk("--> %s\n", __func__);
+
+	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
+		return;
+
+	server = NFS_SERVER(lrp->args.inode);
+	if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+		nfs_restart_rpc(task, lrp->clp);
+		return;
+	}
+	if (task->tk_status == 0) {
+		struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+
+		if (lrp->res.lrs_present) {
+			spin_lock(&lo->plh_inode->i_lock);
+			pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+			spin_unlock(&lo->plh_inode->i_lock);
+		} else
+			BUG_ON(!list_empty(&lo->plh_segs));
+	}
+	dprintk("<-- %s\n", __func__);
+}
+
+static void nfs4_layoutreturn_release(void *calldata)
+{
+	struct nfs4_layoutreturn *lrp = calldata;
+
+	dprintk("--> %s\n", __func__);
+	put_layout_hdr(NFS_I(lrp->args.inode)->layout);
+	kfree(calldata);
+	dprintk("<-- %s\n", __func__);
+}
+
+static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
+	.rpc_call_prepare = nfs4_layoutreturn_prepare,
+	.rpc_call_done = nfs4_layoutreturn_done,
+	.rpc_release = nfs4_layoutreturn_release,
+};
+
+int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
+{
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
+		.rpc_argp = &lrp->args,
+		.rpc_resp = &lrp->res,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = lrp->clp->cl_rpcclient,
+		.rpc_message = &msg,
+		.callback_ops = &nfs4_layoutreturn_call_ops,
+		.callback_data = lrp,
+	};
+	int status;
+
+	dprintk("--> %s\n", __func__);
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	status = task->tk_status;
+	dprintk("<-- %s status=%d\n", __func__, status);
+	rpc_put_task(task);
+	return status;
+}
+
 static int
 _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
 {

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 036f5ad..e97dd21 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c

@@ -1466,7 +1466,10 @@
 #ifdef CONFIG_NFS_V4_1
 void nfs4_schedule_session_recovery(struct nfs4_session *session)
 {
-	nfs4_schedule_lease_recovery(session->clp);
+	struct nfs_client *clp = session->clp;
+
+	set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+	nfs4_schedule_lease_recovery(clp);
 }
 EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
 
@@ -1549,6 +1552,7 @@
 		status = nfs4_recovery_handle_error(clp, status);
 		goto out;
 	}
+	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
 	/* create_session negotiated new slot table */
 	clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
 

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c3ccd2c..d869a5e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c

@@ -338,7 +338,11 @@
 				1 /* layoutupdate4 layout type */ + \
 				1 /* NULL filelayout layoutupdate4 payload */)
 #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
-
+#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
+				encode_stateid_maxsz + \
+				1 /* FIXME: opaque lrf_body always empty at the moment */)
+#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
+				1 + decode_stateid_maxsz)
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz	0
 #define decode_sequence_maxsz	0
@@ -760,7 +764,14 @@
 				decode_putfh_maxsz + \
 				decode_layoutcommit_maxsz + \
 				decode_getattr_maxsz)
-
+#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_putfh_maxsz + \
+				encode_layoutreturn_maxsz)
+#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz + \
+				decode_layoutreturn_maxsz)
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
 				      compound_encode_hdr_maxsz +
@@ -1864,6 +1875,7 @@
 
 static int
 encode_layoutcommit(struct xdr_stream *xdr,
+		    struct inode *inode,
 		    const struct nfs4_layoutcommit_args *args,
 		    struct compound_hdr *hdr)
 {
@@ -1872,7 +1884,7 @@
 	dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
 		NFS_SERVER(args->inode)->pnfs_curr_ld->id);
 
-	p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
+	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
 	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
 	/* Only whole file layouts */
 	p = xdr_encode_hyper(p, 0); /* offset */
@@ -1883,12 +1895,49 @@
 	p = xdr_encode_hyper(p, args->lastbytewritten);
 	*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
 	*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
-	*p++ = cpu_to_be32(0); /* no file layout payload */
+
+	if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
+		NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
+			NFS_I(inode)->layout, xdr, args);
+	else {
+		p = reserve_space(xdr, 4);
+		*p = cpu_to_be32(0); /* no layout-type payload */
+	}
 
 	hdr->nops++;
 	hdr->replen += decode_layoutcommit_maxsz;
 	return 0;
 }
+
+static void
+encode_layoutreturn(struct xdr_stream *xdr,
+		    const struct nfs4_layoutreturn_args *args,
+		    struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	p = reserve_space(xdr, 20);
+	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
+	*p++ = cpu_to_be32(0);		/* reclaim. always 0 for now */
+	*p++ = cpu_to_be32(args->layout_type);
+	*p++ = cpu_to_be32(IOMODE_ANY);
+	*p = cpu_to_be32(RETURN_FILE);
+	p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
+	p = xdr_encode_hyper(p, 0);
+	p = xdr_encode_hyper(p, NFS4_MAX_UINT64);
+	spin_lock(&args->inode->i_lock);
+	xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
+	spin_unlock(&args->inode->i_lock);
+	if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) {
+		NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn(
+			NFS_I(args->inode)->layout, xdr, args);
+	} else {
+		p = reserve_space(xdr, 4);
+		*p = cpu_to_be32(0);
+	}
+	hdr->nops++;
+	hdr->replen += decode_layoutreturn_maxsz;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -2706,9 +2755,30 @@
 /*
  *  Encode LAYOUTCOMMIT request
  */
-static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
-				     struct xdr_stream *xdr,
-				     struct nfs4_layoutcommit_args *args)
+static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_layoutcommit_args *args)
+{
+	struct nfs4_layoutcommit_data *data =
+		container_of(args, struct nfs4_layoutcommit_data, args);
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+	encode_layoutcommit(xdr, data->args.inode, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_nops(&hdr);
+}
+
+/*
+ * Encode LAYOUTRETURN request
+ */
+static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_layoutreturn_args *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2717,10 +2787,8 @@
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
-	encode_layoutcommit(xdr, args, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_layoutreturn(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -5203,6 +5271,27 @@
 	return -EIO;
 }
 
+static int decode_layoutreturn(struct xdr_stream *xdr,
+			       struct nfs4_layoutreturn_res *res)
+{
+	__be32 *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
+	if (status)
+		return status;
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	res->lrs_present = be32_to_cpup(p);
+	if (res->lrs_present)
+		status = decode_stateid(xdr, &res->stateid);
+	return status;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 static int decode_layoutcommit(struct xdr_stream *xdr,
 			       struct rpc_rqst *req,
 			       struct nfs4_layoutcommit_res *res)
@@ -6320,6 +6409,30 @@
 }
 
 /*
+ * Decode LAYOUTRETURN response
+ */
+static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs4_layoutreturn_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_layoutreturn(xdr, res);
+out:
+	return status;
+}
+
+/*
  * Decode LAYOUTCOMMIT response
  */
 static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
@@ -6547,6 +6660,7 @@
 	PROC(GETDEVICEINFO,	enc_getdeviceinfo,	dec_getdeviceinfo),
 	PROC(LAYOUTGET,		enc_layoutget,		dec_layoutget),
 	PROC(LAYOUTCOMMIT,	enc_layoutcommit,	dec_layoutcommit),
+	PROC(LAYOUTRETURN,	enc_layoutreturn,	dec_layoutreturn),
 #endif /* CONFIG_NFS_V4_1 */
 };
 

diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index c541093..c4744e1 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c

@@ -87,7 +87,7 @@
 #define NFS_ROOT		"/tftpboot/%s"
 
 /* Default NFSROOT mount options. */
-#define NFS_DEF_OPTIONS		"udp"
+#define NFS_DEF_OPTIONS		"vers=2,udp,rsize=4096,wsize=4096"
 
 /* Parameters passed from the kernel command line */
 static char nfs_root_parms[256] __initdata = "";

diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild
new file mode 100644
index 0000000..ed30ea0
--- /dev/null
+++ b/fs/nfs/objlayout/Kbuild

@@ -0,0 +1,5 @@
+#
+# Makefile for the pNFS Objects Layout Driver kernel module
+#
+objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
+obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
new file mode 100644
index 0000000..9cf208d
--- /dev/null
+++ b/fs/nfs/objlayout/objio_osd.c

@@ -0,0 +1,1057 @@
+/*
+ *  pNFS Objects layout implementation over open-osd initiator library
+ *
+ *  Copyright (C) 2009 Panasas Inc. [year of first publication]
+ *  All rights reserved.
+ *
+ *  Benny Halevy <bhalevy@panasas.com>
+ *  Boaz Harrosh <bharrosh@panasas.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  See the file COPYING included with this distribution for more details.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the Panasas company nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <scsi/osd_initiator.h>
+
+#include "objlayout.h"
+
+#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
+
+#define _LLU(x) ((unsigned long long)x)
+
+enum { BIO_MAX_PAGES_KMALLOC =
+		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
+};
+
+struct objio_dev_ent {
+	struct nfs4_deviceid_node id_node;
+	struct osd_dev *od;
+};
+
+static void
+objio_free_deviceid_node(struct nfs4_deviceid_node *d)
+{
+	struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
+
+	dprintk("%s: free od=%p\n", __func__, de->od);
+	osduld_put_device(de->od);
+	kfree(de);
+}
+
+static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss,
+	const struct nfs4_deviceid *d_id)
+{
+	struct nfs4_deviceid_node *d;
+	struct objio_dev_ent *de;
+
+	d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id);
+	if (!d)
+		return NULL;
+
+	de = container_of(d, struct objio_dev_ent, id_node);
+	return de;
+}
+
+static struct objio_dev_ent *
+_dev_list_add(const struct nfs_server *nfss,
+	const struct nfs4_deviceid *d_id, struct osd_dev *od,
+	gfp_t gfp_flags)
+{
+	struct nfs4_deviceid_node *d;
+	struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags);
+	struct objio_dev_ent *n;
+
+	if (!de) {
+		dprintk("%s: -ENOMEM od=%p\n", __func__, od);
+		return NULL;
+	}
+
+	dprintk("%s: Adding od=%p\n", __func__, od);
+	nfs4_init_deviceid_node(&de->id_node,
+				nfss->pnfs_curr_ld,
+				nfss->nfs_client,
+				d_id);
+	de->od = od;
+
+	d = nfs4_insert_deviceid_node(&de->id_node);
+	n = container_of(d, struct objio_dev_ent, id_node);
+	if (n != de) {
+		dprintk("%s: Race with other n->od=%p\n", __func__, n->od);
+		objio_free_deviceid_node(&de->id_node);
+		de = n;
+	}
+
+	atomic_inc(&de->id_node.ref);
+	return de;
+}
+
+struct caps_buffers {
+	u8 caps_key[OSD_CRYPTO_KEYID_SIZE];
+	u8 creds[OSD_CAP_LEN];
+};
+
+struct objio_segment {
+	struct pnfs_layout_segment lseg;
+
+	struct pnfs_osd_object_cred *comps;
+
+	unsigned mirrors_p1;
+	unsigned stripe_unit;
+	unsigned group_width;	/* Data stripe_units without integrity comps */
+	u64 group_depth;
+	unsigned group_count;
+
+	unsigned max_io_size;
+
+	unsigned comps_index;
+	unsigned num_comps;
+	/* variable length */
+	struct objio_dev_ent *ods[];
+};
+
+static inline struct objio_segment *
+OBJIO_LSEG(struct pnfs_layout_segment *lseg)
+{
+	return container_of(lseg, struct objio_segment, lseg);
+}
+
+struct objio_state;
+typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
+
+struct objio_state {
+	/* Generic layer */
+	struct objlayout_io_state ol_state;
+
+	struct objio_segment *layout;
+
+	struct kref kref;
+	objio_done_fn done;
+	void *private;
+
+	unsigned long length;
+	unsigned numdevs; /* Actually used devs in this IO */
+	/* A per-device variable array of size numdevs */
+	struct _objio_per_comp {
+		struct bio *bio;
+		struct osd_request *or;
+		unsigned long length;
+		u64 offset;
+		unsigned dev;
+	} per_dev[];
+};
+
+/* Send and wait for a get_device_info of devices in the layout,
+   then look them up with the osd_initiator library */
+static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay,
+				struct objio_segment *objio_seg, unsigned comp,
+				gfp_t gfp_flags)
+{
+	struct pnfs_osd_deviceaddr *deviceaddr;
+	struct nfs4_deviceid *d_id;
+	struct objio_dev_ent *ode;
+	struct osd_dev *od;
+	struct osd_dev_info odi;
+	int err;
+
+	d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id;
+
+	ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
+	if (ode)
+		return ode;
+
+	err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags);
+	if (unlikely(err)) {
+		dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n",
+			__func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err);
+		return ERR_PTR(err);
+	}
+
+	odi.systemid_len = deviceaddr->oda_systemid.len;
+	if (odi.systemid_len > sizeof(odi.systemid)) {
+		err = -EINVAL;
+		goto out;
+	} else if (odi.systemid_len)
+		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
+		       odi.systemid_len);
+	odi.osdname_len	 = deviceaddr->oda_osdname.len;
+	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
+
+	if (!odi.osdname_len && !odi.systemid_len) {
+		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
+			__func__);
+		err = -ENODEV;
+		goto out;
+	}
+
+	od = osduld_info_lookup(&odi);
+	if (unlikely(IS_ERR(od))) {
+		err = PTR_ERR(od);
+		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
+		goto out;
+	}
+
+	ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od,
+			    gfp_flags);
+
+out:
+	dprintk("%s: return=%d\n", __func__, err);
+	objlayout_put_deviceinfo(deviceaddr);
+	return err ? ERR_PTR(err) : ode;
+}
+
+static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
+	struct objio_segment *objio_seg,
+	gfp_t gfp_flags)
+{
+	unsigned i;
+	int err;
+
+	/* lookup all devices */
+	for (i = 0; i < objio_seg->num_comps; i++) {
+		struct objio_dev_ent *ode;
+
+		ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags);
+		if (unlikely(IS_ERR(ode))) {
+			err = PTR_ERR(ode);
+			goto out;
+		}
+		objio_seg->ods[i] = ode;
+	}
+	err = 0;
+
+out:
+	dprintk("%s: return=%d\n", __func__, err);
+	return err;
+}
+
+static int _verify_data_map(struct pnfs_osd_layout *layout)
+{
+	struct pnfs_osd_data_map *data_map = &layout->olo_map;
+	u64 stripe_length;
+	u32 group_width;
+
+/* FIXME: Only raid0 for now. if not go through MDS */
+	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
+		printk(KERN_ERR "Only RAID_0 for now\n");
+		return -ENOTSUPP;
+	}
+	if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
+		printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
+			  data_map->odm_num_comps, data_map->odm_mirror_cnt);
+		return -EINVAL;
+	}
+
+	if (data_map->odm_group_width)
+		group_width = data_map->odm_group_width;
+	else
+		group_width = data_map->odm_num_comps /
+						(data_map->odm_mirror_cnt + 1);
+
+	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
+	if (stripe_length >= (1ULL << 32)) {
+		printk(KERN_ERR "Total Stripe length(0x%llx)"
+			  " >= 32bit is not supported\n", _LLU(stripe_length));
+		return -ENOTSUPP;
+	}
+
+	if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
+		printk(KERN_ERR "Stripe Unit(0x%llx)"
+			  " must be Multples of PAGE_SIZE(0x%lx)\n",
+			  _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
+		return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp,
+			     struct pnfs_osd_object_cred *src_comp,
+			     struct caps_buffers *caps_p)
+{
+	WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key));
+	WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds));
+
+	*cur_comp = *src_comp;
+
+	memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred,
+	       sizeof(caps_p->caps_key));
+	cur_comp->oc_cap_key.cred = caps_p->caps_key;
+
+	memcpy(caps_p->creds, src_comp->oc_cap.cred,
+	       sizeof(caps_p->creds));
+	cur_comp->oc_cap.cred = caps_p->creds;
+}
+
+int objio_alloc_lseg(struct pnfs_layout_segment **outp,
+	struct pnfs_layout_hdr *pnfslay,
+	struct pnfs_layout_range *range,
+	struct xdr_stream *xdr,
+	gfp_t gfp_flags)
+{
+	struct objio_segment *objio_seg;
+	struct pnfs_osd_xdr_decode_layout_iter iter;
+	struct pnfs_osd_layout layout;
+	struct pnfs_osd_object_cred *cur_comp, src_comp;
+	struct caps_buffers *caps_p;
+	int err;
+
+	err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
+	if (unlikely(err))
+		return err;
+
+	err = _verify_data_map(&layout);
+	if (unlikely(err))
+		return err;
+
+	objio_seg = kzalloc(sizeof(*objio_seg) +
+			    sizeof(objio_seg->ods[0]) * layout.olo_num_comps +
+			    sizeof(*objio_seg->comps) * layout.olo_num_comps +
+			    sizeof(struct caps_buffers) * layout.olo_num_comps,
+			    gfp_flags);
+	if (!objio_seg)
+		return -ENOMEM;
+
+	objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps);
+	cur_comp = objio_seg->comps;
+	caps_p = (void *)(cur_comp + layout.olo_num_comps);
+	while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err))
+		copy_single_comp(cur_comp++, &src_comp, caps_p++);
+	if (unlikely(err))
+		goto err;
+
+	objio_seg->num_comps = layout.olo_num_comps;
+	objio_seg->comps_index = layout.olo_comps_index;
+	err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags);
+	if (err)
+		goto err;
+
+	objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
+	objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit;
+	if (layout.olo_map.odm_group_width) {
+		objio_seg->group_width = layout.olo_map.odm_group_width;
+		objio_seg->group_depth = layout.olo_map.odm_group_depth;
+		objio_seg->group_count = layout.olo_map.odm_num_comps /
+						objio_seg->mirrors_p1 /
+						objio_seg->group_width;
+	} else {
+		objio_seg->group_width = layout.olo_map.odm_num_comps /
+						objio_seg->mirrors_p1;
+		objio_seg->group_depth = -1;
+		objio_seg->group_count = 1;
+	}
+
+	/* Cache this calculation it will hit for every page */
+	objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE -
+				  objio_seg->stripe_unit) *
+				 objio_seg->group_width;
+
+	*outp = &objio_seg->lseg;
+	return 0;
+
+err:
+	kfree(objio_seg);
+	dprintk("%s: Error: return %d\n", __func__, err);
+	*outp = NULL;
+	return err;
+}
+
+void objio_free_lseg(struct pnfs_layout_segment *lseg)
+{
+	int i;
+	struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
+
+	for (i = 0; i < objio_seg->num_comps; i++) {
+		if (!objio_seg->ods[i])
+			break;
+		nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node);
+	}
+	kfree(objio_seg);
+}
+
+int objio_alloc_io_state(struct pnfs_layout_segment *lseg,
+			 struct objlayout_io_state **outp,
+			 gfp_t gfp_flags)
+{
+	struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
+	struct objio_state *ios;
+	const unsigned first_size = sizeof(*ios) +
+				objio_seg->num_comps * sizeof(ios->per_dev[0]);
+	const unsigned sec_size = objio_seg->num_comps *
+						sizeof(ios->ol_state.ioerrs[0]);
+
+	ios = kzalloc(first_size + sec_size, gfp_flags);
+	if (unlikely(!ios))
+		return -ENOMEM;
+
+	ios->layout = objio_seg;
+	ios->ol_state.ioerrs = ((void *)ios) + first_size;
+	ios->ol_state.num_comps = objio_seg->num_comps;
+
+	*outp = &ios->ol_state;
+	return 0;
+}
+
+void objio_free_io_state(struct objlayout_io_state *ol_state)
+{
+	struct objio_state *ios = container_of(ol_state, struct objio_state,
+					       ol_state);
+
+	kfree(ios);
+}
+
+enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
+{
+	switch (oep) {
+	case OSD_ERR_PRI_NO_ERROR:
+		return (enum pnfs_osd_errno)0;
+
+	case OSD_ERR_PRI_CLEAR_PAGES:
+		BUG_ON(1);
+		return 0;
+
+	case OSD_ERR_PRI_RESOURCE:
+		return PNFS_OSD_ERR_RESOURCE;
+	case OSD_ERR_PRI_BAD_CRED:
+		return PNFS_OSD_ERR_BAD_CRED;
+	case OSD_ERR_PRI_NO_ACCESS:
+		return PNFS_OSD_ERR_NO_ACCESS;
+	case OSD_ERR_PRI_UNREACHABLE:
+		return PNFS_OSD_ERR_UNREACHABLE;
+	case OSD_ERR_PRI_NOT_FOUND:
+		return PNFS_OSD_ERR_NOT_FOUND;
+	case OSD_ERR_PRI_NO_SPACE:
+		return PNFS_OSD_ERR_NO_SPACE;
+	default:
+		WARN_ON(1);
+		/* fallthrough */
+	case OSD_ERR_PRI_EIO:
+		return PNFS_OSD_ERR_EIO;
+	}
+}
+
+static void _clear_bio(struct bio *bio)
+{
+	struct bio_vec *bv;
+	unsigned i;
+
+	__bio_for_each_segment(bv, bio, i, 0) {
+		unsigned this_count = bv->bv_len;
+
+		if (likely(PAGE_SIZE == this_count))
+			clear_highpage(bv->bv_page);
+		else
+			zero_user(bv->bv_page, bv->bv_offset, this_count);
+	}
+}
+
+static int _io_check(struct objio_state *ios, bool is_write)
+{
+	enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
+	int lin_ret = 0;
+	int i;
+
+	for (i = 0; i <  ios->numdevs; i++) {
+		struct osd_sense_info osi;
+		struct osd_request *or = ios->per_dev[i].or;
+		unsigned dev;
+		int ret;
+
+		if (!or)
+			continue;
+
+		ret = osd_req_decode_sense(or, &osi);
+		if (likely(!ret))
+			continue;
+
+		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
+			/* start read offset passed endof file */
+			BUG_ON(is_write);
+			_clear_bio(ios->per_dev[i].bio);
+			dprintk("%s: start read offset passed end of file "
+				"offset=0x%llx, length=0x%lx\n", __func__,
+				_LLU(ios->per_dev[i].offset),
+				ios->per_dev[i].length);
+
+			continue; /* we recovered */
+		}
+		dev = ios->per_dev[i].dev;
+		objlayout_io_set_result(&ios->ol_state, dev,
+					&ios->layout->comps[dev].oc_object_id,
+					osd_pri_2_pnfs_err(osi.osd_err_pri),
+					ios->per_dev[i].offset,
+					ios->per_dev[i].length,
+					is_write);
+
+		if (osi.osd_err_pri >= oep) {
+			oep = osi.osd_err_pri;
+			lin_ret = ret;
+		}
+	}
+
+	return lin_ret;
+}
+
+/*
+ * Common IO state helpers.
+ */
+static void _io_free(struct objio_state *ios)
+{
+	unsigned i;
+
+	for (i = 0; i < ios->numdevs; i++) {
+		struct _objio_per_comp *per_dev = &ios->per_dev[i];
+
+		if (per_dev->or) {
+			osd_end_request(per_dev->or);
+			per_dev->or = NULL;
+		}
+
+		if (per_dev->bio) {
+			bio_put(per_dev->bio);
+			per_dev->bio = NULL;
+		}
+	}
+}
+
+struct osd_dev *_io_od(struct objio_state *ios, unsigned dev)
+{
+	unsigned min_dev = ios->layout->comps_index;
+	unsigned max_dev = min_dev + ios->layout->num_comps;
+
+	BUG_ON(dev < min_dev || max_dev <= dev);
+	return ios->layout->ods[dev - min_dev]->od;
+}
+
+struct _striping_info {
+	u64 obj_offset;
+	u64 group_length;
+	unsigned dev;
+	unsigned unit_off;
+};
+
+static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
+			      struct _striping_info *si)
+{
+	u32	stripe_unit = ios->layout->stripe_unit;
+	u32	group_width = ios->layout->group_width;
+	u64	group_depth = ios->layout->group_depth;
+	u32	U = stripe_unit * group_width;
+
+	u64	T = U * group_depth;
+	u64	S = T * ios->layout->group_count;
+	u64	M = div64_u64(file_offset, S);
+
+	/*
+	G = (L - (M * S)) / T
+	H = (L - (M * S)) % T
+	*/
+	u64	LmodU = file_offset - M * S;
+	u32	G = div64_u64(LmodU, T);
+	u64	H = LmodU - G * T;
+
+	u32	N = div_u64(H, U);
+
+	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
+	si->obj_offset = si->unit_off + (N * stripe_unit) +
+				  (M * group_depth * stripe_unit);
+
+	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
+	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
+	si->dev *= ios->layout->mirrors_p1;
+
+	si->group_length = T - H;
+}
+
+static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
+		unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len,
+		gfp_t gfp_flags)
+{
+	unsigned pg = *cur_pg;
+	struct request_queue *q =
+			osd_request_queue(_io_od(ios, per_dev->dev));
+
+	per_dev->length += cur_len;
+
+	if (per_dev->bio == NULL) {
+		unsigned stripes = ios->layout->num_comps /
+						     ios->layout->mirrors_p1;
+		unsigned pages_in_stripe = stripes *
+				      (ios->layout->stripe_unit / PAGE_SIZE);
+		unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
+				    stripes;
+
+		if (BIO_MAX_PAGES_KMALLOC < bio_size)
+			bio_size = BIO_MAX_PAGES_KMALLOC;
+
+		per_dev->bio = bio_kmalloc(gfp_flags, bio_size);
+		if (unlikely(!per_dev->bio)) {
+			dprintk("Faild to allocate BIO size=%u\n", bio_size);
+			return -ENOMEM;
+		}
+	}
+
+	while (cur_len > 0) {
+		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
+		unsigned added_len;
+
+		BUG_ON(ios->ol_state.nr_pages <= pg);
+		cur_len -= pglen;
+
+		added_len = bio_add_pc_page(q, per_dev->bio,
+					ios->ol_state.pages[pg], pglen, pgbase);
+		if (unlikely(pglen != added_len))
+			return -ENOMEM;
+		pgbase = 0;
+		++pg;
+	}
+	BUG_ON(cur_len);
+
+	*cur_pg = pg;
+	return 0;
+}
+
+static int _prepare_one_group(struct objio_state *ios, u64 length,
+			      struct _striping_info *si, unsigned *last_pg,
+			      gfp_t gfp_flags)
+{
+	unsigned stripe_unit = ios->layout->stripe_unit;
+	unsigned mirrors_p1 = ios->layout->mirrors_p1;
+	unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
+	unsigned dev = si->dev;
+	unsigned first_dev = dev - (dev % devs_in_group);
+	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
+	unsigned cur_pg = *last_pg;
+	int ret = 0;
+
+	while (length) {
+		struct _objio_per_comp *per_dev = &ios->per_dev[dev];
+		unsigned cur_len, page_off = 0;
+
+		if (!per_dev->length) {
+			per_dev->dev = dev;
+			if (dev < si->dev) {
+				per_dev->offset = si->obj_offset + stripe_unit -
+								   si->unit_off;
+				cur_len = stripe_unit;
+			} else if (dev == si->dev) {
+				per_dev->offset = si->obj_offset;
+				cur_len = stripe_unit - si->unit_off;
+				page_off = si->unit_off & ~PAGE_MASK;
+				BUG_ON(page_off &&
+				      (page_off != ios->ol_state.pgbase));
+			} else { /* dev > si->dev */
+				per_dev->offset = si->obj_offset - si->unit_off;
+				cur_len = stripe_unit;
+			}
+
+			if (max_comp < dev)
+				max_comp = dev;
+		} else {
+			cur_len = stripe_unit;
+		}
+		if (cur_len >= length)
+			cur_len = length;
+
+		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
+				       cur_len, gfp_flags);
+		if (unlikely(ret))
+			goto out;
+
+		dev += mirrors_p1;
+		dev = (dev % devs_in_group) + first_dev;
+
+		length -= cur_len;
+		ios->length += cur_len;
+	}
+out:
+	ios->numdevs = max_comp + mirrors_p1;
+	*last_pg = cur_pg;
+	return ret;
+}
+
+static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags)
+{
+	u64 length = ios->ol_state.count;
+	u64 offset = ios->ol_state.offset;
+	struct _striping_info si;
+	unsigned last_pg = 0;
+	int ret = 0;
+
+	while (length) {
+		_calc_stripe_info(ios, offset, &si);
+
+		if (length < si.group_length)
+			si.group_length = length;
+
+		ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags);
+		if (unlikely(ret))
+			goto out;
+
+		offset += si.group_length;
+		length -= si.group_length;
+	}
+
+out:
+	if (!ios->length)
+		return ret;
+
+	return 0;
+}
+
+static ssize_t _sync_done(struct objio_state *ios)
+{
+	struct completion *waiting = ios->private;
+
+	complete(waiting);
+	return 0;
+}
+
+static void _last_io(struct kref *kref)
+{
+	struct objio_state *ios = container_of(kref, struct objio_state, kref);
+
+	ios->done(ios);
+}
+
+static void _done_io(struct osd_request *or, void *p)
+{
+	struct objio_state *ios = p;
+
+	kref_put(&ios->kref, _last_io);
+}
+
+static ssize_t _io_exec(struct objio_state *ios)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+	ssize_t status = 0; /* sync status */
+	unsigned i;
+	objio_done_fn saved_done_fn = ios->done;
+	bool sync = ios->ol_state.sync;
+
+	if (sync) {
+		ios->done = _sync_done;
+		ios->private = &wait;
+	}
+
+	kref_init(&ios->kref);
+
+	for (i = 0; i < ios->numdevs; i++) {
+		struct osd_request *or = ios->per_dev[i].or;
+
+		if (!or)
+			continue;
+
+		kref_get(&ios->kref);
+		osd_execute_request_async(or, _done_io, ios);
+	}
+
+	kref_put(&ios->kref, _last_io);
+
+	if (sync) {
+		wait_for_completion(&wait);
+		status = saved_done_fn(ios);
+	}
+
+	return status;
+}
+
+/*
+ * read
+ */
+static ssize_t _read_done(struct objio_state *ios)
+{
+	ssize_t status;
+	int ret = _io_check(ios, false);
+
+	_io_free(ios);
+
+	if (likely(!ret))
+		status = ios->length;
+	else
+		status = ret;
+
+	objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
+	return status;
+}
+
+static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
+{
+	struct osd_request *or = NULL;
+	struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
+	unsigned dev = per_dev->dev;
+	struct pnfs_osd_object_cred *cred =
+			&ios->layout->comps[dev];
+	struct osd_obj_id obj = {
+		.partition = cred->oc_object_id.oid_partition_id,
+		.id = cred->oc_object_id.oid_object_id,
+	};
+	int ret;
+
+	or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
+	if (unlikely(!or)) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	per_dev->or = or;
+
+	osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
+
+	ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
+	if (ret) {
+		dprintk("%s: Faild to osd_finalize_request() => %d\n",
+			__func__, ret);
+		goto err;
+	}
+
+	dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
+		__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
+		per_dev->length);
+
+err:
+	return ret;
+}
+
+static ssize_t _read_exec(struct objio_state *ios)
+{
+	unsigned i;
+	int ret;
+
+	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
+		if (!ios->per_dev[i].length)
+			continue;
+		ret = _read_mirrors(ios, i);
+		if (unlikely(ret))
+			goto err;
+	}
+
+	ios->done = _read_done;
+	return _io_exec(ios); /* In sync mode exec returns the io status */
+
+err:
+	_io_free(ios);
+	return ret;
+}
+
+ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
+{
+	struct objio_state *ios = container_of(ol_state, struct objio_state,
+					       ol_state);
+	int ret;
+
+	ret = _io_rw_pagelist(ios, GFP_KERNEL);
+	if (unlikely(ret))
+		return ret;
+
+	return _read_exec(ios);
+}
+
+/*
+ * write
+ */
+static ssize_t _write_done(struct objio_state *ios)
+{
+	ssize_t status;
+	int ret = _io_check(ios, true);
+
+	_io_free(ios);
+
+	if (likely(!ret)) {
+		/* FIXME: should be based on the OSD's persistence model
+		 * See OSD2r05 Section 4.13 Data persistence model */
+		ios->ol_state.committed = NFS_FILE_SYNC;
+		status = ios->length;
+	} else {
+		status = ret;
+	}
+
+	objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
+	return status;
+}
+
+static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
+{
+	struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
+	unsigned dev = ios->per_dev[cur_comp].dev;
+	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
+	int ret;
+
+	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
+		struct osd_request *or = NULL;
+		struct pnfs_osd_object_cred *cred =
+					&ios->layout->comps[dev];
+		struct osd_obj_id obj = {
+			.partition = cred->oc_object_id.oid_partition_id,
+			.id = cred->oc_object_id.oid_object_id,
+		};
+		struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
+		struct bio *bio;
+
+		or = osd_start_request(_io_od(ios, dev), GFP_NOFS);
+		if (unlikely(!or)) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		per_dev->or = or;
+
+		if (per_dev != master_dev) {
+			bio = bio_kmalloc(GFP_NOFS,
+					  master_dev->bio->bi_max_vecs);
+			if (unlikely(!bio)) {
+				dprintk("Faild to allocate BIO size=%u\n",
+					master_dev->bio->bi_max_vecs);
+				ret = -ENOMEM;
+				goto err;
+			}
+
+			__bio_clone(bio, master_dev->bio);
+			bio->bi_bdev = NULL;
+			bio->bi_next = NULL;
+			per_dev->bio = bio;
+			per_dev->dev = dev;
+			per_dev->length = master_dev->length;
+			per_dev->offset =  master_dev->offset;
+		} else {
+			bio = master_dev->bio;
+			bio->bi_rw |= REQ_WRITE;
+		}
+
+		osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
+
+		ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
+		if (ret) {
+			dprintk("%s: Faild to osd_finalize_request() => %d\n",
+				__func__, ret);
+			goto err;
+		}
+
+		dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
+			__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
+			per_dev->length);
+	}
+
+err:
+	return ret;
+}
+
+static ssize_t _write_exec(struct objio_state *ios)
+{
+	unsigned i;
+	int ret;
+
+	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
+		if (!ios->per_dev[i].length)
+			continue;
+		ret = _write_mirrors(ios, i);
+		if (unlikely(ret))
+			goto err;
+	}
+
+	ios->done = _write_done;
+	return _io_exec(ios); /* In sync mode exec returns the io->status */
+
+err:
+	_io_free(ios);
+	return ret;
+}
+
+ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
+{
+	struct objio_state *ios = container_of(ol_state, struct objio_state,
+					       ol_state);
+	int ret;
+
+	/* TODO: ios->stable = stable; */
+	ret = _io_rw_pagelist(ios, GFP_NOFS);
+	if (unlikely(ret))
+		return ret;
+
+	return _write_exec(ios);
+}
+
+static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
+			  struct nfs_page *prev, struct nfs_page *req)
+{
+	if (!pnfs_generic_pg_test(pgio, prev, req))
+		return false;
+
+	return pgio->pg_count + req->wb_bytes <=
+			OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
+}
+
+static struct pnfs_layoutdriver_type objlayout_type = {
+	.id = LAYOUT_OSD2_OBJECTS,
+	.name = "LAYOUT_OSD2_OBJECTS",
+	.flags                   = PNFS_LAYOUTRET_ON_SETATTR,
+
+	.alloc_layout_hdr        = objlayout_alloc_layout_hdr,
+	.free_layout_hdr         = objlayout_free_layout_hdr,
+
+	.alloc_lseg              = objlayout_alloc_lseg,
+	.free_lseg               = objlayout_free_lseg,
+
+	.read_pagelist           = objlayout_read_pagelist,
+	.write_pagelist          = objlayout_write_pagelist,
+	.pg_test                 = objio_pg_test,
+
+	.free_deviceid_node	 = objio_free_deviceid_node,
+
+	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
+	.encode_layoutreturn     = objlayout_encode_layoutreturn,
+};
+
+MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
+MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
+MODULE_LICENSE("GPL");
+
+static int __init
+objlayout_init(void)
+{
+	int ret = pnfs_register_layoutdriver(&objlayout_type);
+
+	if (ret)
+		printk(KERN_INFO
+			"%s: Registering OSD pNFS Layout Driver failed: error=%d\n",
+			__func__, ret);
+	else
+		printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
+			__func__);
+	return ret;
+}
+
+static void __exit
+objlayout_exit(void)
+{
+	pnfs_unregister_layoutdriver(&objlayout_type);
+	printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
+	       __func__);
+}
+
+module_init(objlayout_init);
+module_exit(objlayout_exit);

diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
new file mode 100644
index 0000000..dc3956c
--- /dev/null
+++ b/fs/nfs/objlayout/objlayout.c

@@ -0,0 +1,712 @@
+/*
+ *  pNFS Objects layout driver high level definitions
+ *
+ *  Copyright (C) 2007 Panasas Inc. [year of first publication]
+ *  All rights reserved.
+ *
+ *  Benny Halevy <bhalevy@panasas.com>
+ *  Boaz Harrosh <bharrosh@panasas.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  See the file COPYING included with this distribution for more details.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the Panasas company nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <scsi/osd_initiator.h>
+#include "objlayout.h"
+
+#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
+/*
+ * Create a objlayout layout structure for the given inode and return it.
+ */
+struct pnfs_layout_hdr *
+objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
+{
+	struct objlayout *objlay;
+
+	objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
+	if (objlay) {
+		spin_lock_init(&objlay->lock);
+		INIT_LIST_HEAD(&objlay->err_list);
+	}
+	dprintk("%s: Return %p\n", __func__, objlay);
+	return &objlay->pnfs_layout;
+}
+
+/*
+ * Free an objlayout layout structure
+ */
+void
+objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+	struct objlayout *objlay = OBJLAYOUT(lo);
+
+	dprintk("%s: objlay %p\n", __func__, objlay);
+
+	WARN_ON(!list_empty(&objlay->err_list));
+	kfree(objlay);
+}
+
+/*
+ * Unmarshall layout and store it in pnfslay.
+ */
+struct pnfs_layout_segment *
+objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
+		     struct nfs4_layoutget_res *lgr,
+		     gfp_t gfp_flags)
+{
+	int status = -ENOMEM;
+	struct xdr_stream stream;
+	struct xdr_buf buf = {
+		.pages =  lgr->layoutp->pages,
+		.page_len =  lgr->layoutp->len,
+		.buflen =  lgr->layoutp->len,
+		.len = lgr->layoutp->len,
+	};
+	struct page *scratch;
+	struct pnfs_layout_segment *lseg;
+
+	dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
+
+	scratch = alloc_page(gfp_flags);
+	if (!scratch)
+		goto err_nofree;
+
+	xdr_init_decode(&stream, &buf, NULL);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
+
+	status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
+	if (unlikely(status)) {
+		dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
+			status);
+		goto err;
+	}
+
+	__free_page(scratch);
+
+	dprintk("%s: Return %p\n", __func__, lseg);
+	return lseg;
+
+err:
+	__free_page(scratch);
+err_nofree:
+	dprintk("%s: Err Return=>%d\n", __func__, status);
+	return ERR_PTR(status);
+}
+
+/*
+ * Free a layout segement
+ */
+void
+objlayout_free_lseg(struct pnfs_layout_segment *lseg)
+{
+	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
+
+	if (unlikely(!lseg))
+		return;
+
+	objio_free_lseg(lseg);
+}
+
+/*
+ * I/O Operations
+ */
+static inline u64
+end_offset(u64 start, u64 len)
+{
+	u64 end;
+
+	end = start + len;
+	return end >= start ? end : NFS4_MAX_UINT64;
+}
+
+/* last octet in a range */
+static inline u64
+last_byte_offset(u64 start, u64 len)
+{
+	u64 end;
+
+	BUG_ON(!len);
+	end = start + len;
+	return end > start ? end - 1 : NFS4_MAX_UINT64;
+}
+
+static struct objlayout_io_state *
+objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
+			struct page **pages,
+			unsigned pgbase,
+			loff_t offset,
+			size_t count,
+			struct pnfs_layout_segment *lseg,
+			void *rpcdata,
+			gfp_t gfp_flags)
+{
+	struct objlayout_io_state *state;
+	u64 lseg_end_offset;
+
+	dprintk("%s: allocating io_state\n", __func__);
+	if (objio_alloc_io_state(lseg, &state, gfp_flags))
+		return NULL;
+
+	BUG_ON(offset < lseg->pls_range.offset);
+	lseg_end_offset = end_offset(lseg->pls_range.offset,
+				     lseg->pls_range.length);
+	BUG_ON(offset >= lseg_end_offset);
+	if (offset + count > lseg_end_offset) {
+		count = lseg->pls_range.length -
+				(offset - lseg->pls_range.offset);
+		dprintk("%s: truncated count %Zd\n", __func__, count);
+	}
+
+	if (pgbase > PAGE_SIZE) {
+		pages += pgbase >> PAGE_SHIFT;
+		pgbase &= ~PAGE_MASK;
+	}
+
+	INIT_LIST_HEAD(&state->err_list);
+	state->lseg = lseg;
+	state->rpcdata = rpcdata;
+	state->pages = pages;
+	state->pgbase = pgbase;
+	state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	state->offset = offset;
+	state->count = count;
+	state->sync = 0;
+
+	return state;
+}
+
+static void
+objlayout_free_io_state(struct objlayout_io_state *state)
+{
+	dprintk("%s: freeing io_state\n", __func__);
+	if (unlikely(!state))
+		return;
+
+	objio_free_io_state(state);
+}
+
+/*
+ * I/O done common code
+ */
+static void
+objlayout_iodone(struct objlayout_io_state *state)
+{
+	dprintk("%s: state %p status\n", __func__, state);
+
+	if (likely(state->status >= 0)) {
+		objlayout_free_io_state(state);
+	} else {
+		struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
+
+		spin_lock(&objlay->lock);
+		objlay->delta_space_valid = OBJ_DSU_INVALID;
+		list_add(&objlay->err_list, &state->err_list);
+		spin_unlock(&objlay->lock);
+	}
+}
+
+/*
+ * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
+ *
+ * The @index component IO failed (error returned from target). Register
+ * the error for later reporting at layout-return.
+ */
+void
+objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
+			struct pnfs_osd_objid *pooid, int osd_error,
+			u64 offset, u64 length, bool is_write)
+{
+	struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
+
+	BUG_ON(index >= state->num_comps);
+	if (osd_error) {
+		ioerr->oer_component = *pooid;
+		ioerr->oer_comp_offset = offset;
+		ioerr->oer_comp_length = length;
+		ioerr->oer_iswrite = is_write;
+		ioerr->oer_errno = osd_error;
+
+		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
+			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
+			__func__, index, ioerr->oer_errno,
+			ioerr->oer_iswrite,
+			_DEVID_LO(&ioerr->oer_component.oid_device_id),
+			_DEVID_HI(&ioerr->oer_component.oid_device_id),
+			ioerr->oer_component.oid_partition_id,
+			ioerr->oer_component.oid_object_id,
+			ioerr->oer_comp_offset,
+			ioerr->oer_comp_length);
+	} else {
+		/* User need not call if no error is reported */
+		ioerr->oer_errno = 0;
+	}
+}
+
+/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
+ * This is because the osd completion is called with ints-off from
+ * the block layer
+ */
+static void _rpc_read_complete(struct work_struct *work)
+{
+	struct rpc_task *task;
+	struct nfs_read_data *rdata;
+
+	dprintk("%s enter\n", __func__);
+	task = container_of(work, struct rpc_task, u.tk_work);
+	rdata = container_of(task, struct nfs_read_data, task);
+
+	pnfs_ld_read_done(rdata);
+}
+
+void
+objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
+{
+	int eof = state->eof;
+	struct nfs_read_data *rdata;
+
+	state->status = status;
+	dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
+	rdata = state->rpcdata;
+	rdata->task.tk_status = status;
+	if (status >= 0) {
+		rdata->res.count = status;
+		rdata->res.eof = eof;
+	}
+	objlayout_iodone(state);
+	/* must not use state after this point */
+
+	if (sync)
+		pnfs_ld_read_done(rdata);
+	else {
+		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
+		schedule_work(&rdata->task.u.tk_work);
+	}
+}
+
+/*
+ * Perform sync or async reads.
+ */
+enum pnfs_try_status
+objlayout_read_pagelist(struct nfs_read_data *rdata)
+{
+	loff_t offset = rdata->args.offset;
+	size_t count = rdata->args.count;
+	struct objlayout_io_state *state;
+	ssize_t status = 0;
+	loff_t eof;
+
+	dprintk("%s: Begin inode %p offset %llu count %d\n",
+		__func__, rdata->inode, offset, (int)count);
+
+	eof = i_size_read(rdata->inode);
+	if (unlikely(offset + count > eof)) {
+		if (offset >= eof) {
+			status = 0;
+			rdata->res.count = 0;
+			rdata->res.eof = 1;
+			goto out;
+		}
+		count = eof - offset;
+	}
+
+	state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
+					 rdata->args.pages, rdata->args.pgbase,
+					 offset, count,
+					 rdata->lseg, rdata,
+					 GFP_KERNEL);
+	if (unlikely(!state)) {
+		status = -ENOMEM;
+		goto out;
+	}
+
+	state->eof = state->offset + state->count >= eof;
+
+	status = objio_read_pagelist(state);
+ out:
+	dprintk("%s: Return status %Zd\n", __func__, status);
+	rdata->pnfs_error = status;
+	return PNFS_ATTEMPTED;
+}
+
+/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
+ * This is because the osd completion is called with ints-off from
+ * the block layer
+ */
+static void _rpc_write_complete(struct work_struct *work)
+{
+	struct rpc_task *task;
+	struct nfs_write_data *wdata;
+
+	dprintk("%s enter\n", __func__);
+	task = container_of(work, struct rpc_task, u.tk_work);
+	wdata = container_of(task, struct nfs_write_data, task);
+
+	pnfs_ld_write_done(wdata);
+}
+
+void
+objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
+		     bool sync)
+{
+	struct nfs_write_data *wdata;
+
+	dprintk("%s: Begin\n", __func__);
+	wdata = state->rpcdata;
+	state->status = status;
+	wdata->task.tk_status = status;
+	if (status >= 0) {
+		wdata->res.count = status;
+		wdata->verf.committed = state->committed;
+		dprintk("%s: Return status %d committed %d\n",
+			__func__, wdata->task.tk_status,
+			wdata->verf.committed);
+	} else
+		dprintk("%s: Return status %d\n",
+			__func__, wdata->task.tk_status);
+	objlayout_iodone(state);
+	/* must not use state after this point */
+
+	if (sync)
+		pnfs_ld_write_done(wdata);
+	else {
+		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
+		schedule_work(&wdata->task.u.tk_work);
+	}
+}
+
+/*
+ * Perform sync or async writes.
+ */
+enum pnfs_try_status
+objlayout_write_pagelist(struct nfs_write_data *wdata,
+			 int how)
+{
+	struct objlayout_io_state *state;
+	ssize_t status;
+
+	dprintk("%s: Begin inode %p offset %llu count %u\n",
+		__func__, wdata->inode, wdata->args.offset, wdata->args.count);
+
+	state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
+					 wdata->args.pages,
+					 wdata->args.pgbase,
+					 wdata->args.offset,
+					 wdata->args.count,
+					 wdata->lseg, wdata,
+					 GFP_NOFS);
+	if (unlikely(!state)) {
+		status = -ENOMEM;
+		goto out;
+	}
+
+	state->sync = how & FLUSH_SYNC;
+
+	status = objio_write_pagelist(state, how & FLUSH_STABLE);
+ out:
+	dprintk("%s: Return status %Zd\n", __func__, status);
+	wdata->pnfs_error = status;
+	return PNFS_ATTEMPTED;
+}
+
+void
+objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
+			      struct xdr_stream *xdr,
+			      const struct nfs4_layoutcommit_args *args)
+{
+	struct objlayout *objlay = OBJLAYOUT(pnfslay);
+	struct pnfs_osd_layoutupdate lou;
+	__be32 *start;
+
+	dprintk("%s: Begin\n", __func__);
+
+	spin_lock(&objlay->lock);
+	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
+	lou.dsu_delta = objlay->delta_space_used;
+	objlay->delta_space_used = 0;
+	objlay->delta_space_valid = OBJ_DSU_INIT;
+	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
+	spin_unlock(&objlay->lock);
+
+	start = xdr_reserve_space(xdr, 4);
+
+	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
+
+	*start = cpu_to_be32((xdr->p - start - 1) * 4);
+
+	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
+		lou.dsu_delta, lou.olu_ioerr_flag);
+}
+
+static int
+err_prio(u32 oer_errno)
+{
+	switch (oer_errno) {
+	case 0:
+		return 0;
+
+	case PNFS_OSD_ERR_RESOURCE:
+		return OSD_ERR_PRI_RESOURCE;
+	case PNFS_OSD_ERR_BAD_CRED:
+		return OSD_ERR_PRI_BAD_CRED;
+	case PNFS_OSD_ERR_NO_ACCESS:
+		return OSD_ERR_PRI_NO_ACCESS;
+	case PNFS_OSD_ERR_UNREACHABLE:
+		return OSD_ERR_PRI_UNREACHABLE;
+	case PNFS_OSD_ERR_NOT_FOUND:
+		return OSD_ERR_PRI_NOT_FOUND;
+	case PNFS_OSD_ERR_NO_SPACE:
+		return OSD_ERR_PRI_NO_SPACE;
+	default:
+		WARN_ON(1);
+		/* fallthrough */
+	case PNFS_OSD_ERR_EIO:
+		return OSD_ERR_PRI_EIO;
+	}
+}
+
+static void
+merge_ioerr(struct pnfs_osd_ioerr *dest_err,
+	    const struct pnfs_osd_ioerr *src_err)
+{
+	u64 dest_end, src_end;
+
+	if (!dest_err->oer_errno) {
+		*dest_err = *src_err;
+		/* accumulated device must be blank */
+		memset(&dest_err->oer_component.oid_device_id, 0,
+			sizeof(dest_err->oer_component.oid_device_id));
+
+		return;
+	}
+
+	if (dest_err->oer_component.oid_partition_id !=
+				src_err->oer_component.oid_partition_id)
+		dest_err->oer_component.oid_partition_id = 0;
+
+	if (dest_err->oer_component.oid_object_id !=
+				src_err->oer_component.oid_object_id)
+		dest_err->oer_component.oid_object_id = 0;
+
+	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
+		dest_err->oer_comp_offset = src_err->oer_comp_offset;
+
+	dest_end = end_offset(dest_err->oer_comp_offset,
+			      dest_err->oer_comp_length);
+	src_end =  end_offset(src_err->oer_comp_offset,
+			      src_err->oer_comp_length);
+	if (dest_end < src_end)
+		dest_end = src_end;
+
+	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
+
+	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
+	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
+			dest_err->oer_errno = src_err->oer_errno;
+	} else if (src_err->oer_iswrite) {
+		dest_err->oer_iswrite = true;
+		dest_err->oer_errno = src_err->oer_errno;
+	}
+}
+
+static void
+encode_accumulated_error(struct objlayout *objlay, __be32 *p)
+{
+	struct objlayout_io_state *state, *tmp;
+	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
+
+	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
+		unsigned i;
+
+		for (i = 0; i < state->num_comps; i++) {
+			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
+
+			if (!ioerr->oer_errno)
+				continue;
+
+			printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
+				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
+				"offset=0x%llx length=0x%llx\n",
+				__func__, i, ioerr->oer_errno,
+				ioerr->oer_iswrite,
+				_DEVID_LO(&ioerr->oer_component.oid_device_id),
+				_DEVID_HI(&ioerr->oer_component.oid_device_id),
+				ioerr->oer_component.oid_partition_id,
+				ioerr->oer_component.oid_object_id,
+				ioerr->oer_comp_offset,
+				ioerr->oer_comp_length);
+
+			merge_ioerr(&accumulated_err, ioerr);
+		}
+		list_del(&state->err_list);
+		objlayout_free_io_state(state);
+	}
+
+	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
+}
+
+void
+objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
+			      struct xdr_stream *xdr,
+			      const struct nfs4_layoutreturn_args *args)
+{
+	struct objlayout *objlay = OBJLAYOUT(pnfslay);
+	struct objlayout_io_state *state, *tmp;
+	__be32 *start;
+
+	dprintk("%s: Begin\n", __func__);
+	start = xdr_reserve_space(xdr, 4);
+	BUG_ON(!start);
+
+	spin_lock(&objlay->lock);
+
+	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
+		__be32 *last_xdr = NULL, *p;
+		unsigned i;
+		int res = 0;
+
+		for (i = 0; i < state->num_comps; i++) {
+			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
+
+			if (!ioerr->oer_errno)
+				continue;
+
+			dprintk("%s: err[%d]: errno=%d is_write=%d "
+				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
+				"offset=0x%llx length=0x%llx\n",
+				__func__, i, ioerr->oer_errno,
+				ioerr->oer_iswrite,
+				_DEVID_LO(&ioerr->oer_component.oid_device_id),
+				_DEVID_HI(&ioerr->oer_component.oid_device_id),
+				ioerr->oer_component.oid_partition_id,
+				ioerr->oer_component.oid_object_id,
+				ioerr->oer_comp_offset,
+				ioerr->oer_comp_length);
+
+			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
+			if (unlikely(!p)) {
+				res = -E2BIG;
+				break; /* accumulated_error */
+			}
+
+			last_xdr = p;
+			pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]);
+		}
+
+		/* TODO: use xdr_write_pages */
+		if (unlikely(res)) {
+			/* no space for even one error descriptor */
+			BUG_ON(!last_xdr);
+
+			/* we've encountered a situation with lots and lots of
+			 * errors and no space to encode them all. Use the last
+			 * available slot to report the union of all the
+			 * remaining errors.
+			 */
+			encode_accumulated_error(objlay, last_xdr);
+			goto loop_done;
+		}
+		list_del(&state->err_list);
+		objlayout_free_io_state(state);
+	}
+loop_done:
+	spin_unlock(&objlay->lock);
+
+	*start = cpu_to_be32((xdr->p - start - 1) * 4);
+	dprintk("%s: Return\n", __func__);
+}
+
+
+/*
+ * Get Device Info API for io engines
+ */
+struct objlayout_deviceinfo {
+	struct page *page;
+	struct pnfs_osd_deviceaddr da; /* This must be last */
+};
+
+/* Initialize and call nfs_getdeviceinfo, then decode and return a
+ * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
+ * should be called.
+ */
+int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
+	struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
+	gfp_t gfp_flags)
+{
+	struct objlayout_deviceinfo *odi;
+	struct pnfs_device pd;
+	struct super_block *sb;
+	struct page *page, **pages;
+	u32 *p;
+	int err;
+
+	page = alloc_page(gfp_flags);
+	if (!page)
+		return -ENOMEM;
+
+	pages = &page;
+	pd.pages = pages;
+
+	memcpy(&pd.dev_id, d_id, sizeof(*d_id));
+	pd.layout_type = LAYOUT_OSD2_OBJECTS;
+	pd.pages = &page;
+	pd.pgbase = 0;
+	pd.pglen = PAGE_SIZE;
+	pd.mincount = 0;
+
+	sb = pnfslay->plh_inode->i_sb;
+	err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
+	dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
+	if (err)
+		goto err_out;
+
+	p = page_address(page);
+	odi = kzalloc(sizeof(*odi), gfp_flags);
+	if (!odi) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
+	odi->page = page;
+	*deviceaddr = &odi->da;
+	return 0;
+
+err_out:
+	__free_page(page);
+	return err;
+}
+
+void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
+{
+	struct objlayout_deviceinfo *odi = container_of(deviceaddr,
+						struct objlayout_deviceinfo,
+						da);
+
+	__free_page(odi->page);
+	kfree(odi);
+}

diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
new file mode 100644
index 0000000..a8244c8
--- /dev/null
+++ b/fs/nfs/objlayout/objlayout.h

@@ -0,0 +1,187 @@
+/*
+ *  Data types and function declerations for interfacing with the
+ *  pNFS standard object layout driver.
+ *
+ *  Copyright (C) 2007 Panasas Inc. [year of first publication]
+ *  All rights reserved.
+ *
+ *  Benny Halevy <bhalevy@panasas.com>
+ *  Boaz Harrosh <bharrosh@panasas.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  See the file COPYING included with this distribution for more details.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the Panasas company nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _OBJLAYOUT_H
+#define _OBJLAYOUT_H
+
+#include <linux/nfs_fs.h>
+#include <linux/pnfs_osd_xdr.h>
+#include "../pnfs.h"
+
+/*
+ * per-inode layout
+ */
+struct objlayout {
+	struct pnfs_layout_hdr pnfs_layout;
+
+	 /* for layout_commit */
+	enum osd_delta_space_valid_enum {
+		OBJ_DSU_INIT = 0,
+		OBJ_DSU_VALID,
+		OBJ_DSU_INVALID,
+	} delta_space_valid;
+	s64 delta_space_used;  /* consumed by write ops */
+
+	 /* for layout_return */
+	spinlock_t lock;
+	struct list_head err_list;
+};
+
+static inline struct objlayout *
+OBJLAYOUT(struct pnfs_layout_hdr *lo)
+{
+	return container_of(lo, struct objlayout, pnfs_layout);
+}
+
+/*
+ * per-I/O operation state
+ * embedded in objects provider io_state data structure
+ */
+struct objlayout_io_state {
+	struct pnfs_layout_segment *lseg;
+
+	struct page **pages;
+	unsigned pgbase;
+	unsigned nr_pages;
+	unsigned long count;
+	loff_t offset;
+	bool sync;
+
+	void *rpcdata;
+	int status;             /* res */
+	int eof;                /* res */
+	int committed;          /* res */
+
+	/* Error reporting (layout_return) */
+	struct list_head err_list;
+	unsigned num_comps;
+	/* Pointer to array of error descriptors of size num_comps.
+	 * It should contain as many entries as devices in the osd_layout
+	 * that participate in the I/O. It is up to the io_engine to allocate
+	 * needed space and set num_comps.
+	 */
+	struct pnfs_osd_ioerr *ioerrs;
+};
+
+/*
+ * Raid engine I/O API
+ */
+extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
+	struct pnfs_layout_hdr *pnfslay,
+	struct pnfs_layout_range *range,
+	struct xdr_stream *xdr,
+	gfp_t gfp_flags);
+extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
+
+extern int objio_alloc_io_state(
+	struct pnfs_layout_segment *lseg,
+	struct objlayout_io_state **outp,
+	gfp_t gfp_flags);
+extern void objio_free_io_state(struct objlayout_io_state *state);
+
+extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
+extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
+				    bool stable);
+
+/*
+ * callback API
+ */
+extern void objlayout_io_set_result(struct objlayout_io_state *state,
+			unsigned index, struct pnfs_osd_objid *pooid,
+			int osd_error, u64 offset, u64 length, bool is_write);
+
+static inline void
+objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
+{
+	struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
+
+	/* If one of the I/Os errored out and the delta_space_used was
+	 * invalid we render the complete report as invalid. Protocol mandate
+	 * the DSU be accurate or not reported.
+	 */
+	spin_lock(&objlay->lock);
+	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
+		objlay->delta_space_valid = OBJ_DSU_VALID;
+		objlay->delta_space_used += space_used;
+	}
+	spin_unlock(&objlay->lock);
+}
+
+extern void objlayout_read_done(struct objlayout_io_state *state,
+				ssize_t status, bool sync);
+extern void objlayout_write_done(struct objlayout_io_state *state,
+				 ssize_t status, bool sync);
+
+extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
+	struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
+	gfp_t gfp_flags);
+extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
+
+/*
+ * exported generic objects function vectors
+ */
+
+extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
+extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
+
+extern struct pnfs_layout_segment *objlayout_alloc_lseg(
+	struct pnfs_layout_hdr *,
+	struct nfs4_layoutget_res *,
+	gfp_t gfp_flags);
+extern void objlayout_free_lseg(struct pnfs_layout_segment *);
+
+extern enum pnfs_try_status objlayout_read_pagelist(
+	struct nfs_read_data *);
+
+extern enum pnfs_try_status objlayout_write_pagelist(
+	struct nfs_write_data *,
+	int how);
+
+extern void objlayout_encode_layoutcommit(
+	struct pnfs_layout_hdr *,
+	struct xdr_stream *,
+	const struct nfs4_layoutcommit_args *);
+
+extern void objlayout_encode_layoutreturn(
+	struct pnfs_layout_hdr *,
+	struct xdr_stream *,
+	const struct nfs4_layoutreturn_args *);
+
+#endif /* _OBJLAYOUT_H */

diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
new file mode 100644
index 0000000..16fc758
--- /dev/null
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c

@@ -0,0 +1,412 @@
+/*
+ *  Object-Based pNFS Layout XDR layer
+ *
+ *  Copyright (C) 2007 Panasas Inc. [year of first publication]
+ *  All rights reserved.
+ *
+ *  Benny Halevy <bhalevy@panasas.com>
+ *  Boaz Harrosh <bharrosh@panasas.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  See the file COPYING included with this distribution for more details.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the Panasas company nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/pnfs_osd_xdr.h>
+
+#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
+
+/*
+ * The following implementation is based on RFC5664
+ */
+
+/*
+ * struct pnfs_osd_objid {
+ *	struct nfs4_deviceid	oid_device_id;
+ *	u64			oid_partition_id;
+ *	u64			oid_object_id;
+ * }; // xdr size 32 bytes
+ */
+static __be32 *
+_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
+{
+	p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data,
+				    sizeof(objid->oid_device_id.data));
+
+	p = xdr_decode_hyper(p, &objid->oid_partition_id);
+	p = xdr_decode_hyper(p, &objid->oid_object_id);
+	return p;
+}
+/*
+ * struct pnfs_osd_opaque_cred {
+ *	u32 cred_len;
+ *	void *cred;
+ * }; // xdr size [variable]
+ * The return pointers are from the xdr buffer
+ */
+static int
+_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
+			    struct xdr_stream *xdr)
+{
+	__be32 *p = xdr_inline_decode(xdr, 1);
+
+	if (!p)
+		return -EINVAL;
+
+	opaque_cred->cred_len = be32_to_cpu(*p++);
+
+	p = xdr_inline_decode(xdr, opaque_cred->cred_len);
+	if (!p)
+		return -EINVAL;
+
+	opaque_cred->cred = p;
+	return 0;
+}
+
+/*
+ * struct pnfs_osd_object_cred {
+ *	struct pnfs_osd_objid		oc_object_id;
+ *	u32				oc_osd_version;
+ *	u32				oc_cap_key_sec;
+ *	struct pnfs_osd_opaque_cred	oc_cap_key
+ *	struct pnfs_osd_opaque_cred	oc_cap;
+ * }; // xdr size 32 + 4 + 4 + [variable] + [variable]
+ */
+static int
+_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
+			    struct xdr_stream *xdr)
+{
+	__be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4);
+	int ret;
+
+	if (!p)
+		return -EIO;
+
+	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
+	comp->oc_osd_version = be32_to_cpup(p++);
+	comp->oc_cap_key_sec = be32_to_cpup(p);
+
+	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
+	if (unlikely(ret))
+		return ret;
+
+	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
+	return ret;
+}
+
+/*
+ * struct pnfs_osd_data_map {
+ *	u32	odm_num_comps;
+ *	u64	odm_stripe_unit;
+ *	u32	odm_group_width;
+ *	u32	odm_group_depth;
+ *	u32	odm_mirror_cnt;
+ *	u32	odm_raid_algorithm;
+ * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
+ */
+static inline int
+_osd_data_map_xdr_sz(void)
+{
+	return 4 + 8 + 4 + 4 + 4 + 4;
+}
+
+static __be32 *
+_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
+{
+	data_map->odm_num_comps = be32_to_cpup(p++);
+	p = xdr_decode_hyper(p, &data_map->odm_stripe_unit);
+	data_map->odm_group_width = be32_to_cpup(p++);
+	data_map->odm_group_depth = be32_to_cpup(p++);
+	data_map->odm_mirror_cnt = be32_to_cpup(p++);
+	data_map->odm_raid_algorithm = be32_to_cpup(p++);
+	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
+		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
+		__func__,
+		data_map->odm_num_comps,
+		(unsigned long long)data_map->odm_stripe_unit,
+		data_map->odm_group_width,
+		data_map->odm_group_depth,
+		data_map->odm_mirror_cnt,
+		data_map->odm_raid_algorithm);
+	return p;
+}
+
+int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
+	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
+{
+	__be32 *p;
+
+	memset(iter, 0, sizeof(*iter));
+
+	p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
+	if (unlikely(!p))
+		return -EINVAL;
+
+	p = _osd_xdr_decode_data_map(p, &layout->olo_map);
+	layout->olo_comps_index = be32_to_cpup(p++);
+	layout->olo_num_comps = be32_to_cpup(p++);
+	iter->total_comps = layout->olo_num_comps;
+	return 0;
+}
+
+bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
+	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
+	int *err)
+{
+	BUG_ON(iter->decoded_comps > iter->total_comps);
+	if (iter->decoded_comps == iter->total_comps)
+		return false;
+
+	*err = _osd_xdr_decode_object_cred(comp, xdr);
+	if (unlikely(*err)) {
+		dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
+			"total_comps=%d\n", __func__, *err,
+			iter->decoded_comps, iter->total_comps);
+		return false; /* stop the loop */
+	}
+	dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
+		"key_len=%u cap_len=%u\n",
+		__func__,
+		_DEVID_LO(&comp->oc_object_id.oid_device_id),
+		_DEVID_HI(&comp->oc_object_id.oid_device_id),
+		comp->oc_object_id.oid_partition_id,
+		comp->oc_object_id.oid_object_id,
+		comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
+
+	iter->decoded_comps++;
+	return true;
+}
+
+/*
+ * Get Device Information Decoding
+ *
+ * Note: since Device Information is currently done synchronously, all
+ *       variable strings fields are left inside the rpc buffer and are only
+ *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
+ *       should not be freed while the returned information is in use.
+ */
+/*
+ *struct nfs4_string {
+ *	unsigned int len;
+ *	char *data;
+ *}; // size [variable]
+ * NOTE: Returned string points to inside the XDR buffer
+ */
+static __be32 *
+__read_u8_opaque(__be32 *p, struct nfs4_string *str)
+{
+	str->len = be32_to_cpup(p++);
+	str->data = (char *)p;
+
+	p += XDR_QUADLEN(str->len);
+	return p;
+}
+
+/*
+ * struct pnfs_osd_targetid {
+ *	u32			oti_type;
+ *	struct nfs4_string	oti_scsi_device_id;
+ * };// size 4 + [variable]
+ */
+static __be32 *
+__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid)
+{
+	u32 oti_type;
+
+	oti_type = be32_to_cpup(p++);
+	targetid->oti_type = oti_type;
+
+	switch (oti_type) {
+	case OBJ_TARGET_SCSI_NAME:
+	case OBJ_TARGET_SCSI_DEVICE_ID:
+		p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);
+	}
+
+	return p;
+}
+
+/*
+ * struct pnfs_osd_net_addr {
+ *	struct nfs4_string	r_netid;
+ *	struct nfs4_string	r_addr;
+ * };
+ */
+static __be32 *
+__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr)
+{
+	p = __read_u8_opaque(p, &netaddr->r_netid);
+	p = __read_u8_opaque(p, &netaddr->r_addr);
+
+	return p;
+}
+
+/*
+ * struct pnfs_osd_targetaddr {
+ *	u32				ota_available;
+ *	struct pnfs_osd_net_addr	ota_netaddr;
+ * };
+ */
+static __be32 *
+__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
+{
+	u32 ota_available;
+
+	ota_available = be32_to_cpup(p++);
+	targetaddr->ota_available = ota_available;
+
+	if (ota_available)
+		p = __read_net_addr(p, &targetaddr->ota_netaddr);
+
+
+	return p;
+}
+
+/*
+ * struct pnfs_osd_deviceaddr {
+ *	struct pnfs_osd_targetid	oda_targetid;
+ *	struct pnfs_osd_targetaddr	oda_targetaddr;
+ *	u8				oda_lun[8];
+ *	struct nfs4_string		oda_systemid;
+ *	struct pnfs_osd_object_cred	oda_root_obj_cred;
+ *	struct nfs4_string		oda_osdname;
+ * };
+ */
+
+/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
+ * not have an xdr_stream
+ */
+static __be32 *
+__read_opaque_cred(__be32 *p,
+			      struct pnfs_osd_opaque_cred *opaque_cred)
+{
+	opaque_cred->cred_len = be32_to_cpu(*p++);
+	opaque_cred->cred = p;
+	return p + XDR_QUADLEN(opaque_cred->cred_len);
+}
+
+static __be32 *
+__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
+{
+	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
+	comp->oc_osd_version = be32_to_cpup(p++);
+	comp->oc_cap_key_sec = be32_to_cpup(p++);
+
+	p = __read_opaque_cred(p, &comp->oc_cap_key);
+	p = __read_opaque_cred(p, &comp->oc_cap);
+	return p;
+}
+
+void pnfs_osd_xdr_decode_deviceaddr(
+	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
+{
+	p = __read_targetid(p, &deviceaddr->oda_targetid);
+
+	p = __read_targetaddr(p, &deviceaddr->oda_targetaddr);
+
+	p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun,
+				    sizeof(deviceaddr->oda_lun));
+
+	p = __read_u8_opaque(p, &deviceaddr->oda_systemid);
+
+	p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred);
+
+	p = __read_u8_opaque(p, &deviceaddr->oda_osdname);
+
+	/* libosd likes this terminated in dbg. It's last, so no problems */
+	deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0;
+}
+
+/*
+ * struct pnfs_osd_layoutupdate {
+ *	u32	dsu_valid;
+ *	s64	dsu_delta;
+ *	u32	olu_ioerr_flag;
+ * }; xdr size 4 + 8 + 4
+ */
+int
+pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
+				 struct pnfs_osd_layoutupdate *lou)
+{
+	__be32 *p = xdr_reserve_space(xdr,  4 + 8 + 4);
+
+	if (!p)
+		return -E2BIG;
+
+	*p++ = cpu_to_be32(lou->dsu_valid);
+	if (lou->dsu_valid)
+		p = xdr_encode_hyper(p, lou->dsu_delta);
+	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
+	return 0;
+}
+
+/*
+ * struct pnfs_osd_objid {
+ *	struct nfs4_deviceid	oid_device_id;
+ *	u64			oid_partition_id;
+ *	u64			oid_object_id;
+ * }; // xdr size 32 bytes
+ */
+static inline __be32 *
+pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
+{
+	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
+				    sizeof(object_id->oid_device_id.data));
+	p = xdr_encode_hyper(p, object_id->oid_partition_id);
+	p = xdr_encode_hyper(p, object_id->oid_object_id);
+
+	return p;
+}
+
+/*
+ * struct pnfs_osd_ioerr {
+ *	struct pnfs_osd_objid	oer_component;
+ *	u64			oer_comp_offset;
+ *	u64			oer_comp_length;
+ *	u32			oer_iswrite;
+ *	u32			oer_errno;
+ * }; // xdr size 32 + 24 bytes
+ */
+void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
+{
+	p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
+	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
+	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
+	*p++ = cpu_to_be32(ioerr->oer_iswrite);
+	*p   = cpu_to_be32(ioerr->oer_errno);
+}
+
+__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 32 + 24);
+	if (unlikely(!p))
+		dprintk("%s: out of xdr space\n", __func__);
+
+	return p;
+}

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c80add6..7913961 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c

@@ -204,6 +204,21 @@
 			TASK_UNINTERRUPTIBLE);
 }
 
+static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+{
+	/*
+	 * FIXME: ideally we should be able to coalesce all requests
+	 * that are not block boundary aligned, but currently this
+	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
+	 * since nfs_flush_multi and nfs_pagein_multi assume you
+	 * can have only one struct nfs_page.
+	 */
+	if (desc->pg_bsize < PAGE_SIZE)
+		return 0;
+
+	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
+}
+
 /**
  * nfs_pageio_init - initialise a page io descriptor
  * @desc: pointer to descriptor
@@ -229,6 +244,8 @@
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
+	desc->pg_test = nfs_generic_pg_test;
+	pnfs_pageio_init(desc, inode);
 }
 
 /**
@@ -242,29 +259,23 @@
  *
  * Return 'true' if this is the case, else return 'false'.
  */
-static int nfs_can_coalesce_requests(struct nfs_page *prev,
-				     struct nfs_page *req,
-				     struct nfs_pageio_descriptor *pgio)
+static bool nfs_can_coalesce_requests(struct nfs_page *prev,
+				      struct nfs_page *req,
+				      struct nfs_pageio_descriptor *pgio)
 {
 	if (req->wb_context->cred != prev->wb_context->cred)
-		return 0;
+		return false;
 	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
-		return 0;
+		return false;
 	if (req->wb_context->state != prev->wb_context->state)
-		return 0;
+		return false;
 	if (req->wb_index != (prev->wb_index + 1))
-		return 0;
+		return false;
 	if (req->wb_pgbase != 0)
-		return 0;
+		return false;
 	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
-		return 0;
-	/*
-	 * Non-whole file layouts need to check that req is inside of
-	 * pgio->pg_lseg.
-	 */
-	if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
-		return 0;
-	return 1;
+		return false;
+	return pgio->pg_test(pgio, prev, req);
 }
 
 /**
@@ -278,31 +289,18 @@
 static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
 				     struct nfs_page *req)
 {
-	size_t newlen = req->wb_bytes;
-
 	if (desc->pg_count != 0) {
 		struct nfs_page *prev;
 
-		/*
-		 * FIXME: ideally we should be able to coalesce all requests
-		 * that are not block boundary aligned, but currently this
-		 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
-		 * since nfs_flush_multi and nfs_pagein_multi assume you
-		 * can have only one struct nfs_page.
-		 */
-		if (desc->pg_bsize < PAGE_SIZE)
-			return 0;
-		newlen += desc->pg_count;
-		if (newlen > desc->pg_bsize)
-			return 0;
 		prev = nfs_list_entry(desc->pg_list.prev);
 		if (!nfs_can_coalesce_requests(prev, req, desc))
 			return 0;
-	} else
+	} else {
 		desc->pg_base = req->wb_pgbase;
+	}
 	nfs_list_remove_request(req);
 	nfs_list_add_request(req, &desc->pg_list);
-	desc->pg_count = newlen;
+	desc->pg_count += req->wb_bytes;
 	return 1;
 }
 

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f57f528..8c1309d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c

@@ -177,13 +177,28 @@
 	atomic_inc(&lo->plh_refcount);
 }
 
+static struct pnfs_layout_hdr *
+pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
+{
+	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
+	return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) :
+		kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
+}
+
+static void
+pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+	struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
+	return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
+}
+
 static void
 destroy_layout_hdr(struct pnfs_layout_hdr *lo)
 {
 	dprintk("%s: freeing layout cache %p\n", __func__, lo);
 	BUG_ON(!list_empty(&lo->plh_layouts));
 	NFS_I(lo->plh_inode)->layout = NULL;
-	kfree(lo);
+	pnfs_free_layout_hdr(lo);
 }
 
 static void
@@ -228,7 +243,7 @@
 {
 	struct inode *inode = lseg->pls_layout->plh_inode;
 
-	BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 	list_del_init(&lseg->pls_list);
 	if (list_empty(&lseg->pls_layout->plh_segs)) {
 		set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
@@ -261,11 +276,72 @@
 }
 EXPORT_SYMBOL_GPL(put_lseg);
 
-static bool
-should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
+static inline u64
+end_offset(u64 start, u64 len)
 {
-	return (recall_iomode == IOMODE_ANY ||
-		lseg_iomode == recall_iomode);
+	u64 end;
+
+	end = start + len;
+	return end >= start ? end : NFS4_MAX_UINT64;
+}
+
+/* last octet in a range */
+static inline u64
+last_byte_offset(u64 start, u64 len)
+{
+	u64 end;
+
+	BUG_ON(!len);
+	end = start + len;
+	return end > start ? end - 1 : NFS4_MAX_UINT64;
+}
+
+/*
+ * is l2 fully contained in l1?
+ *   start1                             end1
+ *   [----------------------------------)
+ *           start2           end2
+ *           [----------------)
+ */
+static inline int
+lo_seg_contained(struct pnfs_layout_range *l1,
+		 struct pnfs_layout_range *l2)
+{
+	u64 start1 = l1->offset;
+	u64 end1 = end_offset(start1, l1->length);
+	u64 start2 = l2->offset;
+	u64 end2 = end_offset(start2, l2->length);
+
+	return (start1 <= start2) && (end1 >= end2);
+}
+
+/*
+ * is l1 and l2 intersecting?
+ *   start1                             end1
+ *   [----------------------------------)
+ *                              start2           end2
+ *                              [----------------)
+ */
+static inline int
+lo_seg_intersecting(struct pnfs_layout_range *l1,
+		    struct pnfs_layout_range *l2)
+{
+	u64 start1 = l1->offset;
+	u64 end1 = end_offset(start1, l1->length);
+	u64 start2 = l2->offset;
+	u64 end2 = end_offset(start2, l2->length);
+
+	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
+	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
+}
+
+static bool
+should_free_lseg(struct pnfs_layout_range *lseg_range,
+		 struct pnfs_layout_range *recall_range)
+{
+	return (recall_range->iomode == IOMODE_ANY ||
+		lseg_range->iomode == recall_range->iomode) &&
+	       lo_seg_intersecting(lseg_range, recall_range);
 }
 
 /* Returns 1 if lseg is removed from list, 0 otherwise */
@@ -296,7 +372,7 @@
 int
 mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 			    struct list_head *tmp_list,
-			    u32 iomode)
+			    struct pnfs_layout_range *recall_range)
 {
 	struct pnfs_layout_segment *lseg, *next;
 	int invalid = 0, removed = 0;
@@ -309,7 +385,8 @@
 		return 0;
 	}
 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
-		if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
+		if (!recall_range ||
+		    should_free_lseg(&lseg->pls_range, recall_range)) {
 			dprintk("%s: freeing lseg %p iomode %d "
 				"offset %llu length %llu\n", __func__,
 				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
@@ -358,7 +435,7 @@
 	lo = nfsi->layout;
 	if (lo) {
 		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
-		mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
+		mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
 	}
 	spin_unlock(&nfsi->vfs_inode.i_lock);
 	pnfs_free_lseg_list(&tmp_list);
@@ -467,7 +544,7 @@
 static struct pnfs_layout_segment *
 send_layoutget(struct pnfs_layout_hdr *lo,
 	   struct nfs_open_context *ctx,
-	   u32 iomode,
+	   struct pnfs_layout_range *range,
 	   gfp_t gfp_flags)
 {
 	struct inode *ino = lo->plh_inode;
@@ -499,11 +576,11 @@
 			goto out_err_free;
 	}
 
-	lgp->args.minlength = NFS4_MAX_UINT64;
+	lgp->args.minlength = PAGE_CACHE_SIZE;
+	if (lgp->args.minlength > range->length)
+		lgp->args.minlength = range->length;
 	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
-	lgp->args.range.iomode = iomode;
-	lgp->args.range.offset = 0;
-	lgp->args.range.length = NFS4_MAX_UINT64;
+	lgp->args.range = *range;
 	lgp->args.type = server->pnfs_curr_ld->id;
 	lgp->args.inode = ino;
 	lgp->args.ctx = get_nfs_open_context(ctx);
@@ -518,7 +595,7 @@
 	nfs4_proc_layoutget(lgp);
 	if (!lseg) {
 		/* remember that LAYOUTGET failed and suspend trying */
-		set_bit(lo_fail_bit(iomode), &lo->plh_flags);
+		set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
 	}
 
 	/* free xdr pages */
@@ -542,6 +619,51 @@
 	return NULL;
 }
 
+/* Initiates a LAYOUTRETURN(FILE) */
+int
+_pnfs_return_layout(struct inode *ino)
+{
+	struct pnfs_layout_hdr *lo = NULL;
+	struct nfs_inode *nfsi = NFS_I(ino);
+	LIST_HEAD(tmp_list);
+	struct nfs4_layoutreturn *lrp;
+	nfs4_stateid stateid;
+	int status = 0;
+
+	dprintk("--> %s\n", __func__);
+
+	spin_lock(&ino->i_lock);
+	lo = nfsi->layout;
+	if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) {
+		spin_unlock(&ino->i_lock);
+		dprintk("%s: no layout segments to return\n", __func__);
+		goto out;
+	}
+	stateid = nfsi->layout->plh_stateid;
+	/* Reference matched in nfs4_layoutreturn_release */
+	get_layout_hdr(lo);
+	spin_unlock(&ino->i_lock);
+	pnfs_free_lseg_list(&tmp_list);
+
+	WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
+
+	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
+	if (unlikely(lrp == NULL)) {
+		status = -ENOMEM;
+		goto out;
+	}
+
+	lrp->args.stateid = stateid;
+	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
+	lrp->args.inode = ino;
+	lrp->clp = NFS_SERVER(ino)->nfs_client;
+
+	status = nfs4_proc_layoutreturn(lrp);
+out:
+	dprintk("<-- %s status: %d\n", __func__, status);
+	return status;
+}
+
 bool pnfs_roc(struct inode *ino)
 {
 	struct pnfs_layout_hdr *lo;
@@ -625,10 +747,23 @@
  * are seen first.
  */
 static s64
-cmp_layout(u32 iomode1, u32 iomode2)
+cmp_layout(struct pnfs_layout_range *l1,
+	   struct pnfs_layout_range *l2)
 {
+	s64 d;
+
+	/* high offset > low offset */
+	d = l1->offset - l2->offset;
+	if (d)
+		return d;
+
+	/* short length > long length */
+	d = l2->length - l1->length;
+	if (d)
+		return d;
+
 	/* read > read/write */
-	return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
+	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
 }
 
 static void
@@ -636,13 +771,12 @@
 		   struct pnfs_layout_segment *lseg)
 {
 	struct pnfs_layout_segment *lp;
-	int found = 0;
 
 	dprintk("%s:Begin\n", __func__);
 
 	assert_spin_locked(&lo->plh_inode->i_lock);
 	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
-		if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
+		if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
 			continue;
 		list_add_tail(&lseg->pls_list, &lp->pls_list);
 		dprintk("%s: inserted lseg %p "
@@ -652,16 +786,14 @@
 			lseg->pls_range.offset, lseg->pls_range.length,
 			lp, lp->pls_range.iomode, lp->pls_range.offset,
 			lp->pls_range.length);
-		found = 1;
-		break;
+		goto out;
 	}
-	if (!found) {
-		list_add_tail(&lseg->pls_list, &lo->plh_segs);
-		dprintk("%s: inserted lseg %p "
-			"iomode %d offset %llu length %llu at tail\n",
-			__func__, lseg, lseg->pls_range.iomode,
-			lseg->pls_range.offset, lseg->pls_range.length);
-	}
+	list_add_tail(&lseg->pls_list, &lo->plh_segs);
+	dprintk("%s: inserted lseg %p "
+		"iomode %d offset %llu length %llu at tail\n",
+		__func__, lseg, lseg->pls_range.iomode,
+		lseg->pls_range.offset, lseg->pls_range.length);
+out:
 	get_layout_hdr(lo);
 
 	dprintk("%s:Return\n", __func__);
@@ -672,7 +804,7 @@
 {
 	struct pnfs_layout_hdr *lo;
 
-	lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
+	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
 	if (!lo)
 		return NULL;
 	atomic_set(&lo->plh_refcount, 1);
@@ -705,7 +837,7 @@
 	if (likely(nfsi->layout == NULL))	/* Won the race? */
 		nfsi->layout = new;
 	else
-		kfree(new);
+		pnfs_free_layout_hdr(new);
 	return nfsi->layout;
 }
 
@@ -721,16 +853,28 @@
  * READ		RW	true
  */
 static int
-is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
+is_matching_lseg(struct pnfs_layout_range *ls_range,
+		 struct pnfs_layout_range *range)
 {
-	return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
+	struct pnfs_layout_range range1;
+
+	if ((range->iomode == IOMODE_RW &&
+	     ls_range->iomode != IOMODE_RW) ||
+	    !lo_seg_intersecting(ls_range, range))
+		return 0;
+
+	/* range1 covers only the first byte in the range */
+	range1 = *range;
+	range1.length = 1;
+	return lo_seg_contained(ls_range, &range1);
 }
 
 /*
  * lookup range in layout
  */
 static struct pnfs_layout_segment *
-pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo,
+		struct pnfs_layout_range *range)
 {
 	struct pnfs_layout_segment *lseg, *ret = NULL;
 
@@ -739,11 +883,11 @@
 	assert_spin_locked(&lo->plh_inode->i_lock);
 	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
-		    is_matching_lseg(lseg, iomode)) {
+		    is_matching_lseg(&lseg->pls_range, range)) {
 			ret = get_lseg(lseg);
 			break;
 		}
-		if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
+		if (cmp_layout(range, &lseg->pls_range) > 0)
 			break;
 	}
 
@@ -759,9 +903,17 @@
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino,
 		   struct nfs_open_context *ctx,
+		   loff_t pos,
+		   u64 count,
 		   enum pnfs_iomode iomode,
 		   gfp_t gfp_flags)
 {
+	struct pnfs_layout_range arg = {
+		.iomode = iomode,
+		.offset = pos,
+		.length = count,
+	};
+	unsigned pg_offset;
 	struct nfs_inode *nfsi = NFS_I(ino);
 	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	struct pnfs_layout_hdr *lo;
@@ -789,7 +941,7 @@
 		goto out_unlock;
 
 	/* Check to see if the layout for the given range already exists */
-	lseg = pnfs_find_lseg(lo, iomode);
+	lseg = pnfs_find_lseg(lo, &arg);
 	if (lseg)
 		goto out_unlock;
 
@@ -811,7 +963,14 @@
 		spin_unlock(&clp->cl_lock);
 	}
 
-	lseg = send_layoutget(lo, ctx, iomode, gfp_flags);
+	pg_offset = arg.offset & ~PAGE_CACHE_MASK;
+	if (pg_offset) {
+		arg.offset -= pg_offset;
+		arg.length += pg_offset;
+	}
+	arg.length = PAGE_CACHE_ALIGN(arg.length);
+
+	lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
 	if (!lseg && first) {
 		spin_lock(&clp->cl_lock);
 		list_del_init(&lo->plh_layouts);
@@ -838,17 +997,6 @@
 	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	int status = 0;
 
-	/* Verify we got what we asked for.
-	 * Note that because the xdr parsing only accepts a single
-	 * element array, this can fail even if the server is behaving
-	 * correctly.
-	 */
-	if (lgp->args.range.iomode > res->range.iomode ||
-	    res->range.offset != 0 ||
-	    res->range.length != NFS4_MAX_UINT64) {
-		status = -EINVAL;
-		goto out;
-	}
 	/* Inject layout blob into I/O device driver */
 	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
 	if (!lseg || IS_ERR(lseg)) {
@@ -895,51 +1043,64 @@
 	goto out;
 }
 
-static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
-			     struct nfs_page *prev,
-			     struct nfs_page *req)
+bool
+pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+		     struct nfs_page *req)
 {
+	enum pnfs_iomode access_type;
+	gfp_t gfp_flags;
+
+	/* We assume that pg_ioflags == 0 iff we're reading a page */
+	if (pgio->pg_ioflags == 0) {
+		access_type = IOMODE_READ;
+		gfp_flags = GFP_KERNEL;
+	} else {
+		access_type = IOMODE_RW;
+		gfp_flags = GFP_NOFS;
+	}
+
 	if (pgio->pg_count == prev->wb_bytes) {
 		/* This is first coelesce call for a series of nfs_pages */
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   prev->wb_context,
-						   IOMODE_READ,
-						   GFP_KERNEL);
+						   req_offset(req),
+						   pgio->pg_count,
+						   access_type,
+						   gfp_flags);
+		return true;
 	}
-	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
+
+	if (pgio->pg_lseg &&
+	    req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
+					 pgio->pg_lseg->pls_range.length))
+		return false;
+
+	return true;
 }
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
-void
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+/*
+ * Called by non rpc-based layout drivers
+ */
+int
+pnfs_ld_write_done(struct nfs_write_data *data)
 {
-	struct pnfs_layoutdriver_type *ld;
+	int status;
 
-	ld = NFS_SERVER(inode)->pnfs_curr_ld;
-	pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
-}
-
-static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
-			      struct nfs_page *prev,
-			      struct nfs_page *req)
-{
-	if (pgio->pg_count == prev->wb_bytes) {
-		/* This is first coelesce call for a series of nfs_pages */
-		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-						   prev->wb_context,
-						   IOMODE_RW,
-						   GFP_NOFS);
+	if (!data->pnfs_error) {
+		pnfs_set_layoutcommit(data);
+		data->mds_ops->rpc_call_done(&data->task, data);
+		data->mds_ops->rpc_release(data);
+		return 0;
 	}
-	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
-}
 
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
-{
-	struct pnfs_layoutdriver_type *ld;
-
-	ld = NFS_SERVER(inode)->pnfs_curr_ld;
-	pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
+	dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
+		data->pnfs_error);
+	status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
+				    data->mds_ops, NFS_FILE_SYNC);
+	return status ? : -EAGAIN;
 }
+EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
 enum pnfs_try_status
 pnfs_try_to_write_data(struct nfs_write_data *wdata,
@@ -966,6 +1127,29 @@
 }
 
 /*
+ * Called by non rpc-based layout drivers
+ */
+int
+pnfs_ld_read_done(struct nfs_read_data *data)
+{
+	int status;
+
+	if (!data->pnfs_error) {
+		__nfs4_read_done_cb(data);
+		data->mds_ops->rpc_call_done(&data->task, data);
+		data->mds_ops->rpc_release(data);
+		return 0;
+	}
+
+	dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
+		data->pnfs_error);
+	status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
+				   data->mds_ops);
+	return status ? : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
+
+/*
  * Call the appropriate parallel I/O subsystem read function.
  */
 enum pnfs_try_status
@@ -1009,7 +1193,7 @@
 pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 {
 	struct nfs_inode *nfsi = NFS_I(wdata->inode);
-	loff_t end_pos = wdata->args.offset + wdata->res.count;
+	loff_t end_pos = wdata->mds_offset + wdata->res.count;
 	bool mark_as_dirty = false;
 
 	spin_lock(&nfsi->vfs_inode.i_lock);

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 0c015ba..48d0a8e 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h

@@ -30,6 +30,7 @@
 #ifndef FS_NFS_PNFS_H
 #define FS_NFS_PNFS_H
 
+#include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
 
 enum {
@@ -64,17 +65,29 @@
 	NFS_LAYOUT_DESTROYED,		/* no new use of layout allowed */
 };
 
+enum layoutdriver_policy_flags {
+	/* Should the pNFS client commit and return the layout upon a setattr */
+	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 0,
+};
+
+struct nfs4_deviceid_node;
+
 /* Per-layout driver specific registration structure */
 struct pnfs_layoutdriver_type {
 	struct list_head pnfs_tblid;
 	const u32 id;
 	const char *name;
 	struct module *owner;
+	unsigned flags;
+
+	struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags);
+	void (*free_layout_hdr) (struct pnfs_layout_hdr *);
+
 	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);
 	void (*free_lseg) (struct pnfs_layout_segment *lseg);
 
 	/* test for nfs page cache coalescing */
-	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+	bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
 
 	/* Returns true if layoutdriver wants to divert this request to
 	 * driver's commit routine.
@@ -89,6 +102,16 @@
 	 */
 	enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
 	enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
+
+	void (*free_deviceid_node) (struct nfs4_deviceid_node *);
+
+	void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid,
+				     struct xdr_stream *xdr,
+				     const struct nfs4_layoutreturn_args *args);
+
+	void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
+				     struct xdr_stream *xdr,
+				     const struct nfs4_layoutcommit_args *args);
 };
 
 struct pnfs_layout_hdr {
@@ -120,21 +143,22 @@
 extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
 				   struct pnfs_device *dev);
 extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
+extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
 
 /* pnfs.c */
 void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-		   enum pnfs_iomode access_type, gfp_t gfp_flags);
+		   loff_t pos, u64 count, enum pnfs_iomode access_type,
+		   gfp_t gfp_flags);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
 					     const struct rpc_call_ops *, int);
 enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
 					    const struct rpc_call_ops *);
-void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
-void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
+bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
@@ -148,13 +172,37 @@
 				  struct nfs4_state *open_state);
 int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 				struct list_head *tmp_list,
-				u32 iomode);
+				struct pnfs_layout_range *recall_range);
 bool pnfs_roc(struct inode *ino);
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
 void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
+int _pnfs_return_layout(struct inode *);
+int pnfs_ld_write_done(struct nfs_write_data *);
+int pnfs_ld_read_done(struct nfs_read_data *);
+
+/* pnfs_dev.c */
+struct nfs4_deviceid_node {
+	struct hlist_node		node;
+	const struct pnfs_layoutdriver_type *ld;
+	const struct nfs_client		*nfs_client;
+	struct nfs4_deviceid		deviceid;
+	atomic_t			ref;
+};
+
+void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
+struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
+struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
+void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
+void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
+			     const struct pnfs_layoutdriver_type *,
+			     const struct nfs_client *,
+			     const struct nfs4_deviceid *);
+struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
+bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
+void nfs4_deviceid_purge_client(const struct nfs_client *);
 
 static inline int lo_fail_bit(u32 iomode)
 {
@@ -223,6 +271,36 @@
 		put_lseg(req->wb_commit_lseg);
 }
 
+/* Should the pNFS client commit and return the layout upon a setattr */
+static inline bool
+pnfs_ld_layoutret_on_setattr(struct inode *inode)
+{
+	if (!pnfs_enabled_sb(NFS_SERVER(inode)))
+		return false;
+	return NFS_SERVER(inode)->pnfs_curr_ld->flags &
+		PNFS_LAYOUTRET_ON_SETATTR;
+}
+
+static inline int pnfs_return_layout(struct inode *ino)
+{
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct nfs_server *nfss = NFS_SERVER(ino);
+
+	if (pnfs_enabled_sb(nfss) && nfsi->layout)
+		return _pnfs_return_layout(ino);
+
+	return 0;
+}
+
+static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
+				    struct inode *inode)
+{
+	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+	if (ld)
+		pgio->pg_test = ld->pg_test;
+}
+
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -245,7 +323,8 @@
 
 static inline struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-		   enum pnfs_iomode access_type, gfp_t gfp_flags)
+		   loff_t pos, u64 count, enum pnfs_iomode access_type,
+		   gfp_t gfp_flags)
 {
 	return NULL;
 }
@@ -264,6 +343,17 @@
 	return PNFS_NOT_ATTEMPTED;
 }
 
+static inline int pnfs_return_layout(struct inode *ino)
+{
+	return 0;
+}
+
+static inline bool
+pnfs_ld_layoutret_on_setattr(struct inode *inode)
+{
+	return false;
+}
+
 static inline bool
 pnfs_roc(struct inode *ino)
 {
@@ -294,16 +384,9 @@
 {
 }
 
-static inline void
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
+static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
+				    struct inode *inode)
 {
-	pgio->pg_test = NULL;
-}
-
-static inline void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
-{
-	pgio->pg_test = NULL;
 }
 
 static inline void
@@ -331,6 +414,10 @@
 {
 	return 0;
 }
+
+static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */

diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
new file mode 100644
index 0000000..c65e133
--- /dev/null
+++ b/fs/nfs/pnfs_dev.c

@@ -0,0 +1,270 @@
+/*
+ *  Device operations for the pnfs client.
+ *
+ *  Copyright (c) 2002
+ *  The Regents of the University of Michigan
+ *  All Rights Reserved
+ *
+ *  Dean Hildebrand <dhildebz@umich.edu>
+ *  Garth Goodson   <Garth.Goodson@netapp.com>
+ *
+ *  Permission is granted to use, copy, create derivative works, and
+ *  redistribute this software and such derivative works for any purpose,
+ *  so long as the name of the University of Michigan is not used in
+ *  any advertising or publicity pertaining to the use or distribution
+ *  of this software without specific, written prior authorization. If
+ *  the above copyright notice or any other identification of the
+ *  University of Michigan is included in any copy of any portion of
+ *  this software, then the disclaimer below must also be included.
+ *
+ *  This software is provided as is, without representation or warranty
+ *  of any kind either express or implied, including without limitation
+ *  the implied warranties of merchantability, fitness for a particular
+ *  purpose, or noninfringement.  The Regents of the University of
+ *  Michigan shall not be liable for any damages, including special,
+ *  indirect, incidental, or consequential damages, with respect to any
+ *  claim arising out of or in connection with the use of the software,
+ *  even if it has been or is hereafter advised of the possibility of
+ *  such damages.
+ */
+
+#include "pnfs.h"
+
+#define NFSDBG_FACILITY		NFSDBG_PNFS
+
+/*
+ * Device ID RCU cache. A device ID is unique per server and layout type.
+ */
+#define NFS4_DEVICE_ID_HASH_BITS	5
+#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
+#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
+
+static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
+static DEFINE_SPINLOCK(nfs4_deviceid_lock);
+
+void
+nfs4_print_deviceid(const struct nfs4_deviceid *id)
+{
+	u32 *p = (u32 *)id;
+
+	dprintk("%s: device id= [%x%x%x%x]\n", __func__,
+		p[0], p[1], p[2], p[3]);
+}
+EXPORT_SYMBOL_GPL(nfs4_print_deviceid);
+
+static inline u32
+nfs4_deviceid_hash(const struct nfs4_deviceid *id)
+{
+	unsigned char *cptr = (unsigned char *)id->data;
+	unsigned int nbytes = NFS4_DEVICEID4_SIZE;
+	u32 x = 0;
+
+	while (nbytes--) {
+		x *= 37;
+		x += *cptr++;
+	}
+	return x & NFS4_DEVICE_ID_HASH_MASK;
+}
+
+static struct nfs4_deviceid_node *
+_lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
+		 const struct nfs_client *clp, const struct nfs4_deviceid *id,
+		 long hash)
+{
+	struct nfs4_deviceid_node *d;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+		if (d->ld == ld && d->nfs_client == clp &&
+		    !memcmp(&d->deviceid, id, sizeof(*id))) {
+			if (atomic_read(&d->ref))
+				return d;
+			else
+				continue;
+		}
+	return NULL;
+}
+
+/*
+ * Lookup a deviceid in cache and get a reference count on it if found
+ *
+ * @clp nfs_client associated with deviceid
+ * @id deviceid to look up
+ */
+struct nfs4_deviceid_node *
+_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
+		   const struct nfs_client *clp, const struct nfs4_deviceid *id,
+		   long hash)
+{
+	struct nfs4_deviceid_node *d;
+
+	rcu_read_lock();
+	d = _lookup_deviceid(ld, clp, id, hash);
+	if (d && !atomic_inc_not_zero(&d->ref))
+		d = NULL;
+	rcu_read_unlock();
+	return d;
+}
+
+struct nfs4_deviceid_node *
+nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
+		       const struct nfs_client *clp, const struct nfs4_deviceid *id)
+{
+	return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id));
+}
+EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
+
+/*
+ * Unhash and put deviceid
+ *
+ * @clp nfs_client associated with deviceid
+ * @id the deviceid to unhash
+ *
+ * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
+ */
+struct nfs4_deviceid_node *
+nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
+			 const struct nfs_client *clp, const struct nfs4_deviceid *id)
+{
+	struct nfs4_deviceid_node *d;
+
+	spin_lock(&nfs4_deviceid_lock);
+	rcu_read_lock();
+	d = _lookup_deviceid(ld, clp, id, nfs4_deviceid_hash(id));
+	rcu_read_unlock();
+	if (!d) {
+		spin_unlock(&nfs4_deviceid_lock);
+		return NULL;
+	}
+	hlist_del_init_rcu(&d->node);
+	spin_unlock(&nfs4_deviceid_lock);
+	synchronize_rcu();
+
+	/* balance the initial ref set in pnfs_insert_deviceid */
+	if (atomic_dec_and_test(&d->ref))
+		return d;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
+
+/*
+ * Delete a deviceid from cache
+ *
+ * @clp struct nfs_client qualifying the deviceid
+ * @id deviceid to delete
+ */
+void
+nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
+		     const struct nfs_client *clp, const struct nfs4_deviceid *id)
+{
+	struct nfs4_deviceid_node *d;
+
+	d = nfs4_unhash_put_deviceid(ld, clp, id);
+	if (!d)
+		return;
+	d->ld->free_deviceid_node(d);
+}
+EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
+
+void
+nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
+			const struct pnfs_layoutdriver_type *ld,
+			const struct nfs_client *nfs_client,
+			const struct nfs4_deviceid *id)
+{
+	INIT_HLIST_NODE(&d->node);
+	d->ld = ld;
+	d->nfs_client = nfs_client;
+	d->deviceid = *id;
+	atomic_set(&d->ref, 1);
+}
+EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node);
+
+/*
+ * Uniquely initialize and insert a deviceid node into cache
+ *
+ * @new new deviceid node
+ *      Note that the caller must set up the following members:
+ *        new->ld
+ *        new->nfs_client
+ *        new->deviceid
+ *
+ * @ret the inserted node, if none found, otherwise, the found entry.
+ */
+struct nfs4_deviceid_node *
+nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new)
+{
+	struct nfs4_deviceid_node *d;
+	long hash;
+
+	spin_lock(&nfs4_deviceid_lock);
+	hash = nfs4_deviceid_hash(&new->deviceid);
+	d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash);
+	if (d) {
+		spin_unlock(&nfs4_deviceid_lock);
+		return d;
+	}
+
+	hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
+	spin_unlock(&nfs4_deviceid_lock);
+
+	return new;
+}
+EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node);
+
+/*
+ * Dereference a deviceid node and delete it when its reference count drops
+ * to zero.
+ *
+ * @d deviceid node to put
+ *
+ * @ret true iff the node was deleted
+ */
+bool
+nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
+{
+	if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock))
+		return false;
+	hlist_del_init_rcu(&d->node);
+	spin_unlock(&nfs4_deviceid_lock);
+	synchronize_rcu();
+	d->ld->free_deviceid_node(d);
+	return true;
+}
+EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
+
+static void
+_deviceid_purge_client(const struct nfs_client *clp, long hash)
+{
+	struct nfs4_deviceid_node *d;
+	struct hlist_node *n, *next;
+	HLIST_HEAD(tmp);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+		if (d->nfs_client == clp && atomic_read(&d->ref)) {
+			hlist_del_init_rcu(&d->node);
+			hlist_add_head(&d->node, &tmp);
+		}
+	rcu_read_unlock();
+
+	if (hlist_empty(&tmp))
+		return;
+
+	synchronize_rcu();
+	hlist_for_each_entry_safe(d, n, next, &tmp, node)
+		if (atomic_dec_and_test(&d->ref))
+			d->ld->free_deviceid_node(d);
+}
+
+void
+nfs4_deviceid_purge_client(const struct nfs_client *clp)
+{
+	long h;
+
+	spin_lock(&nfs4_deviceid_lock);
+	for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
+		_deviceid_purge_client(clp, h);
+	spin_unlock(&nfs4_deviceid_lock);
+}

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2bcf0dc..20a7f95 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c

@@ -288,7 +288,9 @@
 	atomic_set(&req->wb_complete, requests);
 
 	BUG_ON(desc->pg_lseg != NULL);
-	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
+	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
+				  req_offset(req), desc->pg_count,
+				  IOMODE_READ, GFP_KERNEL);
 	ClearPageError(page);
 	offset = 0;
 	nbytes = desc->pg_count;
@@ -351,7 +353,9 @@
 	}
 	req = nfs_list_entry(data->pages.next);
 	if ((!lseg) && list_is_singular(&data->pages))
-		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
+		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
+					  req_offset(req), desc->pg_count,
+					  IOMODE_READ, GFP_KERNEL);
 
 	ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
 				0, lseg);
@@ -660,7 +664,6 @@
 	if (ret == 0)
 		goto read_complete; /* all pages were read */
 
-	pnfs_pageio_init_read(&pgio, inode);
 	if (rsize < PAGE_CACHE_SIZE)
 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
 	else

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e288f06..ce40e5c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c

@@ -63,6 +63,7 @@
 #include "iostat.h"
 #include "internal.h"
 #include "fscache.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
@@ -732,6 +733,28 @@
 
 	return 0;
 }
+#ifdef CONFIG_NFS_V4_1
+void show_sessions(struct seq_file *m, struct nfs_server *server)
+{
+	if (nfs4_has_session(server->nfs_client))
+		seq_printf(m, ",sessions");
+}
+#else
+void show_sessions(struct seq_file *m, struct nfs_server *server) {}
+#endif
+
+#ifdef CONFIG_NFS_V4_1
+void show_pnfs(struct seq_file *m, struct nfs_server *server)
+{
+	seq_printf(m, ",pnfs=");
+	if (server->pnfs_curr_ld)
+		seq_printf(m, "%s", server->pnfs_curr_ld->name);
+	else
+		seq_printf(m, "not configured");
+}
+#else  /* CONFIG_NFS_V4_1 */
+void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
+#endif /* CONFIG_NFS_V4_1 */
 
 static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
 {
@@ -792,6 +815,8 @@
 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
 		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+		show_sessions(m, nfss);
+		show_pnfs(m, nfss);
 	}
 #endif
 

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 49c715b..e268e3b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c

@@ -939,7 +939,9 @@
 	atomic_set(&req->wb_complete, requests);
 
 	BUG_ON(desc->pg_lseg);
-	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
+	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
+				  req_offset(req), desc->pg_count,
+				  IOMODE_RW, GFP_NOFS);
 	ClearPageError(page);
 	offset = 0;
 	nbytes = desc->pg_count;
@@ -1013,7 +1015,9 @@
 	}
 	req = nfs_list_entry(data->pages.next);
 	if ((!lseg) && list_is_singular(&data->pages))
-		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
+		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
+					  req_offset(req), desc->pg_count,
+					  IOMODE_RW, GFP_NOFS);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
@@ -1032,8 +1036,6 @@
 {
 	size_t wsize = NFS_SERVER(inode)->wsize;
 
-	pnfs_pageio_init_write(pgio, inode);
-
 	if (wsize < PAGE_CACHE_SIZE)
 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
 	else

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ad000ae..b9566e4 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c

@@ -1354,12 +1354,6 @@
 	if (IS_ERR(exp))
 		return nfserrno(PTR_ERR(exp));
 	rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
-	if (rv)
-		goto out;
-	rv = check_nfsd_access(exp, rqstp);
-	if (rv)
-		fh_put(fhp);
-out:
 	exp_put(exp);
 	return rv;
 }

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 2247fc9..9095f3c 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c

@@ -245,7 +245,7 @@
 	}
 
 	/* Now create the file and set attributes */
-	nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len,
+	nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
 				attr, newfhp,
 				argp->createmode, argp->verf, NULL, NULL);
 

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index ad48fac..08c6e36 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c

@@ -842,7 +842,7 @@
 	return rv;
 }
 
-__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
+static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
 {
 	struct svc_fh	fh;
 	int err;

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5fcb139..3a6dbd7 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c

@@ -196,9 +196,9 @@
 
 		/*
 		 * Note: create modes (UNCHECKED,GUARDED...) are the same
-		 * in NFSv4 as in v3.
+		 * in NFSv4 as in v3 except EXCLUSIVE4_1.
 		 */
-		status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data,
+		status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
 					open->op_fname.len, &open->op_iattr,
 					&resfh, open->op_createmode,
 					(u32 *)open->op_verf.data,
@@ -403,7 +403,7 @@
 	cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
 	memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
 	       putfh->pf_fhlen);
-	return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
+	return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
 }
 
 static __be32
@@ -762,6 +762,9 @@
 	__be32 err;
 
 	fh_init(&resfh, NFS4_FHSIZE);
+	err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC);
+	if (err)
+		return err;
 	err = nfsd_lookup_dentry(rqstp, &cstate->current_fh,
 				    secinfo->si_name, secinfo->si_namelen,
 				    &exp, &dentry);
@@ -986,6 +989,9 @@
 	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
 	ALLOWED_ON_ABSENT_FS = 1 << 1,	/* ops processed on absent fs */
 	ALLOWED_AS_FIRST_OP = 1 << 2,	/* ops reqired first in compound */
+	/* For rfc 5661 section 2.6.3.1.1: */
+	OP_HANDLES_WRONGSEC = 1 << 3,
+	OP_IS_PUTFH_LIKE = 1 << 4,
 };
 
 struct nfsd4_operation {
@@ -1031,6 +1037,44 @@
 	return nfs_ok;
 }
 
+static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
+{
+	return &nfsd4_ops[op->opnum];
+}
+
+static bool need_wrongsec_check(struct svc_rqst *rqstp)
+{
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
+	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+	struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
+	struct nfsd4_op *next = &argp->ops[resp->opcnt];
+	struct nfsd4_operation *thisd;
+	struct nfsd4_operation *nextd;
+
+	thisd = OPDESC(this);
+	/*
+	 * Most ops check wronsec on our own; only the putfh-like ops
+	 * have special rules.
+	 */
+	if (!(thisd->op_flags & OP_IS_PUTFH_LIKE))
+		return false;
+	/*
+	 * rfc 5661 2.6.3.1.1.6: don't bother erroring out a
+	 * put-filehandle operation if we're not going to use the
+	 * result:
+	 */
+	if (argp->opcnt == resp->opcnt)
+		return false;
+
+	nextd = OPDESC(next);
+	/*
+	 * Rest of 2.6.3.1.1: certain operations will return WRONGSEC
+	 * errors themselves as necessary; others should check for them
+	 * now:
+	 */
+	return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
+}
+
 /*
  * COMPOUND call.
  */
@@ -1108,7 +1152,7 @@
 			goto encode_op;
 		}
 
-		opdesc = &nfsd4_ops[op->opnum];
+		opdesc = OPDESC(op);
 
 		if (!cstate->current_fh.fh_dentry) {
 			if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
@@ -1126,6 +1170,9 @@
 		else
 			BUG_ON(op->status == nfs_ok);
 
+		if (!op->status && need_wrongsec_check(rqstp))
+			op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
+
 encode_op:
 		/* Only from SEQUENCE */
 		if (resp->cstate.status == nfserr_replay_cache) {
@@ -1217,10 +1264,12 @@
 	},
 	[OP_LOOKUP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookup,
+		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_LOOKUP",
 	},
 	[OP_LOOKUPP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookupp,
+		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_LOOKUPP",
 	},
 	[OP_NVERIFY] = {
@@ -1229,6 +1278,7 @@
 	},
 	[OP_OPEN] = {
 		.op_func = (nfsd4op_func)nfsd4_open,
+		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_OPEN",
 	},
 	[OP_OPEN_CONFIRM] = {
@@ -1241,17 +1291,20 @@
 	},
 	[OP_PUTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putfh,
-		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+				| OP_IS_PUTFH_LIKE,
 		.op_name = "OP_PUTFH",
 	},
 	[OP_PUTPUBFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
-		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+				| OP_IS_PUTFH_LIKE,
 		.op_name = "OP_PUTPUBFH",
 	},
 	[OP_PUTROOTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
-		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+				| OP_IS_PUTFH_LIKE,
 		.op_name = "OP_PUTROOTFH",
 	},
 	[OP_READ] = {
@@ -1281,15 +1334,18 @@
 	},
 	[OP_RESTOREFH] = {
 		.op_func = (nfsd4op_func)nfsd4_restorefh,
-		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+				| OP_IS_PUTFH_LIKE,
 		.op_name = "OP_RESTOREFH",
 	},
 	[OP_SAVEFH] = {
 		.op_func = (nfsd4op_func)nfsd4_savefh,
+		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SAVEFH",
 	},
 	[OP_SECINFO] = {
 		.op_func = (nfsd4op_func)nfsd4_secinfo,
+		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO",
 	},
 	[OP_SETATTR] = {
@@ -1353,6 +1409,7 @@
 	},
 	[OP_SECINFO_NO_NAME] = {
 		.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
+		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO_NO_NAME",
 	},
 };

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4cf04e1..e98f3c2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c

@@ -1519,6 +1519,9 @@
 	bool confirm_me = false;
 	int status = 0;
 
+	if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
+		return nfserr_inval;
+
 	nfs4_lock_state();
 	unconf = find_unconfirmed_client(&cr_ses->clientid);
 	conf = find_confirmed_client(&cr_ses->clientid);
@@ -1637,8 +1640,9 @@
 		return nfserr_badsession;
 
 	status = nfsd4_map_bcts_dir(&bcts->dir);
-	nfsd4_new_conn(rqstp, cstate->session, bcts->dir);
-	return nfs_ok;
+	if (!status)
+		nfsd4_new_conn(rqstp, cstate->session, bcts->dir);
+	return status;
 }
 
 static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
@@ -1725,6 +1729,13 @@
 	return;
 }
 
+static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session)
+{
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
+	return args->opcnt > session->se_fchannel.maxops;
+}
+
 __be32
 nfsd4_sequence(struct svc_rqst *rqstp,
 	       struct nfsd4_compound_state *cstate,
@@ -1753,6 +1764,10 @@
 	if (!session)
 		goto out;
 
+	status = nfserr_too_many_ops;
+	if (nfsd4_session_too_many_ops(rqstp, session))
+		goto out;
+
 	status = nfserr_badslot;
 	if (seq->slotid >= session->se_fchannel.maxreqs)
 		goto out;
@@ -1808,6 +1823,8 @@
 __be32
 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
 {
+	int status = 0;
+
 	if (rc->rca_one_fs) {
 		if (!cstate->current_fh.fh_dentry)
 			return nfserr_nofilehandle;
@@ -1817,9 +1834,14 @@
 		 */
 		 return nfs_ok;
 	}
+
 	nfs4_lock_state();
-	if (is_client_expired(cstate->session->se_client)) {
-		nfs4_unlock_state();
+	status = nfserr_complete_already;
+	if (cstate->session->se_client->cl_firststate)
+		goto out;
+
+	status = nfserr_stale_clientid;
+	if (is_client_expired(cstate->session->se_client))
 		/*
 		 * The following error isn't really legal.
 		 * But we only get here if the client just explicitly
@@ -1827,11 +1849,13 @@
 		 * error it gets back on an operation for the dead
 		 * client.
 		 */
-		return nfserr_stale_clientid;
-	}
+		goto out;
+
+	status = nfs_ok;
 	nfsd4_create_clid_dir(cstate->session->se_client);
+out:
 	nfs4_unlock_state();
-	return nfs_ok;
+	return status;
 }
 
 __be32
@@ -2462,7 +2486,7 @@
 	return NULL;
 }
 
-int share_access_to_flags(u32 share_access)
+static int share_access_to_flags(u32 share_access)
 {
 	share_access &= ~NFS4_SHARE_WANT_MASK;
 
@@ -2882,7 +2906,7 @@
 	return status;
 }
 
-struct lock_manager nfsd4_manager = {
+static struct lock_manager nfsd4_manager = {
 };
 
 static void

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c6766af..9901811 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c

@@ -424,15 +424,12 @@
 static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
 {
 	DECODE_HEAD;
-	u32 dummy;
 
 	READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
 	COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
 	READ32(bcts->dir);
-	/* XXX: Perhaps Tom Tucker could help us figure out how we
-	 * should be using ctsa_use_conn_in_rdma_mode: */
-	READ32(dummy);
-
+	/* XXX: skipping ctsa_use_conn_in_rdma_mode.  Perhaps Tom Tucker
+	 * could help us figure out we should be using it. */
 	DECODE_TAIL;
 }
 
@@ -588,8 +585,6 @@
 	READ_BUF(lockt->lt_owner.len);
 	READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
 
-	if (argp->minorversion && !zero_clientid(&lockt->lt_clientid))
-		return nfserr_inval;
 	DECODE_TAIL;
 }
 
@@ -3120,7 +3115,7 @@
 	return nfserr;
 }
 
-__be32
+static __be32
 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
 		      struct nfsd4_sequence *seq)
 {

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 55c8e63..90c6aa6 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c

@@ -344,7 +344,7 @@
 	 * which clients virtually always use auth_sys for,
 	 * even while using RPCSEC_GSS for NFS.
 	 */
-	if (access & NFSD_MAY_LOCK)
+	if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
 		goto skip_pseudoflavor_check;
 	/*
 	 * Clients may expect to be able to use auth_sys during mount,

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 129f3c9..d571827 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c

@@ -181,16 +181,10 @@
 	struct svc_export	*exp;
 	struct dentry		*dparent;
 	struct dentry		*dentry;
-	__be32			err;
 	int			host_err;
 
 	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
 
-	/* Obtain dentry and export. */
-	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
-	if (err)
-		return err;
-
 	dparent = fhp->fh_dentry;
 	exp  = fhp->fh_export;
 	exp_get(exp);
@@ -254,6 +248,9 @@
 	struct dentry		*dentry;
 	__be32 err;
 
+	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+	if (err)
+		return err;
 	err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
 	if (err)
 		return err;
@@ -877,13 +874,11 @@
 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
               loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
 {
-	struct inode *inode;
 	mm_segment_t	oldfs;
 	__be32		err;
 	int		host_err;
 
 	err = nfserr_perm;
-	inode = file->f_path.dentry->d_inode;
 
 	if (file->f_op->splice_read && rqstp->rq_splice_ok) {
 		struct splice_desc sd = {
@@ -1340,11 +1335,18 @@
 }
 
 #ifdef CONFIG_NFSD_V3
+
+static inline int nfsd_create_is_exclusive(int createmode)
+{
+	return createmode == NFS3_CREATE_EXCLUSIVE
+	       || createmode == NFS4_CREATE_EXCLUSIVE4_1;
+}
+
 /*
- * NFSv3 version of nfsd_create
+ * NFSv3 and NFSv4 version of nfsd_create
  */
 __be32
-nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		char *fname, int flen, struct iattr *iap,
 		struct svc_fh *resfhp, int createmode, u32 *verifier,
 	        int *truncp, int *created)
@@ -1396,7 +1398,7 @@
 	if (err)
 		goto out;
 
-	if (createmode == NFS3_CREATE_EXCLUSIVE) {
+	if (nfsd_create_is_exclusive(createmode)) {
 		/* solaris7 gets confused (bugid 4218508) if these have
 		 * the high bit set, so just clear the high bits. If this is
 		 * ever changed to use different attrs for storing the
@@ -1437,6 +1439,11 @@
 			    && dchild->d_inode->i_atime.tv_sec == v_atime
 			    && dchild->d_inode->i_size  == 0 )
 				break;
+		case NFS4_CREATE_EXCLUSIVE4_1:
+			if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
+			    && dchild->d_inode->i_atime.tv_sec == v_atime
+			    && dchild->d_inode->i_size  == 0 )
+				goto set_attr;
 			 /* fallthru */
 		case NFS3_CREATE_GUARDED:
 			err = nfserr_exist;
@@ -1455,7 +1462,7 @@
 
 	nfsd_check_ignore_resizing(iap);
 
-	if (createmode == NFS3_CREATE_EXCLUSIVE) {
+	if (nfsd_create_is_exclusive(createmode)) {
 		/* Cram the verifier into atime/mtime */
 		iap->ia_valid = ATTR_MTIME|ATTR_ATIME
 			| ATTR_MTIME_SET|ATTR_ATIME_SET;
@@ -2034,7 +2041,7 @@
 	struct inode	*inode = dentry->d_inode;
 	int		err;
 
-	if (acc == NFSD_MAY_NOP)
+	if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
 		return 0;
 #if 0
 	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",

diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9a370a5..e0bbac0 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h

@@ -17,10 +17,14 @@
 #define NFSD_MAY_SATTR		8
 #define NFSD_MAY_TRUNC		16
 #define NFSD_MAY_LOCK		32
+#define NFSD_MAY_MASK		63
+
+/* extra hints to permission and open routines: */
 #define NFSD_MAY_OWNER_OVERRIDE	64
 #define NFSD_MAY_LOCAL_ACCESS	128 /* IRIX doing local access check on device special file*/
 #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
 #define NFSD_MAY_NOT_BREAK_LEASE 512
+#define NFSD_MAY_BYPASS_GSS	1024
 
 #define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
 #define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
@@ -54,7 +58,7 @@
 				int type, dev_t rdev, struct svc_fh *res);
 #ifdef CONFIG_NFSD_V3
 __be32		nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
-__be32		nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
+__be32		do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 				char *name, int len, struct iattr *attrs,
 				struct svc_fh *res, int createmode,
 				u32 *verifier, int *truncp, int *created);

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 587f184..b954878 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c

@@ -917,7 +917,7 @@
  * construction. This function can be called both as a single operation
  * and as a part of indivisible file operations.
  */
-void nilfs_dirty_inode(struct inode *inode)
+void nilfs_dirty_inode(struct inode *inode, int flags)
 {
 	struct nilfs_transaction_info ti;
 	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1102a5f..546849b 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c

@@ -334,8 +334,6 @@
 	struct nilfs_transaction_info ti;
 	int err;
 
-	dentry_unhash(dentry);
-
 	err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
 	if (err)
 		return err;
@@ -371,9 +369,6 @@
 	struct nilfs_transaction_info ti;
 	int err;
 
-	if (new_inode && S_ISDIR(new_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
 	if (unlikely(err))
 		return err;

diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a9c6a53..f02b9ad 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h

@@ -269,7 +269,7 @@
 extern int nilfs_inode_dirty(struct inode *);
 int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
 extern int nilfs_mark_inode_dirty(struct inode *);
-extern void nilfs_dirty_inode(struct inode *);
+extern void nilfs_dirty_inode(struct inode *, int flags);
 int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		 __u64 start, __u64 len);
 

diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index c368360..3b8d397 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c

@@ -241,11 +241,9 @@
 	int ret;
 
 
-	if (S_ISDIR(inode->i_mode)) {
-		dentry_unhash(dentry);
-		if (!omfs_dir_is_empty(inode))
-			return -ENOTEMPTY;
-	}
+	if (S_ISDIR(inode->i_mode) &&
+	    !omfs_dir_is_empty(inode))
+		return -ENOTEMPTY;
 
 	ret = omfs_delete_entry(dentry);
 	if (ret)
@@ -382,9 +380,6 @@
 	int err;
 
 	if (new_inode) {
-		if (S_ISDIR(new_inode->i_mode))
-			dentry_unhash(new_dentry);
-
 		/* overwriting existing file/dir */
 		err = omfs_remove(new_dir, new_dentry);
 		if (err)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4ede550..14def99 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c

@@ -83,6 +83,9 @@
 #include <linux/pid_namespace.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
+#ifdef CONFIG_HARDWALL
+#include <asm/hardwall.h>
+#endif
 #include "internal.h"
 
 /* NOTE:
@@ -2842,6 +2845,9 @@
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	INF("io",	S_IRUGO, proc_tgid_io_accounting),
 #endif
+#ifdef CONFIG_HARDWALL
+	INF("hardwall",   S_IRUGO, proc_pid_hardwall),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file * filp,
@@ -3181,6 +3187,9 @@
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	INF("io",	S_IRUGO, proc_tid_io_accounting),
 #endif
+#ifdef CONFIG_HARDWALL
+	INF("hardwall",   S_IRUGO, proc_pid_hardwall),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file * filp,

diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 76c8164..1186626 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c

@@ -831,8 +831,6 @@
 	INITIALIZE_PATH(path);
 	struct reiserfs_dir_entry de;
 
-	dentry_unhash(dentry);
-
 	/* we will be doing 2 balancings and update 2 stat data, we change quotas
 	 * of the owner of the directory and of the owner of the parent directory.
 	 * The quota structure is possibly deleted only on last iput => outside
@@ -1227,9 +1225,6 @@
 	unsigned long savelink = 1;
 	struct timespec ctime;
 
-	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	/* three balancings: (1) old name removal, (2) new name insertion
 	   and (3) maybe "save" link insertion
 	   stat data updates: (1) old directory,

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b216ff6..aa91089 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c

@@ -568,7 +568,7 @@
 }
 
 /* we don't mark inodes dirty, we just log them */
-static void reiserfs_dirty_inode(struct inode *inode)
+static void reiserfs_dirty_inode(struct inode *inode, int flags)
 {
 	struct reiserfs_transaction_handle th;
 

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 50f1abc..e8a62f4 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c

@@ -98,7 +98,6 @@
 
 	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
 					I_MUTEX_CHILD, dir->i_sb);
-	dentry_unhash(dentry);
 	error = dir->i_op->rmdir(dir, dentry);
 	if (!error)
 		dentry->d_inode->i_flags |= S_DEAD;

diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 730c562..5e1101f 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c

@@ -147,7 +147,7 @@
 	 * table[0] points to the first inode lookup table metadata block,
 	 * this should be less than lookup_table_start
 	 */
-	if (!IS_ERR(table) && table[0] >= lookup_table_start) {
+	if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) {
 		kfree(table);
 		return ERR_PTR(-EINVAL);
 	}

diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 1516a649..0ed6edb 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c

@@ -90,7 +90,7 @@
 	 * table[0] points to the first fragment table metadata block, this
 	 * should be less than fragment_table_start
 	 */
-	if (!IS_ERR(table) && table[0] >= fragment_table_start) {
+	if (!IS_ERR(table) && le64_to_cpu(table[0]) >= fragment_table_start) {
 		kfree(table);
 		return ERR_PTR(-EINVAL);
 	}

diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index a70858e..d38ea3d 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c

@@ -93,7 +93,7 @@
 	 * table[0] points to the first id lookup table metadata block, this
 	 * should be less than id_table_start
 	 */
-	if (!IS_ERR(table) && table[0] >= id_table_start) {
+	if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) {
 		kfree(table);
 		return ERR_PTR(-EINVAL);
 	}

diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 6f26abe..7438850 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c

@@ -245,7 +245,7 @@
 		msblk->id_table = NULL;
 		goto failed_mount;
 	}
-	next_table = msblk->id_table[0];
+	next_table = le64_to_cpu(msblk->id_table[0]);
 
 	/* Handle inode lookup table */
 	lookup_table_start = le64_to_cpu(sblk->lookup_table_start);
@@ -261,7 +261,7 @@
 		msblk->inode_lookup_table = NULL;
 		goto failed_mount;
 	}
-	next_table = msblk->inode_lookup_table[0];
+	next_table = le64_to_cpu(msblk->inode_lookup_table[0]);
 
 	sb->s_export_op = &squashfs_export_ops;
 
@@ -286,7 +286,7 @@
 		msblk->fragment_index = NULL;
 		goto failed_mount;
 	}
-	next_table = msblk->fragment_index[0];
+	next_table = le64_to_cpu(msblk->fragment_index[0]);
 
 check_directory_table:
 	/* Sanity check directory_table */

diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index e2cc675..e474fbc 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c

@@ -196,8 +196,6 @@
 	struct inode *inode = dentry->d_inode;
 	int err = -ENOTEMPTY;
 
-	dentry_unhash(dentry);
-
 	if (sysv_empty_dir(inode)) {
 		err = sysv_unlink(dir, dentry);
 		if (!err) {
@@ -224,9 +222,6 @@
 	struct sysv_dir_entry * old_de;
 	int err = -ENOENT;
 
-	if (new_inode && S_ISDIR(new_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	old_de = sysv_find_entry(old_dentry, &old_page);
 	if (!old_de)
 		goto out;

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index c2b8094..ef5abd3 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c

@@ -656,8 +656,6 @@
 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
 	struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
 
-	dentry_unhash(dentry);
-
 	/*
 	 * Budget request settings: deletion direntry, deletion inode and
 	 * changing the parent inode. If budgeting fails, go ahead anyway
@@ -978,9 +976,6 @@
 			.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
 	struct timespec time;
 
-	if (new_inode && S_ISDIR(new_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	/*
 	 * Budget request settings: deletion direntry, new direntry, removing
 	 * the old inode, and changing old and new parent directory inodes.

diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 46961c0..ca953a9 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c

@@ -277,8 +277,9 @@
 	return 0;
 }
 
-int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
 {
+	int nr = sc->nr_to_scan;
 	int freed, contention = 0;
 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 6db0bdaa..1ab0d22 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c

@@ -382,7 +382,7 @@
 	end_writeback(inode);
 }
 
-static void ubifs_dirty_inode(struct inode *inode)
+static void ubifs_dirty_inode(struct inode *inode, int flags)
 {
 	struct ubifs_inode *ui = ubifs_inode(inode);
 

diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 93d1412..a70d7b4 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h

@@ -1614,7 +1614,7 @@
 int ubifs_tnc_end_commit(struct ubifs_info *c);
 
 /* shrinker.c */
-int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc);
 
 /* commit.c */
 int ubifs_bg_thread(void *info);

diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 4d76594..f1dce84 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c

@@ -783,8 +783,6 @@
 	struct fileIdentDesc *fi, cfi;
 	struct kernel_lb_addr tloc;
 
-	dentry_unhash(dentry);
-
 	retval = -ENOENT;
 	fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
 	if (!fi)
@@ -1083,9 +1081,6 @@
 	struct kernel_lb_addr tloc;
 	struct udf_inode_info *old_iinfo = UDF_I(old_inode);
 
-	if (new_inode && S_ISDIR(new_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
 	if (ofi) {
 		if (ofibh.sbh != ofibh.ebh)

diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 953ebdf..29309e2 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c

@@ -258,8 +258,6 @@
 	struct inode * inode = dentry->d_inode;
 	int err= -ENOTEMPTY;
 
-	dentry_unhash(dentry);
-
 	lock_ufs(dir->i_sb);
 	if (ufs_empty_dir (inode)) {
 		err = ufs_unlink(dir, dentry);
@@ -284,9 +282,6 @@
 	struct ufs_dir_entry *old_de;
 	int err = -ENOENT;
 
-	if (new_inode && S_ISDIR(new_inode->i_mode))
-		dentry_unhash(new_dentry);
-
 	old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_de)
 		goto out;

diff --git a/fs/xattr.c b/fs/xattr.c
index f1ef949..f060663 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c

@@ -46,18 +46,22 @@
 		return 0;
 
 	/*
-	 * The trusted.* namespace can only be accessed by a privileged user.
+	 * The trusted.* namespace can only be accessed by privileged users.
 	 */
-	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
-		return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
+	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
+		if (!capable(CAP_SYS_ADMIN))
+			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
+		return 0;
+	}
 
-	/* In user.* namespace, only regular files and directories can have
+	/*
+	 * In the user.* namespace, only regular files and directories can have
 	 * extended attributes. For sticky directories, only the owner and
-	 * privileged user can write attributes.
+	 * privileged users can write attributes.
 	 */
 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
 		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
-			return -EPERM;
+			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
 		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
 		    (mask & MAY_WRITE) && !inode_owner_or_capable(inode))
 			return -EPERM;
@@ -87,7 +91,11 @@
 {
 	struct inode *inode = dentry->d_inode;
 	int error = -EOPNOTSUPP;
+	int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
+				   XATTR_SECURITY_PREFIX_LEN);
 
+	if (issec)
+		inode->i_flags &= ~S_NOSEC;
 	if (inode->i_op->setxattr) {
 		error = inode->i_op->setxattr(dentry, name, value, size, flags);
 		if (!error) {
@@ -95,8 +103,7 @@
 			security_inode_post_setxattr(dentry, name, value,
 						     size, flags);
 		}
-	} else if (!strncmp(name, XATTR_SECURITY_PREFIX,
-				XATTR_SECURITY_PREFIX_LEN)) {
+	} else if (issec) {
 		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
 		error = security_inode_setsecurity(inode, suffix, value,
 						   size, flags);

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 98b9c91..1e3a7ce 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c

@@ -925,7 +925,8 @@
  */
 STATIC void
 xfs_fs_dirty_inode(
-	struct inode	*inode)
+	struct inode	*inode,
+	int		flags)
 {
 	barrier();
 	XFS_I(inode)->i_update_core = 1;

diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index a3252a5..a756bc8 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h

@@ -98,6 +98,9 @@
 /*
  * Spinlock primitives
  */
+acpi_status
+acpi_os_create_lock(acpi_spinlock *out_handle);
+
 void acpi_os_delete_lock(acpi_spinlock handle);
 
 acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock handle);

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index f6ad63d..2ed0a84 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h

@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20110316
+#define ACPI_CA_VERSION                 0x20110413
 
 #include "actypes.h"
 #include "actbl.h"

diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 64f838b..b67231b 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h

@@ -501,8 +501,9 @@
 #define ACPI_STATE_D1                   (u8) 1
 #define ACPI_STATE_D2                   (u8) 2
 #define ACPI_STATE_D3                   (u8) 3
-#define ACPI_D_STATES_MAX               ACPI_STATE_D3
-#define ACPI_D_STATE_COUNT              4
+#define ACPI_STATE_D3_COLD              (u8) 4
+#define ACPI_D_STATES_MAX               ACPI_STATE_D3_COLD
+#define ACPI_D_STATE_COUNT              5
 
 #define ACPI_STATE_C0                   (u8) 0
 #define ACPI_STATE_C1                   (u8) 1
@@ -712,8 +713,24 @@
 #define ACPI_ADR_SPACE_CMOS             (acpi_adr_space_type) 5
 #define ACPI_ADR_SPACE_PCI_BAR_TARGET   (acpi_adr_space_type) 6
 #define ACPI_ADR_SPACE_IPMI             (acpi_adr_space_type) 7
-#define ACPI_ADR_SPACE_DATA_TABLE       (acpi_adr_space_type) 8
-#define ACPI_ADR_SPACE_FIXED_HARDWARE   (acpi_adr_space_type) 127
+
+#define ACPI_NUM_PREDEFINED_REGIONS     8
+
+/*
+ * Special Address Spaces
+ *
+ * Note: A Data Table region is a special type of operation region
+ * that has its own AML opcode. However, internally, the AML
+ * interpreter simply creates an operation region with an an address
+ * space type of ACPI_ADR_SPACE_DATA_TABLE.
+ */
+#define ACPI_ADR_SPACE_DATA_TABLE       (acpi_adr_space_type) 0x7E	/* Internal to ACPICA only */
+#define ACPI_ADR_SPACE_FIXED_HARDWARE   (acpi_adr_space_type) 0x7F
+
+/* Values for _REG connection code */
+
+#define ACPI_REG_DISCONNECT             0
+#define ACPI_REG_CONNECT                1
 
 /*
  * bit_register IDs

diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 55192ac..ba4928c 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h

@@ -310,14 +310,7 @@
 
 /* in processor_core.c */
 void acpi_processor_set_pdc(acpi_handle handle);
-#ifdef CONFIG_SMP
 int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id);
-#else
-static inline int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
-{
-	return -1;
-}
-#endif
 
 /* in processor_throttling.c */
 int acpi_processor_tstate_has_changed(struct acpi_processor *pr);

diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index ff5c660..fcdcb5d 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h

@@ -35,9 +35,9 @@
  * platform data and other tables.
  */
 
-static inline int gpio_is_valid(int number)
+static inline bool gpio_is_valid(int number)
 {
-	return ((unsigned)number) < ARCH_NR_GPIOS;
+	return number >= 0 && number < ARCH_NR_GPIOS;
 }
 
 struct device;
@@ -193,8 +193,8 @@
 };
 
 extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
-extern int gpio_request_array(struct gpio *array, size_t num);
-extern void gpio_free_array(struct gpio *array, size_t num);
+extern int gpio_request_array(const struct gpio *array, size_t num);
+extern void gpio_free_array(const struct gpio *array, size_t num);
 
 #ifdef CONFIG_GPIO_SYSFS
 
@@ -212,7 +212,7 @@
 
 #else	/* !CONFIG_GPIOLIB */
 
-static inline int gpio_is_valid(int number)
+static inline bool gpio_is_valid(int number)
 {
 	/* only non-negative numbers are valid */
 	return number >= 0;

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 33d52470..ae90e0f 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h

@@ -681,9 +681,11 @@
 __SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime)
 #define __NR_syncfs 267
 __SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns 268
+__SYSCALL(__NR_setns, sys_setns)
 
 #undef __NR_syscalls
-#define __NR_syscalls 268
+#define __NR_syscalls 269
 
 /*
  * All syscalls below here should go away really,

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a2e910e..1deb2a7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h

@@ -150,8 +150,7 @@
 extern int ec_write(u8 addr, u8 val);
 extern int ec_transaction(u8 command,
                           const u8 *wdata, unsigned wdata_len,
-                          u8 *rdata, unsigned rdata_len,
-			  int force_poll);
+                          u8 *rdata, unsigned rdata_len);
 
 #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE)
 

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 96c038e..ee456c7 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h

@@ -34,4 +34,17 @@
 }
 #endif
 
+#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
+static inline void atomic_or(int i, atomic_t *v)
+{
+	int old;
+	int new;
+
+	do {
+		old = atomic_read(v);
+		new = old | i;
+	} while (atomic_cmpxchg(v, old, new) != old);
+}
+#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */
+
 #endif /* _LINUX_ATOMIC_H */

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index f20eb8f..e9eaec5 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h

@@ -146,7 +146,7 @@
 
 static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
 {
-	cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+	do_set_cpus_allowed(p, cpu_possible_mask);
 	return cpumask_any(cpu_active_mask);
 }
 

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 32a4423..4427e04 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h

@@ -191,6 +191,12 @@
 
 	/* Used to provide an error string from the ctr */
 	char *error;
+
+	/*
+	 * Set if this target needs to receive discards regardless of
+	 * whether or not its underlying devices have support.
+	 */
+	unsigned discards_supported:1;
 };
 
 /* Each target can link one of these into the table */

diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index 5c9186b..f4b0aa3 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h

@@ -69,8 +69,7 @@
  *
  * Create/destroy may block.
  */
-struct dm_io_client *dm_io_client_create(unsigned num_pages);
-int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client);
+struct dm_io_client *dm_io_client_create(void);
 void dm_io_client_destroy(struct dm_io_client *client);
 
 /*

diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index 5db21631..298d587 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h

@@ -25,8 +25,7 @@
  * To use kcopyd you must first create a dm_kcopyd_client object.
  */
 struct dm_kcopyd_client;
-int dm_kcopyd_client_create(unsigned num_pages,
-			    struct dm_kcopyd_client **result);
+struct dm_kcopyd_client *dm_kcopyd_client_create(void);
 void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc);
 
 /*

diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 6998d93..4bfe0a2 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h

@@ -3,6 +3,7 @@
  * AVR32 systems.)
  *
  * Copyright (C) 2007 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 33fa120..e376270 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h

@@ -299,6 +299,7 @@
 		struct resource *data_resource, struct resource *bss_resource);
 extern unsigned long efi_get_time(void);
 extern int efi_set_rtc_mmss(unsigned long nowtime);
+extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
 /**

diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 85c1d30..5e06acf 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h

@@ -909,7 +909,7 @@
 extern void ext3_evict_inode (struct inode *);
 extern int  ext3_sync_inode (handle_t *, struct inode *);
 extern void ext3_discard_reservation (struct inode *);
-extern void ext3_dirty_inode(struct inode *);
+extern void ext3_dirty_inode(struct inode *, int);
 extern int ext3_change_inode_journal_flag(struct inode *, int);
 extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
 extern int ext3_can_truncate(struct inode *inode);

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2416093..c55d6b7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h

@@ -237,6 +237,7 @@
 #define S_PRIVATE	512	/* Inode is fs-internal */
 #define S_IMA		1024	/* Inode has an associated IMA struct */
 #define S_AUTOMOUNT	2048	/* Automount/referral quasi-directory */
+#define S_NOSEC		4096	/* no suid or xattr security attributes */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -273,6 +274,7 @@
 #define IS_PRIVATE(inode)	((inode)->i_flags & S_PRIVATE)
 #define IS_IMA(inode)		((inode)->i_flags & S_IMA)
 #define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
+#define IS_NOSEC(inode)		((inode)->i_flags & S_NOSEC)
 
 /* the read-only stuff doesn't really belong here, but any other place is
    probably as bad and I don't want to create yet another include file. */
@@ -1618,7 +1620,7 @@
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
 
-   	void (*dirty_inode) (struct inode *);
+   	void (*dirty_inode) (struct inode *, int flags);
 	int (*write_inode) (struct inode *, struct writeback_control *wbc);
 	int (*drop_inode) (struct inode *);
 	void (*evict_inode) (struct inode *);
@@ -2582,5 +2584,16 @@
 #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
 					    (flag & __FMODE_NONOTIFY)))
 
+static inline int is_sxid(mode_t mode)
+{
+	return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
+}
+
+static inline void inode_has_no_xattr(struct inode *inode)
+{
+	if (!is_sxid(inode->i_mode))
+		inode->i_flags |= S_NOSEC;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index b5a550a..59d3ef1 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h

@@ -16,6 +16,11 @@
 	const char		*name;
 };
 
+struct trace_print_flags_u64 {
+	unsigned long long	mask;
+	const char		*name;
+};
+
 const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 				   unsigned long flags,
 				   const struct trace_print_flags *flag_array);
@@ -23,6 +28,13 @@
 const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 				     const struct trace_print_flags *symbol_array);
 
+#if BITS_PER_LONG == 32
+const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
+					 unsigned long long val,
+					 const struct trace_print_flags_u64
+								 *symbol_array);
+#endif
+
 const char *ftrace_print_hex_seq(struct trace_seq *p,
 				 const unsigned char *buf, int len);
 

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 32720ba..32d47e7 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h

@@ -25,9 +25,9 @@
  * warning when something is wrongly called.
  */
 
-static inline int gpio_is_valid(int number)
+static inline bool gpio_is_valid(int number)
 {
-	return 0;
+	return false;
 }
 
 static inline int gpio_request(unsigned gpio, const char *label)
@@ -41,7 +41,7 @@
 	return -ENOSYS;
 }
 
-static inline int gpio_request_array(struct gpio *array, size_t num)
+static inline int gpio_request_array(const struct gpio *array, size_t num)
 {
 	return -ENOSYS;
 }
@@ -54,7 +54,7 @@
 	WARN_ON(1);
 }
 
-static inline void gpio_free_array(struct gpio *array, size_t num)
+static inline void gpio_free_array(const struct gpio *array, size_t num)
 {
 	might_sleep();
 

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2a78aae..027935c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h

@@ -264,6 +264,8 @@
 
 	struct linux_binfmt *binfmt;
 
+	cpumask_var_t cpu_vm_mask_var;
+
 	/* Architecture-specific MM context */
 	mm_context_t context;
 
@@ -311,10 +313,18 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
-
-	cpumask_var_t cpu_vm_mask_var;
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	struct cpumask cpumask_allocation;
+#endif
 };
 
+static inline void mm_init_cpumask(struct mm_struct *mm)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	mm->cpu_vm_mask_var = &mm->cpumask_allocation;
+#endif
+}
+
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
 static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 {

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 178fafe..504b289 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h

@@ -562,6 +562,7 @@
 	NFSPROC4_CLNT_LAYOUTGET,
 	NFSPROC4_CLNT_GETDEVICEINFO,
 	NFSPROC4_CLNT_LAYOUTCOMMIT,
+	NFSPROC4_CLNT_LAYOUTRETURN,
 };
 
 /* nfs41 types */
@@ -570,9 +571,11 @@
 };
 
 /* Create Session Flags */
-#define SESSION4_PERSIST	 0x001
-#define SESSION4_BACK_CHAN 	 0x002
-#define SESSION4_RDMA		 0x004
+#define SESSION4_PERSIST	0x001
+#define SESSION4_BACK_CHAN	0x002
+#define SESSION4_RDMA		0x004
+
+#define SESSION4_FLAG_MASK_A	0x007
 
 enum state_protect_how4 {
 	SP4_NONE	= 0,

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 91af2e4..3a34e80 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h

@@ -68,7 +68,7 @@
 	int 			pg_ioflags;
 	int			pg_error;
 	struct pnfs_layout_segment *pg_lseg;
-	int			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+	bool			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7e371f7..5e8444a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h

@@ -269,6 +269,27 @@
 	struct nfs4_layoutcommit_res res;
 };
 
+struct nfs4_layoutreturn_args {
+	__u32   layout_type;
+	struct inode *inode;
+	nfs4_stateid stateid;
+	struct nfs4_sequence_args seq_args;
+};
+
+struct nfs4_layoutreturn_res {
+	struct nfs4_sequence_res seq_res;
+	u32 lrs_present;
+	nfs4_stateid stateid;
+};
+
+struct nfs4_layoutreturn {
+	struct nfs4_layoutreturn_args args;
+	struct nfs4_layoutreturn_res res;
+	struct rpc_cred *cred;
+	struct nfs_client *clp;
+	int rpc_status;
+};
+
 /*
  * Arguments to the open call.
  */
@@ -1087,6 +1108,7 @@
 	const struct rpc_call_ops *mds_ops;
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
 	__u64			mds_offset;
+	int			pnfs_error;
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
@@ -1112,6 +1134,7 @@
 	unsigned long		timestamp;	/* For lease renewal */
 #endif
 	__u64			mds_offset;	/* Filelayout dense stripe */
+	int			pnfs_error;
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 79a6700..6081493 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h

@@ -308,7 +308,7 @@
 {
 #ifdef CONFIG_S390
 	if (!test_and_set_bit(PG_uptodate, &page->flags))
-		page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0);
+		page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, 0);
 #else
 	/*
 	 * Memory barrier must be issued before setting the PG_uptodate bit,

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 24787b7..a311008 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h

@@ -2483,6 +2483,7 @@
 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX	0x1c5f
 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0	0x1d40
 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1	0x1d41
+#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI	0x1e31
 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN	0x1e40
 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX	0x1e5f
 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN	0x2310

diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h
index 77cbddb..a7d87f9 100644
--- a/include/linux/pm_qos_params.h
+++ b/include/linux/pm_qos_params.h

@@ -16,6 +16,10 @@
 #define PM_QOS_NUM_CLASSES 4
 #define PM_QOS_DEFAULT_VALUE -1
 
+#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
+#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
+#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE	0
+
 struct pm_qos_request_list {
 	struct plist_node list;
 	int pm_qos_class;

diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h
new file mode 100644
index 0000000..76efbdd
--- /dev/null
+++ b/include/linux/pnfs_osd_xdr.h

@@ -0,0 +1,345 @@
+/*
+ *  pNFS-osd on-the-wire data structures
+ *
+ *  Copyright (C) 2007 Panasas Inc. [year of first publication]
+ *  All rights reserved.
+ *
+ *  Benny Halevy <bhalevy@panasas.com>
+ *  Boaz Harrosh <bharrosh@panasas.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  See the file COPYING included with this distribution for more details.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the Panasas company nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __PNFS_OSD_XDR_H__
+#define __PNFS_OSD_XDR_H__
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <scsi/osd_protocol.h>
+
+#define PNFS_OSD_OSDNAME_MAXSIZE 256
+
+/*
+ * draft-ietf-nfsv4-minorversion-22
+ * draft-ietf-nfsv4-pnfs-obj-12
+ */
+
+/* Layout Structure */
+
+enum pnfs_osd_raid_algorithm4 {
+	PNFS_OSD_RAID_0		= 1,
+	PNFS_OSD_RAID_4		= 2,
+	PNFS_OSD_RAID_5		= 3,
+	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
+};
+
+/*   struct pnfs_osd_data_map4 {
+ *       uint32_t                    odm_num_comps;
+ *       length4                     odm_stripe_unit;
+ *       uint32_t                    odm_group_width;
+ *       uint32_t                    odm_group_depth;
+ *       uint32_t                    odm_mirror_cnt;
+ *       pnfs_osd_raid_algorithm4    odm_raid_algorithm;
+ *   };
+ */
+struct pnfs_osd_data_map {
+	u32	odm_num_comps;
+	u64	odm_stripe_unit;
+	u32	odm_group_width;
+	u32	odm_group_depth;
+	u32	odm_mirror_cnt;
+	u32	odm_raid_algorithm;
+};
+
+/*   struct pnfs_osd_objid4 {
+ *       deviceid4       oid_device_id;
+ *       uint64_t        oid_partition_id;
+ *       uint64_t        oid_object_id;
+ *   };
+ */
+struct pnfs_osd_objid {
+	struct nfs4_deviceid	oid_device_id;
+	u64			oid_partition_id;
+	u64			oid_object_id;
+};
+
+/* For printout. I use:
+ * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer));
+ * BE style
+ */
+#define _DEVID_LO(oid_device_id) \
+	(unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data)
+
+#define _DEVID_HI(oid_device_id) \
+	(unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1)
+
+static inline int
+pnfs_osd_objid_xdr_sz(void)
+{
+	return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2;
+}
+
+enum pnfs_osd_version {
+	PNFS_OSD_MISSING              = 0,
+	PNFS_OSD_VERSION_1            = 1,
+	PNFS_OSD_VERSION_2            = 2
+};
+
+struct pnfs_osd_opaque_cred {
+	u32 cred_len;
+	void *cred;
+};
+
+enum pnfs_osd_cap_key_sec {
+	PNFS_OSD_CAP_KEY_SEC_NONE     = 0,
+	PNFS_OSD_CAP_KEY_SEC_SSV      = 1,
+};
+
+/*   struct pnfs_osd_object_cred4 {
+ *       pnfs_osd_objid4         oc_object_id;
+ *       pnfs_osd_version4       oc_osd_version;
+ *       pnfs_osd_cap_key_sec4   oc_cap_key_sec;
+ *       opaque                  oc_capability_key<>;
+ *       opaque                  oc_capability<>;
+ *   };
+ */
+struct pnfs_osd_object_cred {
+	struct pnfs_osd_objid		oc_object_id;
+	u32				oc_osd_version;
+	u32				oc_cap_key_sec;
+	struct pnfs_osd_opaque_cred	oc_cap_key;
+	struct pnfs_osd_opaque_cred	oc_cap;
+};
+
+/*   struct pnfs_osd_layout4 {
+ *       pnfs_osd_data_map4      olo_map;
+ *       uint32_t                olo_comps_index;
+ *       pnfs_osd_object_cred4   olo_components<>;
+ *   };
+ */
+struct pnfs_osd_layout {
+	struct pnfs_osd_data_map	olo_map;
+	u32				olo_comps_index;
+	u32				olo_num_comps;
+	struct pnfs_osd_object_cred	*olo_comps;
+};
+
+/* Device Address */
+enum pnfs_osd_targetid_type {
+	OBJ_TARGET_ANON = 1,
+	OBJ_TARGET_SCSI_NAME = 2,
+	OBJ_TARGET_SCSI_DEVICE_ID = 3,
+};
+
+/*   union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
+ *       case OBJ_TARGET_SCSI_NAME:
+ *           string              oti_scsi_name<>;
+ *
+ *       case OBJ_TARGET_SCSI_DEVICE_ID:
+ *           opaque              oti_scsi_device_id<>;
+ *
+ *       default:
+ *           void;
+ *   };
+ *
+ *   union pnfs_osd_targetaddr4 switch (bool ota_available) {
+ *       case TRUE:
+ *           netaddr4            ota_netaddr;
+ *       case FALSE:
+ *           void;
+ *   };
+ *
+ *   struct pnfs_osd_deviceaddr4 {
+ *       pnfs_osd_targetid4      oda_targetid;
+ *       pnfs_osd_targetaddr4    oda_targetaddr;
+ *       uint64_t                oda_lun;
+ *       opaque                  oda_systemid<>;
+ *       pnfs_osd_object_cred4   oda_root_obj_cred;
+ *       opaque                  oda_osdname<>;
+ *   };
+ */
+struct pnfs_osd_targetid {
+	u32				oti_type;
+	struct nfs4_string		oti_scsi_device_id;
+};
+
+enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
+
+/*   struct netaddr4 {
+ *       // see struct rpcb in RFC1833
+ *       string r_netid<>;    // network id
+ *       string r_addr<>;     // universal address
+ *   };
+ */
+struct pnfs_osd_net_addr {
+	struct nfs4_string	r_netid;
+	struct nfs4_string	r_addr;
+};
+
+struct pnfs_osd_targetaddr {
+	u32				ota_available;
+	struct pnfs_osd_net_addr	ota_netaddr;
+};
+
+enum {
+	NETWORK_ID_MAX = 16 / 4,
+	UNIVERSAL_ADDRESS_MAX = 64 / 4,
+	PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
+};
+
+struct pnfs_osd_deviceaddr {
+	struct pnfs_osd_targetid	oda_targetid;
+	struct pnfs_osd_targetaddr	oda_targetaddr;
+	u8				oda_lun[8];
+	struct nfs4_string		oda_systemid;
+	struct pnfs_osd_object_cred	oda_root_obj_cred;
+	struct nfs4_string		oda_osdname;
+};
+
+enum {
+	ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
+	PNFS_OSD_DEVICEADDR_MAX =
+		PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
+		2 /*oda_lun*/ +
+		1 + OSD_SYSTEMID_LEN +
+		1 + ODA_OSDNAME_MAX,
+};
+
+/* LAYOUTCOMMIT: layoutupdate */
+
+/*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
+ *       case TRUE:
+ *           int64_t     dsu_delta;
+ *       case FALSE:
+ *           void;
+ *   };
+ *
+ *   struct pnfs_osd_layoutupdate4 {
+ *       pnfs_osd_deltaspaceused4    olu_delta_space_used;
+ *       bool                        olu_ioerr_flag;
+ *   };
+ */
+struct pnfs_osd_layoutupdate {
+	u32	dsu_valid;
+	s64	dsu_delta;
+	u32	olu_ioerr_flag;
+};
+
+/* LAYOUTRETURN: I/O Rrror Report */
+
+enum pnfs_osd_errno {
+	PNFS_OSD_ERR_EIO		= 1,
+	PNFS_OSD_ERR_NOT_FOUND		= 2,
+	PNFS_OSD_ERR_NO_SPACE		= 3,
+	PNFS_OSD_ERR_BAD_CRED		= 4,
+	PNFS_OSD_ERR_NO_ACCESS		= 5,
+	PNFS_OSD_ERR_UNREACHABLE	= 6,
+	PNFS_OSD_ERR_RESOURCE		= 7
+};
+
+/*   struct pnfs_osd_ioerr4 {
+ *       pnfs_osd_objid4     oer_component;
+ *       length4             oer_comp_offset;
+ *       length4             oer_comp_length;
+ *       bool                oer_iswrite;
+ *       pnfs_osd_errno4     oer_errno;
+ *   };
+ */
+struct pnfs_osd_ioerr {
+	struct pnfs_osd_objid	oer_component;
+	u64			oer_comp_offset;
+	u64			oer_comp_length;
+	u32			oer_iswrite;
+	u32			oer_errno;
+};
+
+/* OSD XDR API */
+/* Layout helpers */
+/* Layout decoding is done in two parts:
+ * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
+ *    of the layout. @iter members need not be initialized.
+ *    Returned:
+ *             @layout members are set. (@layout->olo_comps set to NULL).
+ *
+ *             Zero on success, or negative error if passed xdr is broken.
+ *
+ * 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns
+ *    false, to decode the next component.
+ *    Returned:
+ *       true if there is more to decode or false if we are done or error.
+ *
+ * Example:
+ *	struct pnfs_osd_xdr_decode_layout_iter iter;
+ *	struct pnfs_osd_layout layout;
+ *	struct pnfs_osd_object_cred comp;
+ *	int status;
+ *
+ *	status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
+ *	if (unlikely(status))
+ *		goto err;
+ *	while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) {
+ *		// All of @comp strings point to inside the xdr_buffer
+ *		// or scrach buffer. Copy them out to user memory eg.
+ *		copy_single_comp(dest_comp++, &comp);
+ *	}
+ *	if (unlikely(status))
+ *		goto err;
+ */
+
+struct pnfs_osd_xdr_decode_layout_iter {
+	unsigned total_comps;
+	unsigned decoded_comps;
+};
+
+extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
+	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr);
+
+extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
+	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
+	int *err);
+
+/* Device Info helpers */
+
+/* Note: All strings inside @deviceaddr point to space inside @p.
+ * @p should stay valid while @deviceaddr is in use.
+ */
+extern void pnfs_osd_xdr_decode_deviceaddr(
+	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p);
+
+/* layoutupdate (layout_commit) xdr helpers */
+extern int
+pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
+				 struct pnfs_osd_layoutupdate *lou);
+
+/* osd_ioerror encoding/decoding (layout_return) */
+/* Client */
+extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
+extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);
+
+#endif /* __PNFS_OSD_XDR_H__ */

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dc88712..2a8621c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -1546,7 +1546,7 @@
 #ifdef CONFIG_TRACING
 	/* state flags for use by tracers */
 	unsigned long trace;
-	/* bitmask of trace recursion */
+	/* bitmask and counter of trace recursion */
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
@@ -1841,9 +1841,16 @@
 #endif
 
 #ifdef CONFIG_SMP
+extern void do_set_cpus_allowed(struct task_struct *p,
+			       const struct cpumask *new_mask);
+
 extern int set_cpus_allowed_ptr(struct task_struct *p,
 				const struct cpumask *new_mask);
 #else
+static inline void do_set_cpus_allowed(struct task_struct *p,
+				      const struct cpumask *new_mask)
+{
+}
 static inline int set_cpus_allowed_ptr(struct task_struct *p,
 				       const struct cpumask *new_mask)
 {
@@ -2187,7 +2194,6 @@
 	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
 		__mmdrop(mm);
 }
-extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm);
 
 /* mmput gets rid of the mappings and all user-space */
 extern void mmput(struct mm_struct *);

diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 77e6248..c68a147 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h

@@ -145,6 +145,7 @@
 #define RPCBIND_NETID_TCP	"tcp"
 #define RPCBIND_NETID_UDP6	"udp6"
 #define RPCBIND_NETID_TCP6	"tcp6"
+#define RPCBIND_NETID_LOCAL	"local"
 
 /*
  * Note that RFC 1833 does not put any size restrictions on the

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 04dba23..85c50b4 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h

@@ -28,6 +28,7 @@
 	/* private TCP part */
 	u32			sk_reclen;	/* length of record */
 	u32			sk_tcplen;	/* current read length */
+	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
 /*

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index fc84b7a..a20970e 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h

@@ -216,6 +216,8 @@
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
+		struct page **pages, unsigned int len);
 extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index a0f998c..81cce3b 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h

@@ -141,7 +141,8 @@
 	XPRT_TRANSPORT_UDP	= IPPROTO_UDP,
 	XPRT_TRANSPORT_TCP	= IPPROTO_TCP,
 	XPRT_TRANSPORT_BC_TCP	= IPPROTO_TCP | XPRT_TRANSPORT_BC,
-	XPRT_TRANSPORT_RDMA	= 256
+	XPRT_TRANSPORT_RDMA	= 256,
+	XPRT_TRANSPORT_LOCAL	= 257,
 };
 
 struct rpc_xprt {

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index d512d98..5ca0951 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h

@@ -93,8 +93,8 @@
  * Safely read from address @src to the buffer at @dst.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-extern long probe_kernel_read(void *dst, void *src, size_t size);
-extern long __probe_kernel_read(void *dst, void *src, size_t size);
+extern long probe_kernel_read(void *dst, const void *src, size_t size);
+extern long __probe_kernel_read(void *dst, const void *src, size_t size);
 
 /*
  * probe_kernel_write(): safely attempt to write to a location
@@ -105,7 +105,7 @@
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-extern long notrace probe_kernel_write(void *dst, void *src, size_t size);
-extern long notrace __probe_kernel_write(void *dst, void *src, size_t size);
+extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
+extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
 
 #endif		/* __LINUX_UACCESS_H__ */

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index aff5b4f..7108857 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h

@@ -51,6 +51,13 @@
  *	This re-enables callbacks; it returns "false" if there are pending
  *	buffers in the queue, to detect a possible race between the driver
  *	checking for more work, and enabling callbacks.
+ * virtqueue_enable_cb_delayed: restart callbacks after disable_cb.
+ *	vq: the struct virtqueue we're talking about.
+ *	This re-enables callbacks but hints to the other side to delay
+ *	interrupts until most of the available buffers have been processed;
+ *	it returns "false" if there are many pending buffers in the queue,
+ *	to detect a possible race between the driver checking for more work,
+ *	and enabling callbacks.
  * virtqueue_detach_unused_buf: detach first unused buffer
  * 	vq: the struct virtqueue we're talking about.
  * 	Returns NULL or the "data" token handed to add_buf
@@ -86,6 +93,8 @@
 
 bool virtqueue_enable_cb(struct virtqueue *vq);
 
+bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
+
 void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 /**

diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index e68b439..277c4ad 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h

@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_9P_H
 #define _LINUX_VIRTIO_9P_H
 /* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>

diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index a50ecd1..652dc8b 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h

@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_BALLOON_H
 #define _LINUX_VIRTIO_BALLOON_H
 /* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 

diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 167720d..e0edb40 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h

@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_BLK_H
 #define _LINUX_VIRTIO_BLK_H
 /* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 800617b..39c88c5 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h

@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_CONFIG_H
 #define _LINUX_VIRTIO_CONFIG_H
 /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
- * anyone can use the definitions to implement compatible drivers/servers. */
+ * anyone can use the definitions to implement compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 
 /* Virtio devices use a standardized configuration space to define their
  * features and pass configuration information, but each implementation can

diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index e4d3335..bdf4b00 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h

@@ -5,7 +5,31 @@
 #include <linux/virtio_config.h>
 /*
  * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
- * anyone can use the definitions to implement compatible drivers/servers.
+ * anyone can use the definitions to implement compatible drivers/servers:
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  *
  * Copyright (C) Red Hat, Inc., 2009, 2010, 2011
  * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011

diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
index 06660c0..85bb0bb 100644
--- a/include/linux/virtio_ids.h
+++ b/include/linux/virtio_ids.h

@@ -5,7 +5,29 @@
  *
  * This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 
 #define VIRTIO_ID_NET		1 /* virtio net */
 #define VIRTIO_ID_BLOCK		2 /* virtio block */

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 085e422..136040b 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h

@@ -1,7 +1,30 @@
 #ifndef _LINUX_VIRTIO_NET_H
 #define _LINUX_VIRTIO_NET_H
 /* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>

diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index 9a3d7c4..ea66f3f 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h

@@ -11,6 +11,29 @@
  *
  * This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */
 
 #ifndef _LINUX_VIRTIO_PCI_H

diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index e4d144b..4a32cb6 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h

@@ -7,6 +7,29 @@
  * This header is BSD licensed so anyone can use the definitions to implement
  * compatible drivers/servers.
  *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
  * Copyright Rusty Russell IBM Corporation 2007. */
 #include <linux/types.h>
 
@@ -29,6 +52,12 @@
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC	28
 
+/* The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field. */
+/* The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field. */
+#define VIRTIO_RING_F_EVENT_IDX		29
+
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc {
 	/* Address (guest-physical). */
@@ -83,6 +112,7 @@
  *	__u16 avail_flags;
  *	__u16 avail_idx;
  *	__u16 available[num];
+ *	__u16 used_event_idx;
  *
  *	// Padding to the next align boundary.
  *	char pad[];
@@ -91,8 +121,14 @@
  *	__u16 used_flags;
  *	__u16 used_idx;
  *	struct vring_used_elem used[num];
+ *	__u16 avail_event_idx;
  * };
  */
+/* We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility. */
+#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num])
+
 static inline void vring_init(struct vring *vr, unsigned int num, void *p,
 			      unsigned long align)
 {
@@ -107,7 +143,21 @@
 {
 	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
 		 + align - 1) & ~(align - 1))
-		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
+		+ sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
+}
+
+/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
+/* Assuming a given event_idx value from the other size, if
+ * we have just incremented index from old to new_idx,
+ * should we trigger an event? */
+static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
+{
+	/* Note: Xen has similar logic for notification hold-off
+	 * in include/xen/interface/io/ring.h with req_event and req_prod
+	 * corresponding to event_idx + 1 and new_idx respectively.
+	 * Note also that req_event and req_prod in Xen start at 1,
+	 * event indexes in virtio start at 0. */
+	return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
 }
 
 #ifdef __KERNEL__

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index f445cff..4114129 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h

@@ -28,7 +28,7 @@
 		{ BTRFS_SHARED_DATA_REF_KEY, 	"SHARED_DATA_REF" })
 
 #define __show_root_type(obj)						\
-	__print_symbolic(obj,						\
+	__print_symbolic_u64(obj,					\
 		{ BTRFS_ROOT_TREE_OBJECTID, 	"ROOT_TREE"	},	\
 		{ BTRFS_EXTENT_TREE_OBJECTID, 	"EXTENT_TREE"	},	\
 		{ BTRFS_CHUNK_TREE_OBJECTID, 	"CHUNK_TREE"	},	\
@@ -125,7 +125,7 @@
 );
 
 #define __show_map_type(type)						\
-	__print_symbolic(type,						\
+	__print_symbolic_u64(type,					\
 		{ EXTENT_MAP_LAST_BYTE, "LAST_BYTE" 	},		\
 		{ EXTENT_MAP_HOLE, 	"HOLE" 		},		\
 		{ EXTENT_MAP_INLINE, 	"INLINE" 	},		\

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 3e68366..533c49f 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h

@@ -205,6 +205,19 @@
 		ftrace_print_symbols_seq(p, value, symbols);		\
 	})
 
+#undef __print_symbolic_u64
+#if BITS_PER_LONG == 32
+#define __print_symbolic_u64(value, symbol_array...)			\
+	({								\
+		static const struct trace_print_flags_u64 symbols[] =	\
+			{ symbol_array, { -1, NULL } };			\
+		ftrace_print_symbols_seq_u64(p, value, symbols);	\
+	})
+#else
+#define __print_symbolic_u64(value, symbol_array...)			\
+			__print_symbolic(value, symbol_array)
+#endif
+
 #undef __print_hex
 #define __print_hex(buf, buf_len) ftrace_print_hex_seq(p, buf, buf_len)
 

diff --git a/init/main.c b/init/main.c
index d2f1e08..cafba67 100644
--- a/init/main.c
+++ b/init/main.c

@@ -487,6 +487,7 @@
 	printk(KERN_NOTICE "%s", linux_banner);
 	setup_arch(&command_line);
 	mm_init_owner(&init_mm, &init_task);
+	mm_init_cpumask(&init_mm);
 	setup_command_line(command_line);
 	setup_nr_cpu_ids();
 	setup_per_cpu_areas();
@@ -510,7 +511,6 @@
 	sort_main_extable();
 	trap_init();
 	mm_init();
-	BUG_ON(mm_init_cpumask(&init_mm, 0));
 
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1ceeb04..9c9b754 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c

@@ -2190,7 +2190,7 @@
 	rcu_read_lock();
 	cs = task_cs(tsk);
 	if (cs)
-		cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
+		do_set_cpus_allowed(tsk, cs->cpus_allowed);
 	rcu_read_unlock();
 
 	/*
@@ -2217,7 +2217,7 @@
 		 * Like above we can temporary set any mask and rely on
 		 * set_cpus_allowed_ptr() as synchronization point.
 		 */
-		cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
+		do_set_cpus_allowed(tsk, cpu_possible_mask);
 		cpu = cpumask_any(cpu_active_mask);
 	}
 

diff --git a/kernel/events/core.c b/kernel/events/core.c
index c09767f..d863b3c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c

@@ -5028,6 +5028,14 @@
 	else
 		perf_event_output(event, nmi, data, regs);
 
+	if (event->fasync && event->pending_kill) {
+		if (nmi) {
+			event->pending_wakeup = 1;
+			irq_work_queue(&event->pending);
+		} else
+			perf_event_wakeup(event);
+	}
+
 	return ret;
 }
 

diff --git a/kernel/fork.c b/kernel/fork.c
index ca406d9..0276c30 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c

@@ -484,20 +484,6 @@
 #endif
 }
 
-int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
-		return -ENOMEM;
-
-	if (oldmm)
-		cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
-	else
-		memset(mm_cpumask(mm), 0, cpumask_size());
-#endif
-	return 0;
-}
-
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
@@ -538,17 +524,8 @@
 		return NULL;
 
 	memset(mm, 0, sizeof(*mm));
-	mm = mm_init(mm, current);
-	if (!mm)
-		return NULL;
-
-	if (mm_init_cpumask(mm, NULL)) {
-		mm_free_pgd(mm);
-		free_mm(mm);
-		return NULL;
-	}
-
-	return mm;
+	mm_init_cpumask(mm);
+	return mm_init(mm, current);
 }
 
 /*
@@ -559,7 +536,6 @@
 void __mmdrop(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm);
-	free_cpumask_var(mm->cpu_vm_mask_var);
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
@@ -753,6 +729,7 @@
 		goto fail_nomem;
 
 	memcpy(mm, oldmm, sizeof(*mm));
+	mm_init_cpumask(mm);
 
 	/* Initializing for Swap token stuff */
 	mm->token_priority = 0;
@@ -765,9 +742,6 @@
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
-	if (mm_init_cpumask(mm, oldmm))
-		goto fail_nocpumask;
-
 	if (init_new_context(tsk, mm))
 		goto fail_nocontext;
 
@@ -794,9 +768,6 @@
 	return NULL;
 
 fail_nocontext:
-	free_cpumask_var(mm->cpu_vm_mask_var);
-
-fail_nocpumask:
 	/*
 	 * If init_new_context() failed, we cannot use mmput() to free the mm
 	 * because it calls destroy_context()
@@ -1591,6 +1562,13 @@
 	fs_cachep = kmem_cache_create("fs_cache",
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+	/*
+	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
+	 * whole struct cpumask for the OFFSTACK case. We could change
+	 * this to *only* allocate as much of it as required by the
+	 * maximum number of CPU's we can ever have.  The cpumask_allocation
+	 * is at the end of the structure, exactly for that reason.
+	 */
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 74d1c09..fa27e75 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c

@@ -105,9 +105,12 @@
 }
 
 static void __jump_label_update(struct jump_label_key *key,
-		struct jump_entry *entry, int enable)
+				struct jump_entry *entry,
+				struct jump_entry *stop, int enable)
 {
-	for (; entry->key == (jump_label_t)(unsigned long)key; entry++) {
+	for (; (entry < stop) &&
+	      (entry->key == (jump_label_t)(unsigned long)key);
+	      entry++) {
 		/*
 		 * entry->code set to 0 invalidates module init text sections
 		 * kernel_text_address() verifies we are not in core kernel
@@ -181,7 +184,11 @@
 	struct jump_label_mod *mod = key->next;
 
 	while (mod) {
-		__jump_label_update(key, mod->entries, enable);
+		struct module *m = mod->mod;
+
+		__jump_label_update(key, mod->entries,
+				    m->jump_entries + m->num_jump_entries,
+				    enable);
 		mod = mod->next;
 	}
 }
@@ -245,7 +252,8 @@
 		key->next = jlm;
 
 		if (jump_label_enabled(key))
-			__jump_label_update(key, iter, JUMP_LABEL_ENABLE);
+			__jump_label_update(key, iter, iter_stop,
+					    JUMP_LABEL_ENABLE);
 	}
 
 	return 0;
@@ -371,7 +379,7 @@
 
 	/* if there are no users, entry can be NULL */
 	if (entry)
-		__jump_label_update(key, entry, enable);
+		__jump_label_update(key, entry, __stop___jump_table, enable);
 
 #ifdef CONFIG_MODULES
 	__jump_label_mod_update(key, enable);

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3b34d27..4ba7ccc 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c

@@ -202,8 +202,8 @@
 		return;
 	}
 
-	p->cpus_allowed = cpumask_of_cpu(cpu);
-	p->rt.nr_cpus_allowed = 1;
+	/* It's safe because the task is inactive. */
+	do_set_cpus_allowed(p, cpumask_of(cpu));
 	p->flags |= PF_THREAD_BOUND;
 }
 EXPORT_SYMBOL(kthread_bind);

diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index fd8d1e0..6824ca7 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c

@@ -54,11 +54,17 @@
 	PM_QOS_MIN		/* return the smallest value */
 };
 
+/*
+ * Note: The lockless read path depends on the CPU accessing
+ * target_value atomically.  Atomic access is only guaranteed on all CPU
+ * types linux supports for 32 bit quantites
+ */
 struct pm_qos_object {
 	struct plist_head requests;
 	struct blocking_notifier_head *notifiers;
 	struct miscdevice pm_qos_power_miscdev;
 	char *name;
+	s32 target_value;	/* Do not change to 64 bit */
 	s32 default_value;
 	enum pm_qos_type type;
 };
@@ -71,7 +77,8 @@
 	.requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock),
 	.notifiers = &cpu_dma_lat_notifier,
 	.name = "cpu_dma_latency",
-	.default_value = 2000 * USEC_PER_SEC,
+	.target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
+	.default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
 	.type = PM_QOS_MIN,
 };
 
@@ -80,7 +87,8 @@
 	.requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock),
 	.notifiers = &network_lat_notifier,
 	.name = "network_latency",
-	.default_value = 2000 * USEC_PER_SEC,
+	.target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
+	.default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
 	.type = PM_QOS_MIN
 };
 
@@ -90,7 +98,8 @@
 	.requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock),
 	.notifiers = &network_throughput_notifier,
 	.name = "network_throughput",
-	.default_value = 0,
+	.target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
+	.default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
 	.type = PM_QOS_MAX,
 };
 
@@ -136,6 +145,16 @@
 	}
 }
 
+static inline s32 pm_qos_read_value(struct pm_qos_object *o)
+{
+	return o->target_value;
+}
+
+static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value)
+{
+	o->target_value = value;
+}
+
 static void update_target(struct pm_qos_object *o, struct plist_node *node,
 			  int del, int value)
 {
@@ -160,6 +179,7 @@
 		plist_add(node, &o->requests);
 	}
 	curr_value = pm_qos_get_value(o);
+	pm_qos_set_value(o, curr_value);
 	spin_unlock_irqrestore(&pm_qos_lock, flags);
 
 	if (prev_value != curr_value)
@@ -194,18 +214,11 @@
  * pm_qos_request - returns current system wide qos expectation
  * @pm_qos_class: identification of which qos value is requested
  *
- * This function returns the current target value in an atomic manner.
+ * This function returns the current target value.
  */
 int pm_qos_request(int pm_qos_class)
 {
-	unsigned long flags;
-	int value;
-
-	spin_lock_irqsave(&pm_qos_lock, flags);
-	value = pm_qos_get_value(pm_qos_array[pm_qos_class]);
-	spin_unlock_irqrestore(&pm_qos_lock, flags);
-
-	return value;
+	return pm_qos_read_value(pm_qos_array[pm_qos_class]);
 }
 EXPORT_SYMBOL_GPL(pm_qos_request);
 

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f07d2f0..89419ff 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c

@@ -36,7 +36,7 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/nmi.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/completion.h>
@@ -95,7 +95,6 @@
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
 static char rcu_kthreads_spawnable;
 
@@ -163,7 +162,7 @@
 #ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = 1,
-	.dynticks = 1,
+	.dynticks = ATOMIC_INIT(1),
 };
 #endif /* #ifdef CONFIG_NO_HZ */
 
@@ -322,13 +321,25 @@
 	unsigned long flags;
 	struct rcu_dynticks *rdtp;
 
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	rdtp->dynticks++;
-	rdtp->dynticks_nesting--;
-	WARN_ON_ONCE(rdtp->dynticks & 0x1);
+	if (--rdtp->dynticks_nesting) {
+		local_irq_restore(flags);
+		return;
+	}
+	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+	smp_mb__before_atomic_inc();  /* See above. */
+	atomic_inc(&rdtp->dynticks);
+	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 	local_irq_restore(flags);
+
+	/* If the interrupt queued a callback, get out of dyntick mode. */
+	if (in_irq() &&
+	    (__get_cpu_var(rcu_sched_data).nxtlist ||
+	     __get_cpu_var(rcu_bh_data).nxtlist ||
+	     rcu_preempt_needs_cpu(smp_processor_id())))
+		set_need_resched();
 }
 
 /*
@@ -344,11 +355,16 @@
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	rdtp->dynticks++;
-	rdtp->dynticks_nesting++;
-	WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
+	if (rdtp->dynticks_nesting++) {
+		local_irq_restore(flags);
+		return;
+	}
+	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
+	atomic_inc(&rdtp->dynticks);
+	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+	smp_mb__after_atomic_inc();  /* See above. */
+	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 	local_irq_restore(flags);
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 }
 
 /**
@@ -362,11 +378,15 @@
 {
 	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-	if (rdtp->dynticks & 0x1)
+	if (rdtp->dynticks_nmi_nesting == 0 &&
+	    (atomic_read(&rdtp->dynticks) & 0x1))
 		return;
-	rdtp->dynticks_nmi++;
-	WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1));
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+	rdtp->dynticks_nmi_nesting++;
+	smp_mb__before_atomic_inc();  /* Force delay from prior write. */
+	atomic_inc(&rdtp->dynticks);
+	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+	smp_mb__after_atomic_inc();  /* See above. */
+	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 }
 
 /**
@@ -380,11 +400,14 @@
 {
 	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-	if (rdtp->dynticks & 0x1)
+	if (rdtp->dynticks_nmi_nesting == 0 ||
+	    --rdtp->dynticks_nmi_nesting != 0)
 		return;
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
-	rdtp->dynticks_nmi++;
-	WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1);
+	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+	smp_mb__before_atomic_inc();  /* See above. */
+	atomic_inc(&rdtp->dynticks);
+	smp_mb__after_atomic_inc();  /* Force delay to next write. */
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
 /**
@@ -395,13 +418,7 @@
  */
 void rcu_irq_enter(void)
 {
-	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-	if (rdtp->dynticks_nesting++)
-		return;
-	rdtp->dynticks++;
-	WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+	rcu_exit_nohz();
 }
 
 /**
@@ -413,18 +430,7 @@
  */
 void rcu_irq_exit(void)
 {
-	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-	if (--rdtp->dynticks_nesting)
-		return;
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
-	rdtp->dynticks++;
-	WARN_ON_ONCE(rdtp->dynticks & 0x1);
-
-	/* If the interrupt queued a callback, get out of dyntick mode. */
-	if (__this_cpu_read(rcu_sched_data.nxtlist) ||
-	    __this_cpu_read(rcu_bh_data.nxtlist))
-		set_need_resched();
+	rcu_enter_nohz();
 }
 
 #ifdef CONFIG_SMP
@@ -436,19 +442,8 @@
  */
 static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
-	int ret;
-	int snap;
-	int snap_nmi;
-
-	snap = rdp->dynticks->dynticks;
-	snap_nmi = rdp->dynticks->dynticks_nmi;
-	smp_mb();	/* Order sampling of snap with end of grace period. */
-	rdp->dynticks_snap = snap;
-	rdp->dynticks_nmi_snap = snap_nmi;
-	ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
-	if (ret)
-		rdp->dynticks_fqs++;
-	return ret;
+	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+	return 0;
 }
 
 /*
@@ -459,16 +454,11 @@
  */
 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
-	long curr;
-	long curr_nmi;
-	long snap;
-	long snap_nmi;
+	unsigned long curr;
+	unsigned long snap;
 
-	curr = rdp->dynticks->dynticks;
-	snap = rdp->dynticks_snap;
-	curr_nmi = rdp->dynticks->dynticks_nmi;
-	snap_nmi = rdp->dynticks_nmi_snap;
-	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+	curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
+	snap = (unsigned long)rdp->dynticks_snap;
 
 	/*
 	 * If the CPU passed through or entered a dynticks idle phase with
@@ -478,8 +468,7 @@
 	 * read-side critical section that started before the beginning
 	 * of the current RCU grace period.
 	 */
-	if ((curr != snap || (curr & 0x1) == 0) &&
-	    (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
+	if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
 		rdp->dynticks_fqs++;
 		return 1;
 	}
@@ -908,6 +897,12 @@
 	unsigned long gp_duration;
 
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+
+	/*
+	 * Ensure that all grace-period and pre-grace-period activity
+	 * is seen before the assignment to rsp->completed.
+	 */
+	smp_mb(); /* See above block comment. */
 	gp_duration = jiffies - rsp->gp_start;
 	if (gp_duration > rsp->gp_max)
 		rsp->gp_max = gp_duration;
@@ -1455,25 +1450,11 @@
  */
 static void rcu_process_callbacks(void)
 {
-	/*
-	 * Memory references from any prior RCU read-side critical sections
-	 * executed by the interrupted code must be seen before any RCU
-	 * grace-period manipulations below.
-	 */
-	smp_mb(); /* See above block comment. */
-
 	__rcu_process_callbacks(&rcu_sched_state,
 				&__get_cpu_var(rcu_sched_data));
 	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
 	rcu_preempt_process_callbacks();
 
-	/*
-	 * Memory references from any later RCU read-side critical sections
-	 * executed by the interrupted code must be seen after any RCU
-	 * grace-period manipulations above.
-	 */
-	smp_mb(); /* See above block comment. */
-
 	/* If we are last CPU on way to dyntick-idle mode, accelerate it. */
 	rcu_needs_cpu_flush();
 }
@@ -1494,7 +1475,7 @@
 		local_irq_restore(flags);
 		return;
 	}
-	wake_up(&__get_cpu_var(rcu_cpu_wq));
+	wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
 	local_irq_restore(flags);
 }
 
@@ -1544,13 +1525,10 @@
  */
 static void rcu_cpu_kthread_timer(unsigned long arg)
 {
-	unsigned long flags;
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
 	struct rcu_node *rnp = rdp->mynode;
 
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	rnp->wakemask |= rdp->grpmask;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	atomic_or(rdp->grpmask, &rnp->wakemask);
 	invoke_rcu_node_kthread(rnp);
 }
 
@@ -1617,14 +1595,12 @@
 	unsigned long flags;
 	int spincnt = 0;
 	unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
-	wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
 	char work;
 	char *workp = &per_cpu(rcu_cpu_has_work, cpu);
 
 	for (;;) {
 		*statusp = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(*wqp,
-					 *workp != 0 || kthread_should_stop());
+		rcu_wait(*workp != 0 || kthread_should_stop());
 		local_bh_disable();
 		if (rcu_cpu_kthread_should_stop(cpu)) {
 			local_bh_enable();
@@ -1675,7 +1651,6 @@
 	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
 	WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
 	per_cpu(rcu_cpu_kthread_task, cpu) = t;
-	wake_up_process(t);
 	sp.sched_priority = RCU_KTHREAD_PRIO;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	return 0;
@@ -1698,11 +1673,10 @@
 
 	for (;;) {
 		rnp->node_kthread_status = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
+		rcu_wait(atomic_read(&rnp->wakemask) != 0);
 		rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
-		mask = rnp->wakemask;
-		rnp->wakemask = 0;
+		mask = atomic_xchg(&rnp->wakemask, 0);
 		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
 		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
 			if ((mask & 0x1) == 0)
@@ -1783,13 +1757,14 @@
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rnp->node_kthread_task = t;
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		wake_up_process(t);
 		sp.sched_priority = 99;
 		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	}
 	return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
 }
 
+static void rcu_wake_one_boost_kthread(struct rcu_node *rnp);
+
 /*
  * Spawn all kthreads -- called as soon as the scheduler is running.
  */
@@ -1797,24 +1772,31 @@
 {
 	int cpu;
 	struct rcu_node *rnp;
+	struct task_struct *t;
 
 	rcu_kthreads_spawnable = 1;
 	for_each_possible_cpu(cpu) {
-		init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
 		per_cpu(rcu_cpu_has_work, cpu) = 0;
-		if (cpu_online(cpu))
+		if (cpu_online(cpu)) {
 			(void)rcu_spawn_one_cpu_kthread(cpu);
+			t = per_cpu(rcu_cpu_kthread_task, cpu);
+			if (t)
+				wake_up_process(t);
+		}
 	}
 	rnp = rcu_get_root(rcu_state);
-	init_waitqueue_head(&rnp->node_wq);
-	rcu_init_boost_waitqueue(rnp);
 	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-	if (NUM_RCU_NODES > 1)
+	if (rnp->node_kthread_task)
+		wake_up_process(rnp->node_kthread_task);
+	if (NUM_RCU_NODES > 1) {
 		rcu_for_each_leaf_node(rcu_state, rnp) {
-			init_waitqueue_head(&rnp->node_wq);
-			rcu_init_boost_waitqueue(rnp);
 			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+			t = rnp->node_kthread_task;
+			if (t)
+				wake_up_process(t);
+			rcu_wake_one_boost_kthread(rnp);
 		}
+	}
 	return 0;
 }
 early_initcall(rcu_spawn_kthreads);
@@ -2218,14 +2200,14 @@
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
 
-static void __cpuinit rcu_online_cpu(int cpu)
+static void __cpuinit rcu_prepare_cpu(int cpu)
 {
 	rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
 	rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
 	rcu_preempt_init_percpu_data(cpu);
 }
 
-static void __cpuinit rcu_online_kthreads(int cpu)
+static void __cpuinit rcu_prepare_kthreads(int cpu)
 {
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
@@ -2239,6 +2221,31 @@
 }
 
 /*
+ * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state,
+ * but the RCU threads are woken on demand, and if demand is low this
+ * could be a while triggering the hung task watchdog.
+ *
+ * In order to avoid this, poke all tasks once the CPU is fully
+ * up and running.
+ */
+static void __cpuinit rcu_online_kthreads(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;
+	struct task_struct *t;
+
+	t = per_cpu(rcu_cpu_kthread_task, cpu);
+	if (t)
+		wake_up_process(t);
+
+	t = rnp->node_kthread_task;
+	if (t)
+		wake_up_process(t);
+
+	rcu_wake_one_boost_kthread(rnp);
+}
+
+/*
  * Handle CPU online/offline notification events.
  */
 static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
@@ -2251,10 +2258,11 @@
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		rcu_online_cpu(cpu);
-		rcu_online_kthreads(cpu);
+		rcu_prepare_cpu(cpu);
+		rcu_prepare_kthreads(cpu);
 		break;
 	case CPU_ONLINE:
+		rcu_online_kthreads(cpu);
 	case CPU_DOWN_FAILED:
 		rcu_node_kthread_setaffinity(rnp, -1);
 		rcu_cpu_kthread_setrt(cpu, 1);

diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 2576648..7b9a08b 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h

@@ -84,11 +84,9 @@
  * Dynticks per-CPU state.
  */
 struct rcu_dynticks {
-	int dynticks_nesting;	/* Track nesting level, sort of. */
-	int dynticks;		/* Even value for dynticks-idle, else odd. */
-	int dynticks_nmi;	/* Even value for either dynticks-idle or */
-				/*  not in nmi handler, else odd.  So this */
-				/*  remains even for nmi from irq handler. */
+	int dynticks_nesting;	/* Track irq/process nesting level. */
+	int dynticks_nmi_nesting; /* Track NMI nesting level. */
+	atomic_t dynticks;	/* Even value for dynticks-idle, else odd. */
 };
 
 /* RCU's kthread states for tracing. */
@@ -121,7 +119,9 @@
 				/*  elements that need to drain to allow the */
 				/*  current expedited grace period to */
 				/*  complete (only for TREE_PREEMPT_RCU). */
-	unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */
+	atomic_t wakemask;	/* CPUs whose kthread needs to be awakened. */
+				/*  Since this has meaning only for leaf */
+				/*  rcu_node structures, 32 bits suffices. */
 	unsigned long qsmaskinit;
 				/* Per-GP initial value for qsmask & expmask. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
@@ -159,9 +159,6 @@
 	struct task_struct *boost_kthread_task;
 				/* kthread that takes care of priority */
 				/*  boosting for this rcu_node structure. */
-	wait_queue_head_t boost_wq;
-				/* Wait queue on which to park the boost */
-				/*  kthread. */
 	unsigned int boost_kthread_status;
 				/* State of boost_kthread_task for tracing. */
 	unsigned long n_tasks_boosted;
@@ -188,9 +185,6 @@
 				/* kthread that takes care of this rcu_node */
 				/*  structure, for example, awakening the */
 				/*  per-CPU kthreads as needed. */
-	wait_queue_head_t node_wq;
-				/* Wait queue on which to park the per-node */
-				/*  kthread. */
 	unsigned int node_kthread_status;
 				/* State of node_kthread_task for tracing. */
 } ____cacheline_internodealigned_in_smp;
@@ -284,7 +278,6 @@
 	/* 3) dynticks interface. */
 	struct rcu_dynticks *dynticks;	/* Shared per-CPU dynticks state. */
 	int dynticks_snap;		/* Per-GP tracking for dynticks. */
-	int dynticks_nmi_snap;		/* Per-GP tracking for dynticks_nmi. */
 #endif /* #ifdef CONFIG_NO_HZ */
 
 	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
@@ -337,6 +330,16 @@
 						/*  scheduling clock irq */
 						/*  before ratting on them. */
 
+#define rcu_wait(cond)							\
+do {									\
+	for (;;) {							\
+		set_current_state(TASK_INTERRUPTIBLE);			\
+		if (cond)						\
+			break;						\
+		schedule();						\
+	}								\
+	__set_current_state(TASK_RUNNING);				\
+} while (0)
 
 /*
  * RCU global state, including node hierarchy.  This hierarchy is
@@ -446,7 +449,6 @@
 static void rcu_preempt_send_cbs_to_online(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_needs_cpu_flush(void);
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
 					  cpumask_var_t cm);

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3f6559a..c8bff30 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h

@@ -1196,8 +1196,7 @@
 
 	for (;;) {
 		rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
-							rnp->exp_tasks);
+		rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
 		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
 		more2boost = rcu_boost(rnp);
 		if (more2boost)
@@ -1275,14 +1274,6 @@
 }
 
 /*
- * Initialize the RCU-boost waitqueue.
- */
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-	init_waitqueue_head(&rnp->boost_wq);
-}
-
-/*
  * Create an RCU-boost kthread for the specified node if one does not
  * already exist.  We only create this kthread for preemptible RCU.
  * Returns zero if all is well, a negated errno otherwise.
@@ -1306,12 +1297,17 @@
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rnp->boost_kthread_task = t;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	wake_up_process(t);
 	sp.sched_priority = RCU_KTHREAD_PRIO;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	return 0;
 }
 
+static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+{
+	if (rnp->boost_kthread_task)
+		wake_up_process(rnp->boost_kthread_task);
+}
+
 #else /* #ifdef CONFIG_RCU_BOOST */
 
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@@ -1328,10 +1324,6 @@
 {
 }
 
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-}
-
 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 						 struct rcu_node *rnp,
 						 int rnp_index)
@@ -1339,6 +1331,10 @@
 	return 0;
 }
 
+static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+{
+}
+
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 #ifndef CONFIG_SMP
@@ -1520,7 +1516,6 @@
 {
 	int c = 0;
 	int snap;
-	int snap_nmi;
 	int thatcpu;
 
 	/* Check for being in the holdoff period. */
@@ -1531,10 +1526,10 @@
 	for_each_online_cpu(thatcpu) {
 		if (thatcpu == cpu)
 			continue;
-		snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
-		snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
+		snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
+						     thatcpu).dynticks);
 		smp_mb(); /* Order sampling of snap with end of grace period. */
-		if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
+		if ((snap & 0x1) != 0) {
 			per_cpu(rcu_dyntick_drain, cpu) = 0;
 			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
 			return rcu_needs_cpu_quick_check(cpu);

diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index aa0fd72..9678cc3 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c

@@ -69,10 +69,10 @@
 		   rdp->passed_quiesc, rdp->passed_quiesc_completed,
 		   rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
-	seq_printf(m, " dt=%d/%d dn=%d df=%lu",
-		   rdp->dynticks->dynticks,
+	seq_printf(m, " dt=%d/%d/%d df=%lu",
+		   atomic_read(&rdp->dynticks->dynticks),
 		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks->dynticks_nmi_nesting,
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -141,9 +141,9 @@
 		   rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
 	seq_printf(m, ",%d,%d,%d,%lu",
-		   rdp->dynticks->dynticks,
+		   atomic_read(&rdp->dynticks->dynticks),
 		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks->dynticks_nmi_nesting,
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -167,7 +167,7 @@
 {
 	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
 #ifdef CONFIG_NO_HZ
-	seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
+	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n");
 #ifdef CONFIG_TREE_PREEMPT_RCU

diff --git a/kernel/sched.c b/kernel/sched.c
index 5e43e9d..cbb3a0e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c

@@ -2573,7 +2573,26 @@
 	if (!next)
 		smp_send_reschedule(cpu);
 }
-#endif
+
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
+{
+	struct rq *rq;
+	int ret = 0;
+
+	rq = __task_rq_lock(p);
+	if (p->on_cpu) {
+		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+		ttwu_do_wakeup(rq, p, wake_flags);
+		ret = 1;
+	}
+	__task_rq_unlock(rq);
+
+	return ret;
+
+}
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+#endif /* CONFIG_SMP */
 
 static void ttwu_queue(struct task_struct *p, int cpu)
 {
@@ -2631,17 +2650,17 @@
 	while (p->on_cpu) {
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 		/*
-		 * If called from interrupt context we could have landed in the
-		 * middle of schedule(), in this case we should take care not
-		 * to spin on ->on_cpu if p is current, since that would
-		 * deadlock.
+		 * In case the architecture enables interrupts in
+		 * context_switch(), we cannot busy wait, since that
+		 * would lead to deadlocks when an interrupt hits and
+		 * tries to wake up @prev. So bail and do a complete
+		 * remote wakeup.
 		 */
-		if (p == current) {
-			ttwu_queue(p, cpu);
+		if (ttwu_activate_remote(p, wake_flags))
 			goto stat;
-		}
-#endif
+#else
 		cpu_relax();
+#endif
 	}
 	/*
 	 * Pairs with the smp_wmb() in finish_lock_switch().
@@ -5841,7 +5860,7 @@
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
-	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+	do_set_cpus_allowed(idle, cpumask_of(cpu));
 	/*
 	 * We're having a chicken and egg problem, even though we are
 	 * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -5929,6 +5948,16 @@
 }
 
 #ifdef CONFIG_SMP
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+	if (p->sched_class && p->sched_class->set_cpus_allowed)
+		p->sched_class->set_cpus_allowed(p, new_mask);
+	else {
+		cpumask_copy(&p->cpus_allowed, new_mask);
+		p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
+	}
+}
+
 /*
  * This is how migration works:
  *
@@ -5974,12 +6003,7 @@
 		goto out;
 	}
 
-	if (p->sched_class->set_cpus_allowed)
-		p->sched_class->set_cpus_allowed(p, new_mask);
-	else {
-		cpumask_copy(&p->cpus_allowed, new_mask);
-		p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
-	}
+	do_set_cpus_allowed(p, new_mask);
 
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e32a9b7..433491c2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c

@@ -1076,8 +1076,6 @@
 	se->on_rq = 0;
 	update_cfs_load(cfs_rq, 0);
 	account_entity_dequeue(cfs_rq, se);
-	update_min_vruntime(cfs_rq);
-	update_cfs_shares(cfs_rq);
 
 	/*
 	 * Normalize the entity after updating the min_vruntime because the
@@ -1086,6 +1084,9 @@
 	 */
 	if (!(flags & DEQUEUE_SLEEP))
 		se->vruntime -= cfs_rq->min_vruntime;
+
+	update_min_vruntime(cfs_rq);
+	update_cfs_shares(cfs_rq);
 }
 
 /*

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 64b2a37..88725c9 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c

@@ -1263,6 +1263,7 @@
 	if (!cpumask_test_cpu(this_cpu, lowest_mask))
 		this_cpu = -1; /* Skip this_cpu opt if not among lowest */
 
+	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (sd->flags & SD_WAKE_AFFINE) {
 			int best_cpu;
@@ -1272,15 +1273,20 @@
 			 * remote processor.
 			 */
 			if (this_cpu != -1 &&
-			    cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
+			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
+				rcu_read_unlock();
 				return this_cpu;
+			}
 
 			best_cpu = cpumask_first_and(lowest_mask,
 						     sched_domain_span(sd));
-			if (best_cpu < nr_cpu_ids)
+			if (best_cpu < nr_cpu_ids) {
+				rcu_read_unlock();
 				return best_cpu;
+			}
 		}
 	}
+	rcu_read_unlock();
 
 	/*
 	 * And finally, if there were no matches within the domains

diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 48ddf43..331e01b 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h

@@ -37,7 +37,7 @@
 
 #ifdef CONFIG_SMP
 		/* domain-specific stats */
-		preempt_disable();
+		rcu_read_lock();
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
@@ -64,7 +64,7 @@
 			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
 			    sd->ttwu_move_balance);
 		}
-		preempt_enable();
+		rcu_read_unlock();
 #endif
 	}
 	kfree(mask_str);

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d017c2c..1ee417f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c

@@ -109,12 +109,18 @@
 static void ftrace_global_list_func(unsigned long ip,
 				    unsigned long parent_ip)
 {
-	struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/
+	struct ftrace_ops *op;
 
+	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
+		return;
+
+	trace_recursion_set(TRACE_GLOBAL_BIT);
+	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
 	while (op != &ftrace_list_end) {
 		op->func(ip, parent_ip);
 		op = rcu_dereference_raw(op->next); /*see above*/
 	};
+	trace_recursion_clear(TRACE_GLOBAL_BIT);
 }
 
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
@@ -1638,12 +1644,12 @@
 	ftrace_run_update_code(command);
 }
 
-static void ftrace_startup(struct ftrace_ops *ops, int command)
+static int ftrace_startup(struct ftrace_ops *ops, int command)
 {
 	bool hash_enable = true;
 
 	if (unlikely(ftrace_disabled))
-		return;
+		return -ENODEV;
 
 	ftrace_start_up++;
 	command |= FTRACE_ENABLE_CALLS;
@@ -1662,6 +1668,8 @@
 		ftrace_hash_rec_enable(ops, 1);
 
 	ftrace_startup_enable(command);
+
+	return 0;
 }
 
 static void ftrace_shutdown(struct ftrace_ops *ops, int command)
@@ -2501,7 +2509,7 @@
 
 	ret = __register_ftrace_function(&trace_probe_ops);
 	if (!ret)
-		ftrace_startup(&trace_probe_ops, 0);
+		ret = ftrace_startup(&trace_probe_ops, 0);
 
 	ftrace_probe_registered = 1;
 }
@@ -3466,7 +3474,11 @@
 static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
 static inline void ftrace_startup_enable(int command) { }
 /* Keep as macros so we do not need to define the commands */
-# define ftrace_startup(ops, command)	do { } while (0)
+# define ftrace_startup(ops, command)			\
+	({						\
+		(ops)->flags |= FTRACE_OPS_FL_ENABLED;	\
+		0;					\
+	})
 # define ftrace_shutdown(ops, command)	do { } while (0)
 # define ftrace_startup_sysctl()	do { } while (0)
 # define ftrace_shutdown_sysctl()	do { } while (0)
@@ -3484,6 +3496,10 @@
 {
 	struct ftrace_ops *op;
 
+	if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
+		return;
+
+	trace_recursion_set(TRACE_INTERNAL_BIT);
 	/*
 	 * Some of the ops may be dynamically allocated,
 	 * they must be freed after a synchronize_sched().
@@ -3496,6 +3512,7 @@
 		op = rcu_dereference_raw(op->next);
 	};
 	preempt_enable_notrace();
+	trace_recursion_clear(TRACE_INTERNAL_BIT);
 }
 
 static void clear_ftrace_swapper(void)
@@ -3799,7 +3816,7 @@
 
 	ret = __register_ftrace_function(ops);
 	if (!ret)
-		ftrace_startup(ops, 0);
+		ret = ftrace_startup(ops, 0);
 
 
  out_unlock:
@@ -4045,7 +4062,7 @@
 	ftrace_graph_return = retfunc;
 	ftrace_graph_entry = entryfunc;
 
-	ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
+	ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
 
 out:
 	mutex_unlock(&ftrace_lock);

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0ef7b4b..b0c7aa4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c

@@ -2216,7 +2216,7 @@
 
 	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
 		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
-		    current->trace_recursion,
+		    trace_recursion_buffer(),
 		    hardirq_count() >> HARDIRQ_SHIFT,
 		    softirq_count() >> SOFTIRQ_SHIFT,
 		    in_nmi());
@@ -2226,9 +2226,9 @@
 
 static inline int trace_recursive_lock(void)
 {
-	current->trace_recursion++;
+	trace_recursion_inc();
 
-	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
 		return 0;
 
 	trace_recursive_fail();
@@ -2238,9 +2238,9 @@
 
 static inline void trace_recursive_unlock(void)
 {
-	WARN_ON_ONCE(!current->trace_recursion);
+	WARN_ON_ONCE(!trace_recursion_buffer());
 
-	current->trace_recursion--;
+	trace_recursion_dec();
 }
 
 #else

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6b69c4b..229f859 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h

@@ -784,4 +784,19 @@
 	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
 #include "trace_entries.h"
 
+/* Only current can touch trace_recursion */
+#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
+#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
+
+/* Ring buffer has the 10 LSB bits to count */
+#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
+
+/* for function tracing recursion */
+#define TRACE_INTERNAL_BIT		(1<<11)
+#define TRACE_GLOBAL_BIT		(1<<12)
+
+#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
+#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
+#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
+
 #endif /* _LINUX_KERNEL_TRACE_H */

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 2fe1103..686ec39 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c

@@ -1657,7 +1657,12 @@
 
 static __init void event_trace_self_test_with_function(void)
 {
-	register_ftrace_function(&trace_ops);
+	int ret;
+	ret = register_ftrace_function(&trace_ops);
+	if (WARN_ON(ret < 0)) {
+		pr_info("Failed to enable function tracer for event tests\n");
+		return;
+	}
 	pr_info("Running tests again, along with the function tracer\n");
 	event_trace_self_tests();
 	unregister_ftrace_function(&trace_ops);

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index cf535cc..e37de49 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c

@@ -353,6 +353,33 @@
 }
 EXPORT_SYMBOL(ftrace_print_symbols_seq);
 
+#if BITS_PER_LONG == 32
+const char *
+ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
+			 const struct trace_print_flags_u64 *symbol_array)
+{
+	int i;
+	const char *ret = p->buffer + p->len;
+
+	for (i = 0;  symbol_array[i].name; i++) {
+
+		if (val != symbol_array[i].mask)
+			continue;
+
+		trace_seq_puts(p, symbol_array[i].name);
+		break;
+	}
+
+	if (!p->len)
+		trace_seq_printf(p, "0x%llx", val);
+
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
+#endif
+
 const char *
 ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
 {

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7daa4b0..3d0c56a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c

@@ -415,15 +415,13 @@
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /* prepare/enable/disable routines */
-static int watchdog_prepare_cpu(int cpu)
+static void watchdog_prepare_cpu(int cpu)
 {
 	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
 
 	WARN_ON(per_cpu(softlockup_watchdog, cpu));
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = watchdog_timer_fn;
-
-	return 0;
 }
 
 static int watchdog_enable(int cpu)
@@ -542,17 +540,16 @@
 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	int hotcpu = (unsigned long)hcpu;
-	int err = 0;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		err = watchdog_prepare_cpu(hotcpu);
+		watchdog_prepare_cpu(hotcpu);
 		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		if (watchdog_enabled)
-			err = watchdog_enable(hotcpu);
+			watchdog_enable(hotcpu);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 619313e..507a22f 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c

@@ -144,7 +144,7 @@
 
 #define HARDIRQ_ENTER()				\
 	local_irq_disable();			\
-	irq_enter();				\
+	__irq_enter();				\
 	WARN_ON(!in_irq());
 
 #define HARDIRQ_EXIT()				\

diff --git a/mm/filemap.c b/mm/filemap.c
index bcdc393..d7b1057 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -1982,16 +1982,26 @@
 int file_remove_suid(struct file *file)
 {
 	struct dentry *dentry = file->f_path.dentry;
-	int killsuid = should_remove_suid(dentry);
-	int killpriv = security_inode_need_killpriv(dentry);
+	struct inode *inode = dentry->d_inode;
+	int killsuid;
+	int killpriv;
 	int error = 0;
 
+	/* Fast path for nothing security related */
+	if (IS_NOSEC(inode))
+		return 0;
+
+	killsuid = should_remove_suid(dentry);
+	killpriv = security_inode_need_killpriv(dentry);
+
 	if (killpriv < 0)
 		return killpriv;
 	if (killpriv)
 		error = security_inode_killpriv(dentry);
 	if (!error && killsuid)
 		error = __remove_suid(dentry, killsuid);
+	if (!error)
+		inode->i_flags |= S_NOSEC;
 
 	return error;
 }
@@ -2327,7 +2337,7 @@
 repeat:
 	page = find_lock_page(mapping, index);
 	if (page)
-		return page;
+		goto found;
 
 	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
 	if (!page)
@@ -2340,6 +2350,8 @@
 			goto repeat;
 		return NULL;
 	}
+found:
+	wait_on_page_writeback(page);
 	return page;
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);

diff --git a/mm/maccess.c b/mm/maccess.c
index e2b6f56..4cee182 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c

@@ -15,10 +15,10 @@
  * happens, handle that and return -EFAULT.
  */
 
-long __weak probe_kernel_read(void *dst, void *src, size_t size)
+long __weak probe_kernel_read(void *dst, const void *src, size_t size)
     __attribute__((alias("__probe_kernel_read")));
 
-long __probe_kernel_read(void *dst, void *src, size_t size)
+long __probe_kernel_read(void *dst, const void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
@@ -43,10 +43,10 @@
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long __weak probe_kernel_write(void *dst, void *src, size_t size)
+long __weak probe_kernel_write(void *dst, const void *src, size_t size)
     __attribute__((alias("__probe_kernel_write")));
 
-long __probe_kernel_write(void *dst, void *src, size_t size)
+long __probe_kernel_write(void *dst, const void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();

diff --git a/mm/rmap.c b/mm/rmap.c
index 3a39b51..0eb463e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c

@@ -352,6 +352,11 @@
  * The page might have been remapped to a different anon_vma or the anon_vma
  * returned may already be freed (and even reused).
  *
+ * In case it was remapped to a different anon_vma, the new anon_vma will be a
+ * child of the old anon_vma, and the anon_vma lifetime rules will therefore
+ * ensure that any anon_vma obtained from the page will still be valid for as
+ * long as we observe page_mapped() [ hence all those page_mapped() tests ].
+ *
  * All users of this function must be very careful when walking the anon_vma
  * chain and verify that the page in question is indeed mapped in it
  * [ something equivalent to page_mapped_in_vma() ].
@@ -405,6 +410,7 @@
 struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma = NULL;
+	struct anon_vma *root_anon_vma;
 	unsigned long anon_mapping;
 
 	rcu_read_lock();
@@ -415,13 +421,15 @@
 		goto out;
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
-	if (mutex_trylock(&anon_vma->root->mutex)) {
+	root_anon_vma = ACCESS_ONCE(anon_vma->root);
+	if (mutex_trylock(&root_anon_vma->mutex)) {
 		/*
-		 * If we observe a !0 refcount, then holding the lock ensures
-		 * the anon_vma will not go away, see __put_anon_vma().
+		 * If the page is still mapped, then this anon_vma is still
+		 * its anon_vma, and holding the mutex ensures that it will
+		 * not go away, see anon_vma_free().
 		 */
-		if (!atomic_read(&anon_vma->refcount)) {
-			anon_vma_unlock(anon_vma);
+		if (!page_mapped(page)) {
+			mutex_unlock(&root_anon_vma->mutex);
 			anon_vma = NULL;
 		}
 		goto out;
@@ -1014,7 +1022,7 @@
 		return;
 
 	VM_BUG_ON(!PageLocked(page));
-	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	/* address might be in next vma when migration races vma_adjust */
 	if (first)
 		__page_set_anon_rmap(page, vma, address, exclusive);
 	else
@@ -1709,7 +1717,7 @@
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!anon_vma);
-	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	/* address might be in next vma when migration races vma_adjust */
 	first = atomic_inc_and_test(&page->_mapcount);
 	if (first)
 		__hugepage_set_anon_rmap(page, vma, address, 0);

diff --git a/mm/shmem.c b/mm/shmem.c
index 1acfb26..d221a1c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c

@@ -1114,8 +1114,8 @@
 		delete_from_page_cache(page);
 		shmem_swp_set(info, entry, swap.val);
 		shmem_swp_unmap(entry);
-		spin_unlock(&info->lock);
 		swap_shmem_alloc(swap);
+		spin_unlock(&info->lock);
 		BUG_ON(page_mapped(page));
 		swap_writepage(page, wbc);
 		return 0;

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8d83f9d..b84d739 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c

@@ -13,10 +13,6 @@
  *	and need to be refreshed, or when a packet was damaged in transit.
  *	This may be have to be moved to the VFS layer.
  *
- *  NB: BSD uses a more intelligent approach to guessing when a request
- *  or reply has been lost by keeping the RTO estimate for each procedure.
- *  We currently make do with a constant timeout value.
- *
  *  Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
  *  Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
  */
@@ -32,7 +28,9 @@
 #include <linux/slab.h>
 #include <linux/utsname.h>
 #include <linux/workqueue.h>
+#include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/un.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
@@ -298,22 +296,27 @@
 	 * up a string representation of the passed-in address.
 	 */
 	if (args->servername == NULL) {
+		struct sockaddr_un *sun =
+				(struct sockaddr_un *)args->address;
+		struct sockaddr_in *sin =
+				(struct sockaddr_in *)args->address;
+		struct sockaddr_in6 *sin6 =
+				(struct sockaddr_in6 *)args->address;
+
 		servername[0] = '\0';
 		switch (args->address->sa_family) {
-		case AF_INET: {
-			struct sockaddr_in *sin =
-					(struct sockaddr_in *)args->address;
+		case AF_LOCAL:
+			snprintf(servername, sizeof(servername), "%s",
+				 sun->sun_path);
+			break;
+		case AF_INET:
 			snprintf(servername, sizeof(servername), "%pI4",
 				 &sin->sin_addr.s_addr);
 			break;
-		}
-		case AF_INET6: {
-			struct sockaddr_in6 *sin =
-					(struct sockaddr_in6 *)args->address;
+		case AF_INET6:
 			snprintf(servername, sizeof(servername), "%pI6",
-				 &sin->sin6_addr);
+				 &sin6->sin6_addr);
 			break;
-		}
 		default:
 			/* caller wants default server name, but
 			 * address family isn't recognized. */

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c652e4c..9a80a92 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c

@@ -16,6 +16,7 @@
 
 #include <linux/types.h>
 #include <linux/socket.h>
+#include <linux/un.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/kernel.h>
@@ -32,6 +33,8 @@
 # define RPCDBG_FACILITY	RPCDBG_BIND
 #endif
 
+#define RPCBIND_SOCK_PATHNAME	"/var/run/rpcbind.sock"
+
 #define RPCBIND_PROGRAM		(100000u)
 #define RPCBIND_PORT		(111u)
 
@@ -158,20 +161,69 @@
 	kfree(map);
 }
 
-static const struct sockaddr_in rpcb_inaddr_loopback = {
-	.sin_family		= AF_INET,
-	.sin_addr.s_addr	= htonl(INADDR_LOOPBACK),
-	.sin_port		= htons(RPCBIND_PORT),
-};
+/*
+ * Returns zero on success, otherwise a negative errno value
+ * is returned.
+ */
+static int rpcb_create_local_unix(void)
+{
+	static const struct sockaddr_un rpcb_localaddr_rpcbind = {
+		.sun_family		= AF_LOCAL,
+		.sun_path		= RPCBIND_SOCK_PATHNAME,
+	};
+	struct rpc_create_args args = {
+		.net		= &init_net,
+		.protocol	= XPRT_TRANSPORT_LOCAL,
+		.address	= (struct sockaddr *)&rpcb_localaddr_rpcbind,
+		.addrsize	= sizeof(rpcb_localaddr_rpcbind),
+		.servername	= "localhost",
+		.program	= &rpcb_program,
+		.version	= RPCBVERS_2,
+		.authflavor	= RPC_AUTH_NULL,
+	};
+	struct rpc_clnt *clnt, *clnt4;
+	int result = 0;
 
-static DEFINE_MUTEX(rpcb_create_local_mutex);
+	/*
+	 * Because we requested an RPC PING at transport creation time,
+	 * this works only if the user space portmapper is rpcbind, and
+	 * it's listening on AF_LOCAL on the named socket.
+	 */
+	clnt = rpc_create(&args);
+	if (IS_ERR(clnt)) {
+		dprintk("RPC:       failed to create AF_LOCAL rpcbind "
+				"client (errno %ld).\n", PTR_ERR(clnt));
+		result = -PTR_ERR(clnt);
+		goto out;
+	}
+
+	clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
+	if (IS_ERR(clnt4)) {
+		dprintk("RPC:       failed to bind second program to "
+				"rpcbind v4 client (errno %ld).\n",
+				PTR_ERR(clnt4));
+		clnt4 = NULL;
+	}
+
+	/* Protected by rpcb_create_local_mutex */
+	rpcb_local_clnt = clnt;
+	rpcb_local_clnt4 = clnt4;
+
+out:
+	return result;
+}
 
 /*
  * Returns zero on success, otherwise a negative errno value
  * is returned.
  */
-static int rpcb_create_local(void)
+static int rpcb_create_local_net(void)
 {
+	static const struct sockaddr_in rpcb_inaddr_loopback = {
+		.sin_family		= AF_INET,
+		.sin_addr.s_addr	= htonl(INADDR_LOOPBACK),
+		.sin_port		= htons(RPCBIND_PORT),
+	};
 	struct rpc_create_args args = {
 		.net		= &init_net,
 		.protocol	= XPRT_TRANSPORT_TCP,
@@ -186,13 +238,6 @@
 	struct rpc_clnt *clnt, *clnt4;
 	int result = 0;
 
-	if (rpcb_local_clnt)
-		return result;
-
-	mutex_lock(&rpcb_create_local_mutex);
-	if (rpcb_local_clnt)
-		goto out;
-
 	clnt = rpc_create(&args);
 	if (IS_ERR(clnt)) {
 		dprintk("RPC:       failed to create local rpcbind "
@@ -214,10 +259,34 @@
 		clnt4 = NULL;
 	}
 
+	/* Protected by rpcb_create_local_mutex */
 	rpcb_local_clnt = clnt;
 	rpcb_local_clnt4 = clnt4;
 
 out:
+	return result;
+}
+
+/*
+ * Returns zero on success, otherwise a negative errno value
+ * is returned.
+ */
+static int rpcb_create_local(void)
+{
+	static DEFINE_MUTEX(rpcb_create_local_mutex);
+	int result = 0;
+
+	if (rpcb_local_clnt)
+		return result;
+
+	mutex_lock(&rpcb_create_local_mutex);
+	if (rpcb_local_clnt)
+		goto out;
+
+	if (rpcb_create_local_unix() != 0)
+		result = rpcb_create_local_net();
+
+out:
 	mutex_unlock(&rpcb_create_local_mutex);
 	return result;
 }

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 08e05a8..2b90292 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c

@@ -942,6 +942,8 @@
 			if (progp->pg_vers[i]->vs_hidden)
 				continue;
 
+			dprintk("svc: attempting to unregister %sv%u\n",
+				progp->pg_name, i);
 			__svc_unregister(progp->pg_prog, i, progp->pg_name);
 		}
 	}

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b7d435c..af04f77 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c

@@ -387,6 +387,33 @@
 	return len;
 }
 
+static int svc_partial_recvfrom(struct svc_rqst *rqstp,
+				struct kvec *iov, int nr,
+				int buflen, unsigned int base)
+{
+	size_t save_iovlen;
+	void __user *save_iovbase;
+	unsigned int i;
+	int ret;
+
+	if (base == 0)
+		return svc_recvfrom(rqstp, iov, nr, buflen);
+
+	for (i = 0; i < nr; i++) {
+		if (iov[i].iov_len > base)
+			break;
+		base -= iov[i].iov_len;
+	}
+	save_iovlen = iov[i].iov_len;
+	save_iovbase = iov[i].iov_base;
+	iov[i].iov_len -= base;
+	iov[i].iov_base += base;
+	ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
+	iov[i].iov_len = save_iovlen;
+	iov[i].iov_base = save_iovbase;
+	return ret;
+}
+
 /*
  * Set socket snd and rcv buffer lengths
  */
@@ -409,7 +436,6 @@
 	lock_sock(sock->sk);
 	sock->sk->sk_sndbuf = snd * 2;
 	sock->sk->sk_rcvbuf = rcv * 2;
-	sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
 	sock->sk->sk_write_space(sock->sk);
 	release_sock(sock->sk);
 #endif
@@ -884,6 +910,56 @@
 	return NULL;
 }
 
+static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
+{
+	unsigned int i, len, npages;
+
+	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+		return 0;
+	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < npages; i++) {
+		if (rqstp->rq_pages[i] != NULL)
+			put_page(rqstp->rq_pages[i]);
+		BUG_ON(svsk->sk_pages[i] == NULL);
+		rqstp->rq_pages[i] = svsk->sk_pages[i];
+		svsk->sk_pages[i] = NULL;
+	}
+	rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
+	return len;
+}
+
+static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
+{
+	unsigned int i, len, npages;
+
+	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+		return;
+	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < npages; i++) {
+		svsk->sk_pages[i] = rqstp->rq_pages[i];
+		rqstp->rq_pages[i] = NULL;
+	}
+}
+
+static void svc_tcp_clear_pages(struct svc_sock *svsk)
+{
+	unsigned int i, len, npages;
+
+	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+		goto out;
+	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < npages; i++) {
+		BUG_ON(svsk->sk_pages[i] == NULL);
+		put_page(svsk->sk_pages[i]);
+		svsk->sk_pages[i] = NULL;
+	}
+out:
+	svsk->sk_tcplen = 0;
+}
+
 /*
  * Receive data.
  * If we haven't gotten the record length yet, get the next four bytes.
@@ -893,31 +969,15 @@
 static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
 	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
+	unsigned int want;
 	int len;
 
-	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
-		/* sndbuf needs to have room for one request
-		 * per thread, otherwise we can stall even when the
-		 * network isn't a bottleneck.
-		 *
-		 * We count all threads rather than threads in a
-		 * particular pool, which provides an upper bound
-		 * on the number of threads which will access the socket.
-		 *
-		 * rcvbuf just needs to be able to hold a few requests.
-		 * Normally they will be removed from the queue
-		 * as soon a a complete request arrives.
-		 */
-		svc_sock_setbufsize(svsk->sk_sock,
-				    (serv->sv_nrthreads+3) * serv->sv_max_mesg,
-				    3 * serv->sv_max_mesg);
-
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
 	if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
-		int		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
 		struct kvec	iov;
 
+		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
 		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
 		iov.iov_len  = want;
 		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
@@ -927,7 +987,7 @@
 		if (len < want) {
 			dprintk("svc: short recvfrom while reading record "
 				"length (%d of %d)\n", len, want);
-			goto err_again; /* record header not complete */
+			return -EAGAIN;
 		}
 
 		svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -954,83 +1014,75 @@
 		}
 	}
 
-	/* Check whether enough data is available */
-	len = svc_recv_available(svsk);
-	if (len < 0)
-		goto error;
+	if (svsk->sk_reclen < 8)
+		goto err_delete; /* client is nuts. */
 
-	if (len < svsk->sk_reclen) {
-		dprintk("svc: incomplete TCP record (%d of %d)\n",
-			len, svsk->sk_reclen);
-		goto err_again;	/* record not complete */
-	}
 	len = svsk->sk_reclen;
-	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
 	return len;
- error:
-	if (len == -EAGAIN)
-		dprintk("RPC: TCP recv_record got EAGAIN\n");
+error:
+	dprintk("RPC: TCP recv_record got %d\n", len);
 	return len;
- err_delete:
+err_delete:
 	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
- err_again:
 	return -EAGAIN;
 }
 
-static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
-			       struct rpc_rqst **reqpp, struct kvec *vec)
+static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
+	struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
 	struct rpc_rqst *req = NULL;
-	u32 *p;
-	u32 xid;
-	u32 calldir;
-	int len;
+	struct kvec *src, *dst;
+	__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
+	__be32 xid;
+	__be32 calldir;
 
-	len = svc_recvfrom(rqstp, vec, 1, 8);
-	if (len < 0)
-		goto error;
-
-	p = (u32 *)rqstp->rq_arg.head[0].iov_base;
 	xid = *p++;
 	calldir = *p;
 
-	if (calldir == 0) {
-		/* REQUEST is the most common case */
-		vec[0] = rqstp->rq_arg.head[0];
-	} else {
-		/* REPLY */
-		struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
+	if (bc_xprt)
+		req = xprt_lookup_rqst(bc_xprt, xid);
 
-		if (bc_xprt)
-			req = xprt_lookup_rqst(bc_xprt, xid);
-
-		if (!req) {
-			printk(KERN_NOTICE
-				"%s: Got unrecognized reply: "
-				"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
-				__func__, ntohl(calldir),
-				bc_xprt, xid);
-			vec[0] = rqstp->rq_arg.head[0];
-			goto out;
-		}
-
-		memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
-		       sizeof(struct xdr_buf));
-		/* copy the xid and call direction */
-		memcpy(req->rq_private_buf.head[0].iov_base,
-		       rqstp->rq_arg.head[0].iov_base, 8);
-		vec[0] = req->rq_private_buf.head[0];
+	if (!req) {
+		printk(KERN_NOTICE
+			"%s: Got unrecognized reply: "
+			"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
+			__func__, ntohl(calldir),
+			bc_xprt, xid);
+		return -EAGAIN;
 	}
- out:
-	vec[0].iov_base += 8;
-	vec[0].iov_len -= 8;
-	len = svsk->sk_reclen - 8;
- error:
-	*reqpp = req;
-	return len;
+
+	memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+	/*
+	 * XXX!: cheating for now!  Only copying HEAD.
+	 * But we know this is good enough for now (in fact, for any
+	 * callback reply in the forseeable future).
+	 */
+	dst = &req->rq_private_buf.head[0];
+	src = &rqstp->rq_arg.head[0];
+	if (dst->iov_len < src->iov_len)
+		return -EAGAIN; /* whatever; just giving up. */
+	memcpy(dst->iov_base, src->iov_base, src->iov_len);
+	xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
+	rqstp->rq_arg.len = 0;
+	return 0;
 }
 
+static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
+{
+	int i = 0;
+	int t = 0;
+
+	while (t < len) {
+		vec[i].iov_base = page_address(pages[i]);
+		vec[i].iov_len = PAGE_SIZE;
+		i++;
+		t += PAGE_SIZE;
+	}
+	return i;
+}
+
+
 /*
  * Receive data from a TCP socket.
  */
@@ -1041,8 +1093,10 @@
 	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	int		len;
 	struct kvec *vec;
-	int pnum, vlen;
-	struct rpc_rqst *req = NULL;
+	unsigned int want, base;
+	__be32 *p;
+	__be32 calldir;
+	int pnum;
 
 	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
 		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
@@ -1053,87 +1107,73 @@
 	if (len < 0)
 		goto error;
 
+	base = svc_tcp_restore_pages(svsk, rqstp);
+	want = svsk->sk_reclen - base;
+
 	vec = rqstp->rq_vec;
-	vec[0] = rqstp->rq_arg.head[0];
-	vlen = PAGE_SIZE;
 
-	/*
-	 * We have enough data for the whole tcp record. Let's try and read the
-	 * first 8 bytes to get the xid and the call direction. We can use this
-	 * to figure out if this is a call or a reply to a callback. If
-	 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
-	 * In that case, don't bother with the calldir and just read the data.
-	 * It will be rejected in svc_process.
-	 */
-	if (len >= 8) {
-		len = svc_process_calldir(svsk, rqstp, &req, vec);
-		if (len < 0)
-			goto err_again;
-		vlen -= 8;
-	}
+	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
+						svsk->sk_reclen);
 
-	pnum = 1;
-	while (vlen < len) {
-		vec[pnum].iov_base = (req) ?
-			page_address(req->rq_private_buf.pages[pnum - 1]) :
-			page_address(rqstp->rq_pages[pnum]);
-		vec[pnum].iov_len = PAGE_SIZE;
-		pnum++;
-		vlen += PAGE_SIZE;
-	}
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
 
 	/* Now receive data */
-	len = svc_recvfrom(rqstp, vec, pnum, len);
-	if (len < 0)
-		goto err_again;
-
-	/*
-	 * Account for the 8 bytes we read earlier
-	 */
-	len += 8;
-
-	if (req) {
-		xprt_complete_rqst(req->rq_task, len);
-		len = 0;
-		goto out;
+	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
+	if (len >= 0)
+		svsk->sk_tcplen += len;
+	if (len != want) {
+		if (len < 0 && len != -EAGAIN)
+			goto err_other;
+		svc_tcp_save_pages(svsk, rqstp);
+		dprintk("svc: incomplete TCP record (%d of %d)\n",
+			svsk->sk_tcplen, svsk->sk_reclen);
+		goto err_noclose;
 	}
-	dprintk("svc: TCP complete record (%d bytes)\n", len);
-	rqstp->rq_arg.len = len;
+
+	rqstp->rq_arg.len = svsk->sk_reclen;
 	rqstp->rq_arg.page_base = 0;
-	if (len <= rqstp->rq_arg.head[0].iov_len) {
-		rqstp->rq_arg.head[0].iov_len = len;
+	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
+		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
 		rqstp->rq_arg.page_len = 0;
-	} else {
-		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
-	}
+	} else
+		rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
 
 	rqstp->rq_xprt_ctxt   = NULL;
 	rqstp->rq_prot	      = IPPROTO_TCP;
 
-out:
+	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
+	calldir = p[1];
+	if (calldir)
+		len = receive_cb_reply(svsk, rqstp);
+
 	/* Reset TCP read info */
 	svsk->sk_reclen = 0;
 	svsk->sk_tcplen = 0;
+	/* If we have more data, signal svc_xprt_enqueue() to try again */
+	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
+		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+
+	if (len < 0)
+		goto error;
 
 	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpcnt++;
 
-	return len;
+	dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
+	return rqstp->rq_arg.len;
 
-err_again:
-	if (len == -EAGAIN) {
-		dprintk("RPC: TCP recvfrom got EAGAIN\n");
-		return len;
-	}
 error:
-	if (len != -EAGAIN) {
-		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
-		       svsk->sk_xprt.xpt_server->sv_name, -len);
-		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-	}
+	if (len != -EAGAIN)
+		goto err_other;
+	dprintk("RPC: TCP recvfrom got EAGAIN\n");
 	return -EAGAIN;
+err_other:
+	printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
+	       svsk->sk_xprt.xpt_server->sv_name, -len);
+	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+err_noclose:
+	return -EAGAIN;	/* record not complete */
 }
 
 /*
@@ -1304,18 +1344,10 @@
 
 		svsk->sk_reclen = 0;
 		svsk->sk_tcplen = 0;
+		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
 
 		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
 
-		/* initialise setting must have enough space to
-		 * receive and respond to one request.
-		 * svc_tcp_recvfrom will re-adjust if necessary
-		 */
-		svc_sock_setbufsize(svsk->sk_sock,
-				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
-				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
-
-		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		if (sk->sk_state != TCP_ESTABLISHED)
 			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1379,8 +1411,14 @@
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)
 		svc_udp_init(svsk, serv);
-	else
+	else {
+		/* initialise setting must have enough space to
+		 * receive and respond to one request.
+		 */
+		svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
+					4 * serv->sv_max_mesg);
 		svc_tcp_init(svsk, serv);
+	}
 
 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
 				svsk, svsk->sk_sk);
@@ -1562,8 +1600,10 @@
 
 	svc_sock_detach(xprt);
 
-	if (!test_bit(XPT_LISTENER, &xprt->xpt_flags))
+	if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
+		svc_tcp_clear_pages(svsk);
 		kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
+	}
 }
 
 /*

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 679cd67..f008c14 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c

@@ -638,6 +638,25 @@
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
+/**
+ * xdr_init_decode - Initialize an xdr_stream for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to XDR buffer from which to decode data
+ * @pages: list of pages to decode into
+ * @len: length in bytes of buffer in pages
+ */
+void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
+			   struct page **pages, unsigned int len)
+{
+	memset(buf, 0, sizeof(*buf));
+	buf->pages =  pages;
+	buf->page_len =  len;
+	buf->buflen =  len;
+	buf->len = len;
+	xdr_init_decode(xdr, buf, NULL);
+}
+EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
+
 static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
 	__be32 *p = xdr->p;

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bf005d3..72abb73 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c

@@ -19,6 +19,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/capability.h>
@@ -28,6 +29,7 @@
 #include <linux/in.h>
 #include <linux/net.h>
 #include <linux/mm.h>
+#include <linux/un.h>
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
@@ -45,6 +47,9 @@
 #include <net/tcp.h>
 
 #include "sunrpc.h"
+
+static void xs_close(struct rpc_xprt *xprt);
+
 /*
  * xprtsock tunables
  */
@@ -261,6 +266,11 @@
 	return (struct sockaddr *) &xprt->addr;
 }
 
+static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt)
+{
+	return (struct sockaddr_un *) &xprt->addr;
+}
+
 static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
 {
 	return (struct sockaddr_in *) &xprt->addr;
@@ -276,23 +286,34 @@
 	struct sockaddr *sap = xs_addr(xprt);
 	struct sockaddr_in6 *sin6;
 	struct sockaddr_in *sin;
+	struct sockaddr_un *sun;
 	char buf[128];
 
-	(void)rpc_ntop(sap, buf, sizeof(buf));
-	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
-
 	switch (sap->sa_family) {
+	case AF_LOCAL:
+		sun = xs_addr_un(xprt);
+		strlcpy(buf, sun->sun_path, sizeof(buf));
+		xprt->address_strings[RPC_DISPLAY_ADDR] =
+						kstrdup(buf, GFP_KERNEL);
+		break;
 	case AF_INET:
+		(void)rpc_ntop(sap, buf, sizeof(buf));
+		xprt->address_strings[RPC_DISPLAY_ADDR] =
+						kstrdup(buf, GFP_KERNEL);
 		sin = xs_addr_in(xprt);
 		snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
 		break;
 	case AF_INET6:
+		(void)rpc_ntop(sap, buf, sizeof(buf));
+		xprt->address_strings[RPC_DISPLAY_ADDR] =
+						kstrdup(buf, GFP_KERNEL);
 		sin6 = xs_addr_in6(xprt);
 		snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
 		break;
 	default:
 		BUG();
 	}
+
 	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
 }
 
@@ -495,6 +516,70 @@
 	return ret;
 }
 
+/*
+ * Construct a stream transport record marker in @buf.
+ */
+static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
+{
+	u32 reclen = buf->len - sizeof(rpc_fraghdr);
+	rpc_fraghdr *base = buf->head[0].iov_base;
+	*base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
+}
+
+/**
+ * xs_local_send_request - write an RPC request to an AF_LOCAL socket
+ * @task: RPC task that manages the state of an RPC request
+ *
+ * Return values:
+ *        0:	The request has been sent
+ *   EAGAIN:	The socket was blocked, please call again later to
+ *		complete the request
+ * ENOTCONN:	Caller needs to invoke connect logic then call again
+ *    other:	Some other error occured, the request was not sent
+ */
+static int xs_local_send_request(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+	struct sock_xprt *transport =
+				container_of(xprt, struct sock_xprt, xprt);
+	struct xdr_buf *xdr = &req->rq_snd_buf;
+	int status;
+
+	xs_encode_stream_record_marker(&req->rq_snd_buf);
+
+	xs_pktdump("packet data:",
+			req->rq_svec->iov_base, req->rq_svec->iov_len);
+
+	status = xs_sendpages(transport->sock, NULL, 0,
+						xdr, req->rq_bytes_sent);
+	dprintk("RPC:       %s(%u) = %d\n",
+			__func__, xdr->len - req->rq_bytes_sent, status);
+	if (likely(status >= 0)) {
+		req->rq_bytes_sent += status;
+		req->rq_xmit_bytes_sent += status;
+		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+			req->rq_bytes_sent = 0;
+			return 0;
+		}
+		status = -EAGAIN;
+	}
+
+	switch (status) {
+	case -EAGAIN:
+		status = xs_nospace(task);
+		break;
+	default:
+		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
+			-status);
+	case -EPIPE:
+		xs_close(xprt);
+		status = -ENOTCONN;
+	}
+
+	return status;
+}
+
 /**
  * xs_udp_send_request - write an RPC request to a UDP socket
  * @task: address of RPC task that manages the state of an RPC request
@@ -574,13 +659,6 @@
 		kernel_sock_shutdown(sock, SHUT_WR);
 }
 
-static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
-{
-	u32 reclen = buf->len - sizeof(rpc_fraghdr);
-	rpc_fraghdr *base = buf->head[0].iov_base;
-	*base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen);
-}
-
 /**
  * xs_tcp_send_request - write an RPC request to a TCP socket
  * @task: address of RPC task that manages the state of an RPC request
@@ -603,7 +681,7 @@
 	struct xdr_buf *xdr = &req->rq_snd_buf;
 	int status;
 
-	xs_encode_tcp_record_marker(&req->rq_snd_buf);
+	xs_encode_stream_record_marker(&req->rq_snd_buf);
 
 	xs_pktdump("packet data:",
 				req->rq_svec->iov_base,
@@ -785,6 +863,88 @@
 	return (struct rpc_xprt *) sk->sk_user_data;
 }
 
+static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
+{
+	struct xdr_skb_reader desc = {
+		.skb		= skb,
+		.offset		= sizeof(rpc_fraghdr),
+		.count		= skb->len - sizeof(rpc_fraghdr),
+	};
+
+	if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
+		return -1;
+	if (desc.count)
+		return -1;
+	return 0;
+}
+
+/**
+ * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
+ * @sk: socket with data to read
+ * @len: how much data to read
+ *
+ * Currently this assumes we can read the whole reply in a single gulp.
+ */
+static void xs_local_data_ready(struct sock *sk, int len)
+{
+	struct rpc_task *task;
+	struct rpc_xprt *xprt;
+	struct rpc_rqst *rovr;
+	struct sk_buff *skb;
+	int err, repsize, copied;
+	u32 _xid;
+	__be32 *xp;
+
+	read_lock_bh(&sk->sk_callback_lock);
+	dprintk("RPC:       %s...\n", __func__);
+	xprt = xprt_from_sock(sk);
+	if (xprt == NULL)
+		goto out;
+
+	skb = skb_recv_datagram(sk, 0, 1, &err);
+	if (skb == NULL)
+		goto out;
+
+	if (xprt->shutdown)
+		goto dropit;
+
+	repsize = skb->len - sizeof(rpc_fraghdr);
+	if (repsize < 4) {
+		dprintk("RPC:       impossible RPC reply size %d\n", repsize);
+		goto dropit;
+	}
+
+	/* Copy the XID from the skb... */
+	xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
+	if (xp == NULL)
+		goto dropit;
+
+	/* Look up and lock the request corresponding to the given XID */
+	spin_lock(&xprt->transport_lock);
+	rovr = xprt_lookup_rqst(xprt, *xp);
+	if (!rovr)
+		goto out_unlock;
+	task = rovr->rq_task;
+
+	copied = rovr->rq_private_buf.buflen;
+	if (copied > repsize)
+		copied = repsize;
+
+	if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
+		dprintk("RPC:       sk_buff copy failed\n");
+		goto out_unlock;
+	}
+
+	xprt_complete_rqst(task, copied);
+
+ out_unlock:
+	spin_unlock(&xprt->transport_lock);
+ dropit:
+	skb_free_datagram(sk, skb);
+ out:
+	read_unlock_bh(&sk->sk_callback_lock);
+}
+
 /**
  * xs_udp_data_ready - "data ready" callback for UDP sockets
  * @sk: socket with data to read
@@ -1344,7 +1504,6 @@
 	case TCP_CLOSE_WAIT:
 		/* The server initiated a shutdown of the socket */
 		xprt_force_disconnect(xprt);
-	case TCP_SYN_SENT:
 		xprt->connect_cookie++;
 	case TCP_CLOSING:
 		/*
@@ -1571,11 +1730,31 @@
 	return err;
 }
 
+/*
+ * We don't support autobind on AF_LOCAL sockets
+ */
+static void xs_local_rpcbind(struct rpc_task *task)
+{
+	xprt_set_bound(task->tk_xprt);
+}
+
+static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
+{
+}
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key xs_key[2];
 static struct lock_class_key xs_slock_key[2];
 
+static inline void xs_reclassify_socketu(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	BUG_ON(sock_owned_by_user(sk));
+	sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC",
+		&xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]);
+}
+
 static inline void xs_reclassify_socket4(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -1597,6 +1776,9 @@
 static inline void xs_reclassify_socket(int family, struct socket *sock)
 {
 	switch (family) {
+	case AF_LOCAL:
+		xs_reclassify_socketu(sock);
+		break;
 	case AF_INET:
 		xs_reclassify_socket4(sock);
 		break;
@@ -1606,6 +1788,10 @@
 	}
 }
 #else
+static inline void xs_reclassify_socketu(struct socket *sock)
+{
+}
+
 static inline void xs_reclassify_socket4(struct socket *sock)
 {
 }
@@ -1644,6 +1830,94 @@
 	return ERR_PTR(err);
 }
 
+static int xs_local_finish_connecting(struct rpc_xprt *xprt,
+				      struct socket *sock)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+									xprt);
+
+	if (!transport->inet) {
+		struct sock *sk = sock->sk;
+
+		write_lock_bh(&sk->sk_callback_lock);
+
+		xs_save_old_callbacks(transport, sk);
+
+		sk->sk_user_data = xprt;
+		sk->sk_data_ready = xs_local_data_ready;
+		sk->sk_write_space = xs_udp_write_space;
+		sk->sk_error_report = xs_error_report;
+		sk->sk_allocation = GFP_ATOMIC;
+
+		xprt_clear_connected(xprt);
+
+		/* Reset to new socket */
+		transport->sock = sock;
+		transport->inet = sk;
+
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+
+	/* Tell the socket layer to start connecting... */
+	xprt->stat.connect_count++;
+	xprt->stat.connect_start = jiffies;
+	return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
+}
+
+/**
+ * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
+ * @xprt: RPC transport to connect
+ * @transport: socket transport to connect
+ * @create_sock: function to create a socket of the correct type
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_local_setup_socket(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct socket *sock;
+	int status = -EIO;
+
+	if (xprt->shutdown)
+		goto out;
+
+	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+	status = __sock_create(xprt->xprt_net, AF_LOCAL,
+					SOCK_STREAM, 0, &sock, 1);
+	if (status < 0) {
+		dprintk("RPC:       can't create AF_LOCAL "
+			"transport socket (%d).\n", -status);
+		goto out;
+	}
+	xs_reclassify_socketu(sock);
+
+	dprintk("RPC:       worker connecting xprt %p via AF_LOCAL to %s\n",
+			xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+
+	status = xs_local_finish_connecting(xprt, sock);
+	switch (status) {
+	case 0:
+		dprintk("RPC:       xprt %p connected to %s\n",
+				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+		xprt_set_connected(xprt);
+		break;
+	case -ENOENT:
+		dprintk("RPC:       xprt %p: socket %s does not exist\n",
+				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+		break;
+	default:
+		printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
+				__func__, -status,
+				xprt->address_strings[RPC_DISPLAY_ADDR]);
+	}
+
+out:
+	xprt_clear_connecting(xprt);
+	xprt_wake_pending_tasks(xprt, status);
+}
+
 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1758,6 +2032,7 @@
 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+	int ret = -ENOTCONN;
 
 	if (!transport->inet) {
 		struct sock *sk = sock->sk;
@@ -1789,12 +2064,22 @@
 	}
 
 	if (!xprt_bound(xprt))
-		return -ENOTCONN;
+		goto out;
 
 	/* Tell the socket layer to start connecting... */
 	xprt->stat.connect_count++;
 	xprt->stat.connect_start = jiffies;
-	return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+	ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+	switch (ret) {
+	case 0:
+	case -EINPROGRESS:
+		/* SYN_SENT! */
+		xprt->connect_cookie++;
+		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+			xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+	}
+out:
+	return ret;
 }
 
 /**
@@ -1917,6 +2202,32 @@
 }
 
 /**
+ * xs_local_print_stats - display AF_LOCAL socket-specifc stats
+ * @xprt: rpc_xprt struct containing statistics
+ * @seq: output file
+ *
+ */
+static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+	long idle_time = 0;
+
+	if (xprt_connected(xprt))
+		idle_time = (long)(jiffies - xprt->last_used) / HZ;
+
+	seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu "
+			"%llu %llu\n",
+			xprt->stat.bind_count,
+			xprt->stat.connect_count,
+			xprt->stat.connect_time,
+			idle_time,
+			xprt->stat.sends,
+			xprt->stat.recvs,
+			xprt->stat.bad_xids,
+			xprt->stat.req_u,
+			xprt->stat.bklog_u);
+}
+
+/**
  * xs_udp_print_stats - display UDP socket-specifc stats
  * @xprt: rpc_xprt struct containing statistics
  * @seq: output file
@@ -2014,10 +2325,7 @@
 	unsigned long headoff;
 	unsigned long tailoff;
 
-	/*
-	 * Set up the rpc header and record marker stuff
-	 */
-	xs_encode_tcp_record_marker(xbufp);
+	xs_encode_stream_record_marker(xbufp);
 
 	tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
 	headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
@@ -2089,6 +2397,21 @@
 {
 }
 
+static struct rpc_xprt_ops xs_local_ops = {
+	.reserve_xprt		= xprt_reserve_xprt,
+	.release_xprt		= xs_tcp_release_xprt,
+	.rpcbind		= xs_local_rpcbind,
+	.set_port		= xs_local_set_port,
+	.connect		= xs_connect,
+	.buf_alloc		= rpc_malloc,
+	.buf_free		= rpc_free,
+	.send_request		= xs_local_send_request,
+	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+	.close			= xs_close,
+	.destroy		= xs_destroy,
+	.print_stats		= xs_local_print_stats,
+};
+
 static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
@@ -2150,6 +2473,8 @@
 	};
 
 	switch (family) {
+	case AF_LOCAL:
+		break;
 	case AF_INET:
 		memcpy(sap, &sin, sizeof(sin));
 		break;
@@ -2197,6 +2522,70 @@
 	return xprt;
 }
 
+static const struct rpc_timeout xs_local_default_timeout = {
+	.to_initval = 10 * HZ,
+	.to_maxval = 10 * HZ,
+	.to_retries = 2,
+};
+
+/**
+ * xs_setup_local - Set up transport to use an AF_LOCAL socket
+ * @args: rpc transport creation arguments
+ *
+ * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP
+ */
+static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
+{
+	struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr;
+	struct sock_xprt *transport;
+	struct rpc_xprt *xprt;
+	struct rpc_xprt *ret;
+
+	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+	if (IS_ERR(xprt))
+		return xprt;
+	transport = container_of(xprt, struct sock_xprt, xprt);
+
+	xprt->prot = 0;
+	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
+	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+
+	xprt->bind_timeout = XS_BIND_TO;
+	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+	xprt->idle_timeout = XS_IDLE_DISC_TO;
+
+	xprt->ops = &xs_local_ops;
+	xprt->timeout = &xs_local_default_timeout;
+
+	switch (sun->sun_family) {
+	case AF_LOCAL:
+		if (sun->sun_path[0] != '/') {
+			dprintk("RPC:       bad AF_LOCAL address: %s\n",
+					sun->sun_path);
+			ret = ERR_PTR(-EINVAL);
+			goto out_err;
+		}
+		xprt_set_bound(xprt);
+		INIT_DELAYED_WORK(&transport->connect_worker,
+					xs_local_setup_socket);
+		xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
+		break;
+	default:
+		ret = ERR_PTR(-EAFNOSUPPORT);
+		goto out_err;
+	}
+
+	dprintk("RPC:       set up xprt to %s via AF_LOCAL\n",
+			xprt->address_strings[RPC_DISPLAY_ADDR]);
+
+	if (try_module_get(THIS_MODULE))
+		return xprt;
+	ret = ERR_PTR(-EINVAL);
+out_err:
+	xprt_free(xprt);
+	return ret;
+}
+
 static const struct rpc_timeout xs_udp_default_timeout = {
 	.to_initval = 5 * HZ,
 	.to_maxval = 30 * HZ,
@@ -2438,6 +2827,14 @@
 	return ret;
 }
 
+static struct xprt_class	xs_local_transport = {
+	.list		= LIST_HEAD_INIT(xs_local_transport.list),
+	.name		= "named UNIX socket",
+	.owner		= THIS_MODULE,
+	.ident		= XPRT_TRANSPORT_LOCAL,
+	.setup		= xs_setup_local,
+};
+
 static struct xprt_class	xs_udp_transport = {
 	.list		= LIST_HEAD_INIT(xs_udp_transport.list),
 	.name		= "udp",
@@ -2473,6 +2870,7 @@
 		sunrpc_table_header = register_sysctl_table(sunrpc_table);
 #endif
 
+	xprt_register_transport(&xs_local_transport);
 	xprt_register_transport(&xs_udp_transport);
 	xprt_register_transport(&xs_tcp_transport);
 	xprt_register_transport(&xs_bc_tcp_transport);
@@ -2493,6 +2891,7 @@
 	}
 #endif
 
+	xprt_unregister_transport(&xs_local_transport);
 	xprt_unregister_transport(&xs_udp_transport);
 	xprt_unregister_transport(&xs_tcp_transport);
 	xprt_unregister_transport(&xs_bc_tcp_transport);

diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index 4be6036..f40a6af6 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h

@@ -43,6 +43,7 @@
 #undef ELF_R_INFO
 #undef Elf_r_info
 #undef ELF_ST_BIND
+#undef ELF_ST_TYPE
 #undef fn_ELF_R_SYM
 #undef fn_ELF_R_INFO
 #undef uint_t
@@ -76,6 +77,7 @@
 # define ELF_R_INFO		ELF64_R_INFO
 # define Elf_r_info		Elf64_r_info
 # define ELF_ST_BIND		ELF64_ST_BIND
+# define ELF_ST_TYPE		ELF64_ST_TYPE
 # define fn_ELF_R_SYM		fn_ELF64_R_SYM
 # define fn_ELF_R_INFO		fn_ELF64_R_INFO
 # define uint_t			uint64_t
@@ -108,6 +110,7 @@
 # define ELF_R_INFO		ELF32_R_INFO
 # define Elf_r_info		Elf32_r_info
 # define ELF_ST_BIND		ELF32_ST_BIND
+# define ELF_ST_TYPE		ELF32_ST_TYPE
 # define fn_ELF_R_SYM		fn_ELF32_R_SYM
 # define fn_ELF_R_INFO		fn_ELF32_R_INFO
 # define uint_t			uint32_t
@@ -427,6 +430,11 @@
 		if (txtndx == w2(symp->st_shndx)
 			/* avoid STB_WEAK */
 		    && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) {
+			/* function symbols on ARM have quirks, avoid them */
+			if (w2(ehdr->e_machine) == EM_ARM
+			    && ELF_ST_TYPE(symp->st_info) == STT_FUNC)
+				continue;
+
 			*recvalp = _w(symp->st_value);
 			return symp - sym0;
 		}

diff --git a/scripts/tags.sh b/scripts/tags.sh
index bd6185d..75c5d24 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh

@@ -132,7 +132,7 @@
 	--regex-asm='/^ENTRY\(([^)]*)\).*/\1/'                  \
 	--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \
 	--regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/'		\
-	--regex-c++='/^DEFINE_EVENT\(([^,)]*).*/trace_\1/'
+	--regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/'
 
 	all_kconfigs | xargs $1 -a                              \
 	--langdef=kconfig --language-force=kconfig              \
@@ -152,7 +152,9 @@
 {
 	all_sources | xargs $1 -a                               \
 	--regex='/^ENTRY(\([^)]*\)).*/\1/'                      \
-	--regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/'
+	--regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/'   \
+	--regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/'		\
+	--regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/'
 
 	all_kconfigs | xargs $1 -a                              \
 	--regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/'

diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index ae3a698..ec1bcec 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c

@@ -593,7 +593,8 @@
 			sa.aad.op = OP_SETPROCATTR;
 			sa.aad.info = name;
 			sa.aad.error = -EINVAL;
-			return aa_audit(AUDIT_APPARMOR_DENIED, NULL, GFP_KERNEL,
+			return aa_audit(AUDIT_APPARMOR_DENIED,
+					__aa_current_profile(), GFP_KERNEL,
 					&sa, NULL);
 		}
 	} else if (strcmp(name, "exec") == 0) {

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1455413..032ba63 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile

@@ -215,11 +215,13 @@
 LIB_H += ../../include/linux/perf_event.h
 LIB_H += ../../include/linux/rbtree.h
 LIB_H += ../../include/linux/list.h
+LIB_H += ../../include/linux/const.h
 LIB_H += ../../include/linux/hash.h
 LIB_H += ../../include/linux/stringify.h
 LIB_H += util/include/linux/bitmap.h
 LIB_H += util/include/linux/bitops.h
 LIB_H += util/include/linux/compiler.h
+LIB_H += util/include/linux/const.h
 LIB_H += util/include/linux/ctype.h
 LIB_H += util/include/linux/kernel.h
 LIB_H += util/include/linux/list.h

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index e18eb7e..7b139e1 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c

@@ -8,8 +8,6 @@
 #include "builtin.h"
 
 #include "util/util.h"
-
-#include "util/util.h"
 #include "util/color.h"
 #include <linux/list.h>
 #include "util/cache.h"

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0974f95..8e2c857 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c

@@ -823,6 +823,16 @@
 
 	symbol__init();
 
+	if (symbol_conf.kptr_restrict)
+		pr_warning(
+"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
+"check /proc/sys/kernel/kptr_restrict.\n\n"
+"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
+"file is not found in the buildid cache or in the vmlinux path.\n\n"
+"Samples in kernel modules won't be resolved at all.\n\n"
+"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
+"even with a suitable vmlinux or kallsyms file.\n\n");
+
 	if (no_buildid_cache || no_buildid)
 		disable_buildid_cache();
 

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 498c6f7..287a173 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c

@@ -116,6 +116,9 @@
 	if (al.filtered || (hide_unresolved && al.sym == NULL))
 		return 0;
 
+	if (al.map != NULL)
+		al.map->dso->hit = 1;
+
 	if (perf_session__add_hist_entry(session, &al, sample, evsel)) {
 		pr_debug("problem incrementing symbol period, skipping event\n");
 		return -1;
@@ -249,6 +252,8 @@
 	u64 nr_samples;
 	struct perf_session *session;
 	struct perf_evsel *pos;
+	struct map *kernel_map;
+	struct kmap *kernel_kmap;
 	const char *help = "For a higher level overview, try: perf report --sort comm,dso";
 
 	signal(SIGINT, sig_handler);
@@ -268,6 +273,24 @@
 	if (ret)
 		goto out_delete;
 
+	kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION];
+	kernel_kmap = map__kmap(kernel_map);
+	if (kernel_map == NULL ||
+	    (kernel_map->dso->hit &&
+	     (kernel_kmap->ref_reloc_sym == NULL ||
+	      kernel_kmap->ref_reloc_sym->addr == 0))) {
+		const struct dso *kdso = kernel_map->dso;
+
+		ui__warning(
+"Kernel address maps (/proc/{kallsyms,modules}) were restricted.\n\n"
+"Check /proc/sys/kernel/kptr_restrict before running 'perf record'.\n\n%s\n\n"
+"Samples in kernel modules can't be resolved as well.\n\n",
+			    RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION]) ?
+"As no suitable kallsyms nor vmlinux was found, kernel samples\n"
+"can't be resolved." :
+"If some relocation was applied (e.g. kexec) symbols may be misresolved.");
+	}
+
 	if (dump_trace) {
 		perf_session__fprintf_nr_events(session, stdout);
 		goto out_delete;

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 974f6d3..22747de 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c

@@ -10,7 +10,6 @@
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/trace-event.h"
-#include "util/parse-options.h"
 #include "util/util.h"
 #include "util/evlist.h"
 #include "util/evsel.h"

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 2d7934e..f2f3f49 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c

@@ -62,8 +62,6 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-
 static struct perf_top top = {
 	.count_filter		= 5,
 	.delay_secs		= 2,
@@ -82,6 +80,8 @@
 
 static int			default_interval		=      0;
 
+static bool			kptr_restrict_warned;
+static bool			vmlinux_warned;
 static bool			inherit				=  false;
 static int			realtime_prio			=      0;
 static bool			group				=  false;
@@ -740,7 +740,22 @@
 	    al.filtered)
 		return;
 
+	if (!kptr_restrict_warned &&
+	    symbol_conf.kptr_restrict &&
+	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
+		ui__warning(
+"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
+"Check /proc/sys/kernel/kptr_restrict.\n\n"
+"Kernel%s samples will not be resolved.\n",
+			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+			  " modules" : "");
+		if (use_browser <= 0)
+			sleep(5);
+		kptr_restrict_warned = true;
+	}
+
 	if (al.sym == NULL) {
+		const char *msg = "Kernel samples will not be resolved.\n";
 		/*
 		 * As we do lazy loading of symtabs we only will know if the
 		 * specified vmlinux file is invalid when we actually have a
@@ -752,12 +767,20 @@
 		 * --hide-kernel-symbols, even if the user specifies an
 		 * invalid --vmlinux ;-)
 		 */
-		if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
+		if (!kptr_restrict_warned && !vmlinux_warned &&
+		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
 		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
-			ui__warning("The %s file can't be used\n",
-				    symbol_conf.vmlinux_name);
-			exit_browser(0);
-			exit(1);
+			if (symbol_conf.vmlinux_name) {
+				ui__warning("The %s file can't be used.\n%s",
+					    symbol_conf.vmlinux_name, msg);
+			} else {
+				ui__warning("A vmlinux file was not found.\n%s",
+					    msg);
+			}
+
+			if (use_browser <= 0)
+				sleep(5);
+			vmlinux_warned = true;
 		}
 
 		return;

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6635fcd..0fe9adf 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c

@@ -553,9 +553,18 @@
 			goto out_problem;
 
 		perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps);
-		perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
-							 symbol_name,
-							 event->mmap.pgoff);
+
+		/*
+		 * Avoid using a zero address (kptr_restrict) for the ref reloc
+		 * symbol. Effectively having zero here means that at record
+		 * time /proc/sys/kernel/kptr_restrict was non zero.
+		 */
+		if (event->mmap.pgoff != 0) {
+			perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
+								 symbol_name,
+								 event->mmap.pgoff);
+		}
+
 		if (machine__is_default_guest(machine)) {
 			/*
 			 * preload dso of guest kernel and modules

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ee0fe0d..cca29ed 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c

@@ -35,7 +35,17 @@
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 {
+	int cpu, thread;
 	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
+
+	if (evsel->fd) {
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			for (thread = 0; thread < nthreads; thread++) {
+				FD(evsel, cpu, thread) = -1;
+			}
+		}
+	}
+
 	return evsel->fd != NULL ? 0 : -ENOMEM;
 }
 

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 0717beb..afb0849 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c

@@ -193,9 +193,13 @@
 	     *linkname = malloc(size), *targetname;
 	int len, err = -1;
 
-	if (is_kallsyms)
+	if (is_kallsyms) {
+		if (symbol_conf.kptr_restrict) {
+			pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
+			return 0;
+		}
 		realname = (char *)name;
-	else
+	} else
 		realname = realpath(name, NULL);
 
 	if (realname == NULL || filename == NULL || linkname == NULL)

diff --git a/tools/perf/util/include/linux/const.h b/tools/perf/util/include/linux/const.h
new file mode 100644
index 0000000..1b476c9
--- /dev/null
+++ b/tools/perf/util/include/linux/const.h

@@ -0,0 +1 @@
+#include "../../../../include/linux/const.h"

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 516876d..eec1963 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c

@@ -676,9 +676,30 @@
 	return count + moved;
 }
 
+static bool symbol__restricted_filename(const char *filename,
+					const char *restricted_filename)
+{
+	bool restricted = false;
+
+	if (symbol_conf.kptr_restrict) {
+		char *r = realpath(filename, NULL);
+
+		if (r != NULL) {
+			restricted = strcmp(r, restricted_filename) == 0;
+			free(r);
+			return restricted;
+		}
+	}
+
+	return restricted;
+}
+
 int dso__load_kallsyms(struct dso *dso, const char *filename,
 		       struct map *map, symbol_filter_t filter)
 {
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return -1;
+
 	if (dso__load_all_kallsyms(dso, filename, map) < 0)
 		return -1;
 
@@ -1790,6 +1811,9 @@
 		modules = path;
 	}
 
+	if (symbol__restricted_filename(path, "/proc/modules"))
+		return -1;
+
 	file = fopen(modules, "r");
 	if (file == NULL)
 		return -1;
@@ -2239,6 +2263,9 @@
 		}
 	}
 
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return 0;
+
 	if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0)
 		return 0;
 
@@ -2410,6 +2437,25 @@
 	return 0;
 }
 
+static bool symbol__read_kptr_restrict(void)
+{
+	bool value = false;
+
+	if (geteuid() != 0) {
+		FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
+		if (fp != NULL) {
+			char line[8];
+
+			if (fgets(line, sizeof(line), fp) != NULL)
+				value = atoi(line) != 0;
+
+			fclose(fp);
+		}
+	}
+
+	return value;
+}
+
 int symbol__init(void)
 {
 	const char *symfs;
@@ -2456,6 +2502,8 @@
 	if (symfs != symbol_conf.symfs)
 		free((void *)symfs);
 
+	symbol_conf.kptr_restrict = symbol__read_kptr_restrict();
+
 	symbol_conf.initialized = true;
 	return 0;
 

diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 242de01..325ee36 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h

@@ -75,7 +75,8 @@
 			use_callchain,
 			exclude_other,
 			show_cpu_utilization,
-			initialized;
+			initialized,
+			kptr_restrict;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,

diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index df0c6d2..74d3331 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c

@@ -198,6 +198,14 @@
 		.val = 'h',
 	},
 	{
+		.name = "event-idx",
+		.val = 'E',
+	},
+	{
+		.name = "no-event-idx",
+		.val = 'e',
+	},
+	{
 		.name = "indirect",
 		.val = 'I',
 	},
@@ -211,13 +219,17 @@
 
 static void help()
 {
-	fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n");
+	fprintf(stderr, "Usage: virtio_test [--help]"
+		" [--no-indirect]"
+		" [--no-event-idx]"
+		"\n");
 }
 
 int main(int argc, char **argv)
 {
 	struct vdev_info dev;
-	unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
+	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+		(1ULL << VIRTIO_RING_F_EVENT_IDX);
 	int o;
 
 	for (;;) {
@@ -228,6 +240,9 @@
 		case '?':
 			help();
 			exit(2);
+		case 'e':
+			features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
+			break;
 		case 'h':
 			help();
 			goto done;
commit	3f303103b884ca577908d3e5c0650ad12e40c586	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Wed Jun 01 21:47:39 2011 +0900
committer	Linus Torvalds <torvalds@linux-foundation.org>	Wed Jun 01 21:47:39 2011 +0900
tree	021b8baeba20df329b60798ad4fb55f280b0cbda
parent	5c6cce92bc8aee751aafe82c5d9caf7553226a3d [diff]
parent	63da029015b5255915cd6d61f19ffc276ad4635d [diff]