Merge branch 'ftrace' of git://github.com/rabinv/linux-2.6 into devel-stable
diff --git a/Documentation/DocBook/sh.tmpl b/Documentation/DocBook/sh.tmpl
index d858d92..4a38f60 100644
--- a/Documentation/DocBook/sh.tmpl
+++ b/Documentation/DocBook/sh.tmpl
@@ -79,10 +79,6 @@
       </sect2>
     </sect1>
   </chapter>
-  <chapter id="clk">
-    <title>Clock Framework Extensions</title>
-!Iinclude/linux/sh_clk.h
-  </chapter>
   <chapter id="mach">
     <title>Machine Specific Interfaces</title>
     <sect1 id="dreamcast">
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
index 4d4ce0e..b4665b9 100644
--- a/Documentation/DocBook/uio-howto.tmpl
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -16,7 +16,7 @@
 	</orgname>
 
 	<address>
-	   <email>hjk@linutronix.de</email>
+	   <email>hjk@hansjkoch.de</email>
 	</address>
     </affiliation>
 </author>
@@ -114,7 +114,7 @@
 
 <para>If you know of any translations for this document, or you are
 interested in translating it, please email me
-<email>hjk@linutronix.de</email>.
+<email>hjk@hansjkoch.de</email>.
 </para>
 </sect1>
 
@@ -171,7 +171,7 @@
 <title>Feedback</title>
 	<para>Find something wrong with this document? (Or perhaps something
 	right?) I would love to hear from you. Please email me at
-	<email>hjk@linutronix.de</email>.</para>
+	<email>hjk@hansjkoch.de</email>.</para>
 </sect1>
 </chapter>
 
diff --git a/Documentation/arm/OMAP/DSS b/Documentation/arm/OMAP/DSS
index 0af0e9e..888ae7b 100644
--- a/Documentation/arm/OMAP/DSS
+++ b/Documentation/arm/OMAP/DSS
@@ -255,9 +255,10 @@
 Kernel boot arguments
 ---------------------
 
-vram=<size>
-	- Amount of total VRAM to preallocate. For example, "10M". omapfb
-	  allocates memory for framebuffers from VRAM.
+vram=<size>[,<physaddr>]
+	- Amount of total VRAM to preallocate and optionally a physical start
+	  memory address. For example, "10M". omapfb allocates memory for
+	  framebuffers from VRAM.
 
 omapfb.mode=<display>:<mode>[,...]
 	- Default video mode for specified displays. For example,
diff --git a/Documentation/development-process/2.Process b/Documentation/development-process/2.Process
index 97726eb..911a451 100644
--- a/Documentation/development-process/2.Process
+++ b/Documentation/development-process/2.Process
@@ -154,7 +154,7 @@
    inclusion, it should be accepted by a relevant subsystem maintainer -
    though this acceptance is not a guarantee that the patch will make it
    all the way to the mainline.  The patch will show up in the maintainer's
-   subsystem tree and into the staging trees (described below).  When the
+   subsystem tree and into the -next trees (described below).  When the
    process works, this step leads to more extensive review of the patch and
    the discovery of any problems resulting from the integration of this
    patch with work being done by others.
@@ -236,7 +236,7 @@
 normally the right way to go.
 
 
-2.4: STAGING TREES
+2.4: NEXT TREES
 
 The chain of subsystem trees guides the flow of patches into the kernel,
 but it also raises an interesting question: what if somebody wants to look
@@ -250,7 +250,7 @@
 the interesting subsystem trees, but that would be a big and error-prone
 job.
 
-The answer comes in the form of staging trees, where subsystem trees are
+The answer comes in the form of -next trees, where subsystem trees are
 collected for testing and review.  The older of these trees, maintained by
 Andrew Morton, is called "-mm" (for memory management, which is how it got
 started).  The -mm tree integrates patches from a long list of subsystem
@@ -275,7 +275,7 @@
 Use of the MMOTM tree is likely to be a frustrating experience, though;
 there is a definite chance that it will not even compile.
 
-The other staging tree, started more recently, is linux-next, maintained by
+The other -next tree, started more recently, is linux-next, maintained by
 Stephen Rothwell.  The linux-next tree is, by design, a snapshot of what
 the mainline is expected to look like after the next merge window closes.
 Linux-next trees are announced on the linux-kernel and linux-next mailing
@@ -303,12 +303,25 @@
 See http://lwn.net/Articles/289013/ for more information on this topic, and
 stay tuned; much is still in flux where linux-next is involved.
 
-Besides the mmotm and linux-next trees, the kernel source tree now contains
-the drivers/staging/ directory and many sub-directories for drivers or
-filesystems that are on their way to being added to the kernel tree
-proper, but they remain in drivers/staging/ while they still need more
-work.
+2.4.1: STAGING TREES
 
+The kernel source tree now contains the drivers/staging/ directory, where
+many sub-directories for drivers or filesystems that are on their way to
+being added to the kernel tree live.  They remain in drivers/staging while
+they still need more work; once complete, they can be moved into the
+kernel proper.  This is a way to keep track of drivers that aren't
+up to Linux kernel coding or quality standards, but people may want to use
+them and track development.
+
+Greg Kroah-Hartman currently (as of 2.6.36) maintains the staging tree.
+Drivers that still need work are sent to him, with each driver having
+its own subdirectory in drivers/staging/.  Along with the driver source
+files, a TODO file should be present in the directory as well.  The TODO
+file lists the pending work that the driver needs for acceptance into
+the kernel proper, as well as a list of people that should be Cc'd for any
+patches to the driver.  Staging drivers that don't currently build should
+have their config entries depend upon CONFIG_BROKEN.  Once they can
+be successfully built without outside patches, CONFIG_BROKEN can be removed.
 
 2.5: TOOLS
 
diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX
index a618fd9..30a7054 100644
--- a/Documentation/fb/00-INDEX
+++ b/Documentation/fb/00-INDEX
@@ -4,33 +4,41 @@
 				    Geert Uytterhoeven <geert@linux-m68k.org>
 
 00-INDEX
-	- this file
+	- this file.
 arkfb.txt
 	- info on the fbdev driver for ARK Logic chips.
 aty128fb.txt
 	- info on the ATI Rage128 frame buffer driver.
 cirrusfb.txt
 	- info on the driver for Cirrus Logic chipsets.
+cmap_xfbdev.txt
+	- an introduction to fbdev's cmap structures.
 deferred_io.txt
 	- an introduction to deferred IO.
+efifb.txt
+	- info on the EFI platform driver for Intel based Apple computers.
+ep93xx-fb.txt
+	- info on the driver for EP93xx LCD controller.
 fbcon.txt
 	- intro to and usage guide for the framebuffer console (fbcon).
 framebuffer.txt
 	- introduction to frame buffer devices.
-imacfb.txt
-	- info on the generic EFI platform driver for Intel based Macs.
+gxfb.txt
+	- info on the framebuffer driver for AMD Geode GX2 based processors.
 intel810.txt
 	- documentation for the Intel 810/815 framebuffer driver.
 intelfb.txt
 	- docs for Intel 830M/845G/852GM/855GM/865G/915G/945G fb driver.
 internals.txt
 	- quick overview of frame buffer device internals.
+lxfb.txt
+	- info on the framebuffer driver for AMD Geode LX based processors.
 matroxfb.txt
 	- info on the Matrox framebuffer driver for Alpha, Intel and PPC.
+metronomefb.txt
+	- info on the driver for the Metronome display controller.
 modedb.txt
 	- info on the video mode database.
-matroxfb.txt
-	- info on the Matrox frame buffer driver.
 pvr2fb.txt
 	- info on the PowerVR 2 frame buffer driver.
 pxafb.txt
@@ -39,13 +47,23 @@
 	- info on the fbdev driver for S3 Trio/Virge chips.
 sa1100fb.txt
 	- information about the driver for the SA-1100 LCD controller.
+sh7760fb.txt
+	- info on the SH7760/SH7763 integrated LCDC Framebuffer driver.
 sisfb.txt
 	- info on the framebuffer device driver for various SiS chips.
 sstfb.txt
 	- info on the frame buffer driver for 3dfx' Voodoo Graphics boards.
 tgafb.txt
-	- info on the TGA (DECChip 21030) frame buffer driver
+	- info on the TGA (DECChip 21030) frame buffer driver.
+tridentfb.txt
+	info on the framebuffer driver for some Trident chip based cards.
+uvesafb.txt
+	- info on the userspace VESA (VBE2+ compliant) frame buffer device.
 vesafb.txt
-	- info on the VESA frame buffer device
+	- info on the VESA frame buffer device.
+viafb.modes
+	- list of modes for VIA Integration Graphic Chip.
+viafb.txt
+	- info on the VIA Integration Graphic Chip console framebuffer driver.
 vt8623fb.txt
 	- info on the fb driver for the graphics core in VIA VT8623 chipsets.
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index d8f36f9..6c2f55e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -554,3 +554,13 @@
 Who:    NeilBrown <neilb@suse.de>
 
 ----------------------------
+
+What:	i2c_adapter.id
+When:	June 2011
+Why:	This field is deprecated. I2C device drivers shouldn't change their
+	behavior based on the underlying I2C adapter. Instead, the I2C
+	adapter driver should instantiate the I2C devices and provide the
+	needed platform-specific information.
+Who:	Jean Delvare <khali@linux-fr.org>
+
+----------------------------
diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c
index d428cc9..fd53869f 100644
--- a/Documentation/filesystems/configfs/configfs_example_explicit.c
+++ b/Documentation/filesystems/configfs/configfs_example_explicit.c
@@ -89,7 +89,7 @@
 	char *p = (char *) page;
 
 	tmp = simple_strtoul(p, &p, 10);
-	if (!p || (*p && (*p != '\n')))
+	if ((*p != '\0') && (*p != '\n'))
 		return -EINVAL;
 
 	if (tmp > INT_MAX)
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 9633da0..792faa3 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -617,6 +617,16 @@
 		is configured as an output, this value may be written;
 		any nonzero value is treated as high.
 
+		If the pin can be configured as interrupt-generating interrupt
+		and if it has been configured to generate interrupts (see the
+		description of "edge"), you can poll(2) on that file and
+		poll(2) will return whenever the interrupt was triggered. If
+		you use poll(2), set the events POLLPRI and POLLERR. If you
+		use select(2), set the file descriptor in exceptfds. After
+		poll(2) returns, either lseek(2) to the beginning of the sysfs
+		file and read the new value or close the file and re-open it
+		to read the value.
+
 	"edge" ... reads as either "none", "rising", "falling", or
 		"both". Write these strings to select the signal edge(s)
 		that will make poll(2) on the "value" file return.
diff --git a/Documentation/hwmon/lm93 b/Documentation/hwmon/lm93
index ac711f3..7a10616 100644
--- a/Documentation/hwmon/lm93
+++ b/Documentation/hwmon/lm93
@@ -11,7 +11,7 @@
 	Mark M. Hoffman <mhoffman@lightlink.com>
 	Ported to 2.6 by Eric J. Bowersox <ericb@aspsys.com>
 	Adapted to 2.6.20 by Carsten Emde <ce@osadl.org>
-	Modified for mainline integration by Hans J. Koch <hjk@linutronix.de>
+	Modified for mainline integration by Hans J. Koch <hjk@hansjkoch.de>
 
 Module Parameters
 -----------------
diff --git a/Documentation/hwmon/max6650 b/Documentation/hwmon/max6650
index 8be7beb..c565650f 100644
--- a/Documentation/hwmon/max6650
+++ b/Documentation/hwmon/max6650
@@ -8,7 +8,7 @@
     Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6650-MAX6651.pdf
 
 Authors:
-    Hans J. Koch <hjk@linutronix.de>
+    Hans J. Koch <hjk@hansjkoch.de>
     John Morris <john.morris@spirentcom.com>
     Claus Gindhart <claus.gindhart@kontron.com>
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 92e83e5..cdd2a6e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2385,6 +2385,11 @@
 			improve throughput, but will also increase the
 			amount of memory reserved for use by the client.
 
+	swapaccount[=0|1]
+			[KNL] Enable accounting of swap in memory resource
+			controller if no parameter or 1 is given or disable
+			it if 0 is given (See Documentation/cgroups/memory.txt)
+
 	swiotlb=	[IA-64] Number of I/O TLB slabs
 
 	switches=	[HW,M68k]
diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
index 44d87ad..cd44558 100644
--- a/Documentation/power/opp.txt
+++ b/Documentation/power/opp.txt
@@ -37,6 +37,9 @@
 SoC framework	-> modifies on required cases certain OPPs	-> OPP layer
 		-> queries to search/retrieve information	->
 
+Architectures that provide a SoC framework for OPP should select ARCH_HAS_OPP
+to make the OPP layer available.
+
 OPP layer expects each domain to be represented by a unique device pointer. SoC
 framework registers a set of initial OPPs per device with the OPP layer. This
 list is expected to be an optimally small number typically around 5 per device.
diff --git a/Documentation/sh/clk.txt b/Documentation/sh/clk.txt
deleted file mode 100644
index 114b595..0000000
--- a/Documentation/sh/clk.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Clock framework on SuperH architecture
-
-The framework on SH extends existing API by the function clk_set_rate_ex,
-which prototype is as follows:
-
-    clk_set_rate_ex (struct clk *clk, unsigned long rate, int algo_id)
-
-The algo_id parameter is used to specify algorithm used to recalculate clocks,
-adjanced to clock, specified as first argument. It is assumed that algo_id==0
-means no changes to adjanced clock
-
-Internally, the clk_set_rate_ex forwards request to clk->ops->set_rate method,
-if it is present in ops structure. The method should set the clock rate and adjust
-all needed clocks according to the passed algo_id.
-Exact values for algo_id are machine-dependent. For the sh7722, the following
-values are defined:
-
-	NO_CHANGE	= 0,
-	IUS_N1_N1,	/* I:U = N:1, U:Sh = N:1 */
-	IUS_322,	/* I:U:Sh = 3:2:2	 */
-	IUS_522,	/* I:U:Sh = 5:2:2 	 */
-	IUS_N11,	/* I:U:Sh = N:1:1	 */
-	SB_N1,		/* Sh:B = N:1		 */
-	SB3_N1,		/* Sh:B3 = N:1		 */
-	SB3_32,		/* Sh:B3 = 3:2		 */
-	SB3_43,		/* Sh:B3 = 4:3		 */
-	SB3_54,		/* Sh:B3 = 5:4		 */
-	BP_N1,		/* B:P	 = N:1		 */
-	IP_N1		/* I:P	 = N:1		 */
-
-Each of these constants means relation between clocks that can be set via the FRQCR
-register
diff --git a/MAINTAINERS b/MAINTAINERS
index 88b74a7..b3be8b3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -945,7 +945,7 @@
 L:	linux-sh@vger.kernel.org
 W:	http://oss.renesas.com
 Q:	http://patchwork.kernel.org/project/linux-sh/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/genesis-2.6.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git rmobile-latest
 S:	Supported
 F:	arch/arm/mach-shmobile/
 F:	drivers/sh/
@@ -1359,7 +1359,7 @@
 
 BONDING DRIVER
 M:	Jay Vosburgh <fubar@us.ibm.com>
-L:	bonding-devel@lists.sourceforge.net
+L:	netdev@vger.kernel.org
 W:	http://sourceforge.net/projects/bonding/
 S:	Supported
 F:	drivers/net/bonding/
@@ -1829,6 +1829,13 @@
 S:	Supported
 F:	drivers/net/cxgb4vf/
 
+STMMAC ETHERNET DRIVER
+M:	Giuseppe Cavallaro <peppe.cavallaro@st.com>
+L:	netdev@vger.kernel.org
+W:	http://www.stlinux.com
+S:	Supported
+F:	drivers/net/stmmac/
+
 CYBERPRO FB DRIVER
 M:	Russell King <linux@arm.linux.org.uk>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -2008,6 +2015,7 @@
 DOCBOOK FOR DOCUMENTATION
 M:	Randy Dunlap <rdunlap@xenotime.net>
 S:	Maintained
+F:	scripts/kernel-doc
 
 DOCKING STATION DRIVER
 M:	Shaohua Li <shaohua.li@intel.com>
@@ -2018,6 +2026,7 @@
 DOCUMENTATION
 M:	Randy Dunlap <rdunlap@xenotime.net>
 L:	linux-doc@vger.kernel.org
+T:	quilt oss.oracle.com/~rdunlap/kernel-doc-patches/current/
 S:	Maintained
 F:	Documentation/
 
@@ -2435,9 +2444,12 @@
 FRAMEBUFFER LAYER
 L:	linux-fbdev@vger.kernel.org
 W:	http://linux-fbdev.sourceforge.net/
+Q:	http://patchwork.kernel.org/project/linux-fbdev/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/fbdev-2.6.git
 S:	Orphan
 F:	Documentation/fb/
-F:	drivers/video/fb*
+F:	drivers/video/
+F:	include/video/
 F:	include/linux/fb.h
 
 FREESCALE DMA DRIVER
@@ -5705,7 +5717,7 @@
 L:	linux-sh@vger.kernel.org
 W:	http://www.linux-sh.org
 Q:	http://patchwork.kernel.org/project/linux-sh/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git sh-latest
 S:	Supported
 F:	Documentation/sh/
 F:	arch/sh/
@@ -5827,6 +5839,8 @@
 W:	http://www.tilera.com/scm/
 S:	Supported
 F:	arch/tile/
+F:	drivers/char/hvc_tile.c
+F:	drivers/net/tile/
 
 TLAN NETWORK DRIVER
 M:	Samuel Chessman <chessman@tux.org>
diff --git a/Makefile b/Makefile
index 6619720..b31d213 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 37
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc3
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index bd620a4..49778bb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -647,7 +647,7 @@
 	select ARCH_HAS_CPUFREQ
 	select HAVE_CLK
 	select ARCH_USES_GETTIMEOFFSET
-	select HAVE_S3C2410_I2C
+	select HAVE_S3C2410_I2C if I2C
 	help
 	  Samsung S3C2410X CPU based systems, such as the Simtec Electronics
 	  BAST (<http://www.simtec.co.uk/products/EB110ITX/>), the IPAQ 1940 or
@@ -677,8 +677,8 @@
 	select S3C_DEV_NAND
 	select USB_ARCH_HAS_OHCI
 	select SAMSUNG_GPIOLIB_4BIT
-	select HAVE_S3C2410_I2C
-	select HAVE_S3C2410_WATCHDOG
+	select HAVE_S3C2410_I2C if I2C
+	select HAVE_S3C2410_WATCHDOG if WATCHDOG
 	help
 	  Samsung S3C64XX series based systems
 
@@ -687,10 +687,10 @@
 	select CPU_V6
 	select GENERIC_GPIO
 	select HAVE_CLK
-	select HAVE_S3C2410_WATCHDOG
+	select HAVE_S3C2410_WATCHDOG if WATCHDOG
 	select ARCH_USES_GETTIMEOFFSET
-	select HAVE_S3C2410_I2C
-	select HAVE_S3C_RTC
+	select HAVE_S3C2410_I2C if I2C
+	select HAVE_S3C_RTC if RTC_CLASS
 	help
 	  Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440,
 	  SMDK6450.
@@ -701,7 +701,7 @@
 	select GENERIC_GPIO
 	select HAVE_CLK
 	select ARCH_USES_GETTIMEOFFSET
-	select HAVE_S3C2410_WATCHDOG
+	select HAVE_S3C2410_WATCHDOG if WATCHDOG
 	help
 	  Samsung S5P6442 CPU based systems
 
@@ -712,9 +712,9 @@
 	select CPU_V7
 	select ARM_L1_CACHE_SHIFT_6
 	select ARCH_USES_GETTIMEOFFSET
-	select HAVE_S3C2410_I2C
-	select HAVE_S3C_RTC
-	select HAVE_S3C2410_WATCHDOG
+	select HAVE_S3C2410_I2C if I2C
+	select HAVE_S3C_RTC if RTC_CLASS
+	select HAVE_S3C2410_WATCHDOG if WATCHDOG
 	help
 	  Samsung S5PC100 series based systems
 
@@ -727,9 +727,9 @@
 	select ARM_L1_CACHE_SHIFT_6
 	select ARCH_HAS_CPUFREQ
 	select ARCH_USES_GETTIMEOFFSET
-	select HAVE_S3C2410_I2C
-	select HAVE_S3C_RTC
-	select HAVE_S3C2410_WATCHDOG
+	select HAVE_S3C2410_I2C if I2C
+	select HAVE_S3C_RTC if RTC_CLASS
+	select HAVE_S3C2410_WATCHDOG if WATCHDOG
 	help
 	  Samsung S5PV210/S5PC110 series based systems
 
@@ -740,9 +740,9 @@
 	select GENERIC_GPIO
 	select HAVE_CLK
 	select GENERIC_CLOCKEVENTS
-	select HAVE_S3C_RTC
-	select HAVE_S3C2410_I2C
-	select HAVE_S3C2410_WATCHDOG
+	select HAVE_S3C_RTC if RTC_CLASS
+	select HAVE_S3C2410_I2C if I2C
+	select HAVE_S3C2410_WATCHDOG if WATCHDOG
 	help
 	  Samsung S5PV310 series based systems
 
@@ -1206,10 +1206,11 @@
 	depends on EXPERIMENTAL
 	depends on GENERIC_CLOCKEVENTS
 	depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \
-		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\
-		 ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4
+		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
+		 ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \
+		 ARCH_MSM_SCORPIONMP
 	select USE_GENERIC_SMP_HELPERS
-	select HAVE_ARM_SCU
+	select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
@@ -1284,6 +1285,7 @@
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
 	depends on SMP && HOTPLUG && EXPERIMENTAL
+	depends on !ARCH_MSM
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
@@ -1292,7 +1294,7 @@
 	bool "Use local timer interrupts"
 	depends on SMP
 	default y
-	select HAVE_ARM_TWD
+	select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP
 	help
 	  Enable support for local timers on SMP platforms, rather then the
 	  legacy IPI broadcast method.  Local timers allows the system
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 6825c34..9be21ba 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -1084,6 +1084,6 @@
 reloc_end:
 
 		.align
-		.section ".stack", "w"
+		.section ".stack", "aw", %nobits
 user_stack:	.space	4096
 user_stack_end:
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index d081689..366a924 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -57,7 +57,7 @@
   .bss			: { *(.bss) }
   _end = .;
 
-  .stack (NOLOAD)	: { *(.stack) }
+  .stack		: { *(.stack) }
 
   .stab 0		: { *(.stab) }
   .stabstr 0		: { *(.stabstr) }
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 062b58c..749bb66 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -238,7 +238,7 @@
 	@ Slightly optimised to avoid incrementing the pointer twice
 	usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort
 	.if	\rept == 2
-	usraccoff \instr, \reg, \ptr, \inc, 4, \cond, \abort
+	usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort
 	.endif
 
 	add\cond \ptr, #\rept * \inc
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 68870c7..b4ffe9d 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -13,6 +13,10 @@
 
 #ifdef CONFIG_CPU_HAS_ASID
 #define ASID(mm)	((mm)->context.id & 255)
+
+/* init_mm.context.id_lock should be initialized. */
+#define INIT_MM_CONTEXT(name)                                                 \
+	.context.id_lock    = __SPIN_LOCK_UNLOCKED(name.context.id_lock),
 #else
 #define ASID(mm)	(0)
 #endif
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index b155414..53d1d5d 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -374,6 +374,9 @@
 
 #define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd)))
 
+/* we don't need complex calculations here as the pmd is folded into the pgd */
+#define pmd_addr_end(addr,end)	(end)
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a5035..421a4bb 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -4,9 +4,7 @@
  * ARM performance counter support.
  *
  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
- *
- * ARMv7 support: Jean Pihet <jpihet@mvista.com>
- * 2010 (c) MontaVista Software, LLC.
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
  *
  * This code is based on the sparc64 perf event code, which is in turn based
  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
@@ -69,29 +67,23 @@
 };
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 
-/* PMU names. */
-static const char *arm_pmu_names[] = {
-	[ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
-	[ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
-	[ARM_PERF_PMU_ID_V6]	  = "v6",
-	[ARM_PERF_PMU_ID_V6MP]	  = "v6mpcore",
-	[ARM_PERF_PMU_ID_CA8]	  = "ARMv7 Cortex-A8",
-	[ARM_PERF_PMU_ID_CA9]	  = "ARMv7 Cortex-A9",
-};
-
 struct arm_pmu {
 	enum arm_perf_pmu_ids id;
+	const char	*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct hw_perf_event *evt, int idx);
 	void		(*disable)(struct hw_perf_event *evt, int idx);
-	int		(*event_map)(int evt);
-	u64		(*raw_event)(u64);
 	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
 					 struct hw_perf_event *hwc);
 	u32		(*read_counter)(int idx);
 	void		(*write_counter)(int idx, u32 val);
 	void		(*start)(void);
 	void		(*stop)(void);
+	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
+				    [PERF_COUNT_HW_CACHE_OP_MAX]
+				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
+	const unsigned	(*event_map)[PERF_COUNT_HW_MAX];
+	u32		raw_event_mask;
 	int		num_events;
 	u64		max_period;
 };
@@ -136,10 +128,6 @@
 
 #define CACHE_OP_UNSUPPORTED		0xFFFF
 
-static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-				     [PERF_COUNT_HW_CACHE_OP_MAX]
-				     [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
 static int
 armpmu_map_cache_event(u64 config)
 {
@@ -157,7 +145,7 @@
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;
 
-	ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
+	ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
 
 	if (ret == CACHE_OP_UNSUPPORTED)
 		return -ENOENT;
@@ -166,6 +154,19 @@
 }
 
 static int
+armpmu_map_event(u64 config)
+{
+	int mapping = (*armpmu->event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
+}
+
+static int
+armpmu_map_raw_event(u64 config)
+{
+	return (int)(config & armpmu->raw_event_mask);
+}
+
+static int
 armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
 			int idx)
@@ -458,11 +459,11 @@
 
 	/* Decode the generic type into an ARM event identifier. */
 	if (PERF_TYPE_HARDWARE == event->attr.type) {
-		mapping = armpmu->event_map(event->attr.config);
+		mapping = armpmu_map_event(event->attr.config);
 	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
 		mapping = armpmu_map_cache_event(event->attr.config);
 	} else if (PERF_TYPE_RAW == event->attr.type) {
-		mapping = armpmu->raw_event(event->attr.config);
+		mapping = armpmu_map_raw_event(event->attr.config);
 	} else {
 		pr_debug("event type %x not supported\n", event->attr.type);
 		return -EOPNOTSUPP;
@@ -603,2366 +604,10 @@
 	.read		= armpmu_read,
 };
 
-/*
- * ARMv6 Performance counter handling code.
- *
- * ARMv6 has 2 configurable performance counters and a single cycle counter.
- * They all share a single reset bit but can be written to zero so we can use
- * that for a reset.
- *
- * The counters can't be individually enabled or disabled so when we remove
- * one event and replace it with another we could get spurious counts from the
- * wrong event. However, we can take advantage of the fact that the
- * performance counters can export events to the event bus, and the event bus
- * itself can be monitored. This requires that we *don't* export the events to
- * the event bus. The procedure for disabling a configurable counter is:
- *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
- *	  effectively stops the counter from counting.
- *	- disable the counter's interrupt generation (each counter has it's
- *	  own interrupt enable bit).
- * Once stopped, the counter value can be written as 0 to reset.
- *
- * To enable a counter:
- *	- enable the counter's interrupt generation.
- *	- set the new event type.
- *
- * Note: the dedicated cycle counter only counts cycles and can't be
- * enabled/disabled independently of the others. When we want to disable the
- * cycle counter, we have to just disable the interrupt reporting and start
- * ignoring that counter. When re-enabling, we have to reset the value and
- * enable the interrupt.
- */
-
-enum armv6_perf_types {
-	ARMV6_PERFCTR_ICACHE_MISS	    = 0x0,
-	ARMV6_PERFCTR_IBUF_STALL	    = 0x1,
-	ARMV6_PERFCTR_DDEP_STALL	    = 0x2,
-	ARMV6_PERFCTR_ITLB_MISS		    = 0x3,
-	ARMV6_PERFCTR_DTLB_MISS		    = 0x4,
-	ARMV6_PERFCTR_BR_EXEC		    = 0x5,
-	ARMV6_PERFCTR_BR_MISPREDICT	    = 0x6,
-	ARMV6_PERFCTR_INSTR_EXEC	    = 0x7,
-	ARMV6_PERFCTR_DCACHE_HIT	    = 0x9,
-	ARMV6_PERFCTR_DCACHE_ACCESS	    = 0xA,
-	ARMV6_PERFCTR_DCACHE_MISS	    = 0xB,
-	ARMV6_PERFCTR_DCACHE_WBACK	    = 0xC,
-	ARMV6_PERFCTR_SW_PC_CHANGE	    = 0xD,
-	ARMV6_PERFCTR_MAIN_TLB_MISS	    = 0xF,
-	ARMV6_PERFCTR_EXPL_D_ACCESS	    = 0x10,
-	ARMV6_PERFCTR_LSU_FULL_STALL	    = 0x11,
-	ARMV6_PERFCTR_WBUF_DRAINED	    = 0x12,
-	ARMV6_PERFCTR_CPU_CYCLES	    = 0xFF,
-	ARMV6_PERFCTR_NOP		    = 0x20,
-};
-
-enum armv6_counters {
-	ARMV6_CYCLE_COUNTER = 1,
-	ARMV6_COUNTER0,
-	ARMV6_COUNTER1,
-};
-
-/*
- * The hardware events that we support. We do support cache operations but
- * we have harvard caches and no way to combine instruction and data
- * accesses/misses in hardware.
- */
-static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
-	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6_PERFCTR_CPU_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6_PERFCTR_INSTR_EXEC,
-	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
-	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6_PERFCTR_BR_MISPREDICT,
-	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
-};
-
-static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-					  [PERF_COUNT_HW_CACHE_OP_MAX]
-					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-	[C(L1D)] = {
-		/*
-		 * The performance counters don't differentiate between read
-		 * and write accesses/misses so this isn't strictly correct,
-		 * but it's the best we can do. Writes and reads get
-		 * combined.
-		 */
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(L1I)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(LL)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(DTLB)] = {
-		/*
-		 * The ARM performance counters can count micro DTLB misses,
-		 * micro ITLB misses and main TLB misses. There isn't an event
-		 * for TLB misses, so use the micro misses here and if users
-		 * want the main TLB misses they can use a raw counter.
-		 */
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(ITLB)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(BPU)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-};
-
-enum armv6mpcore_perf_types {
-	ARMV6MPCORE_PERFCTR_ICACHE_MISS	    = 0x0,
-	ARMV6MPCORE_PERFCTR_IBUF_STALL	    = 0x1,
-	ARMV6MPCORE_PERFCTR_DDEP_STALL	    = 0x2,
-	ARMV6MPCORE_PERFCTR_ITLB_MISS	    = 0x3,
-	ARMV6MPCORE_PERFCTR_DTLB_MISS	    = 0x4,
-	ARMV6MPCORE_PERFCTR_BR_EXEC	    = 0x5,
-	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
-	ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
-	ARMV6MPCORE_PERFCTR_INSTR_EXEC	    = 0x8,
-	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
-	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
-	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
-	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
-	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
-	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
-	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
-	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
-	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
-	ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
-	ARMV6MPCORE_PERFCTR_CPU_CYCLES	    = 0xFF,
-};
-
-/*
- * The hardware events that we support. We do support cache operations but
- * we have harvard caches and no way to combine instruction and data
- * accesses/misses in hardware.
- */
-static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
-	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
-	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
-	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
-	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
-};
-
-static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-					[PERF_COUNT_HW_CACHE_OP_MAX]
-					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-	[C(L1D)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]  =
-				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
-			[C(RESULT_MISS)]    =
-				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]  =
-				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
-			[C(RESULT_MISS)]    =
-				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(L1I)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(LL)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(DTLB)] = {
-		/*
-		 * The ARM performance counters can count micro DTLB misses,
-		 * micro ITLB misses and main TLB misses. There isn't an event
-		 * for TLB misses, so use the micro misses here and if users
-		 * want the main TLB misses they can use a raw counter.
-		 */
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(ITLB)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(BPU)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
-		},
-	},
-};
-
-static inline unsigned long
-armv6_pmcr_read(void)
-{
-	u32 val;
-	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
-	return val;
-}
-
-static inline void
-armv6_pmcr_write(unsigned long val)
-{
-	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
-}
-
-#define ARMV6_PMCR_ENABLE		(1 << 0)
-#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
-#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
-#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
-#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
-#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
-#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
-#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
-#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
-#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
-#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
-#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
-#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
-#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
-
-#define ARMV6_PMCR_OVERFLOWED_MASK \
-	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
-	 ARMV6_PMCR_CCOUNT_OVERFLOW)
-
-static inline int
-armv6_pmcr_has_overflowed(unsigned long pmcr)
-{
-	return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
-}
-
-static inline int
-armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
-				  enum armv6_counters counter)
-{
-	int ret = 0;
-
-	if (ARMV6_CYCLE_COUNTER == counter)
-		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
-	else if (ARMV6_COUNTER0 == counter)
-		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
-	else if (ARMV6_COUNTER1 == counter)
-		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
-	else
-		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-
-	return ret;
-}
-
-static inline u32
-armv6pmu_read_counter(int counter)
-{
-	unsigned long value = 0;
-
-	if (ARMV6_CYCLE_COUNTER == counter)
-		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
-	else if (ARMV6_COUNTER0 == counter)
-		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
-	else if (ARMV6_COUNTER1 == counter)
-		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
-	else
-		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-
-	return value;
-}
-
-static inline void
-armv6pmu_write_counter(int counter,
-		       u32 value)
-{
-	if (ARMV6_CYCLE_COUNTER == counter)
-		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
-	else if (ARMV6_COUNTER0 == counter)
-		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
-	else if (ARMV6_COUNTER1 == counter)
-		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
-	else
-		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-}
-
-void
-armv6pmu_enable_event(struct hw_perf_event *hwc,
-		      int idx)
-{
-	unsigned long val, mask, evt, flags;
-
-	if (ARMV6_CYCLE_COUNTER == idx) {
-		mask	= 0;
-		evt	= ARMV6_PMCR_CCOUNT_IEN;
-	} else if (ARMV6_COUNTER0 == idx) {
-		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
-		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
-			  ARMV6_PMCR_COUNT0_IEN;
-	} else if (ARMV6_COUNTER1 == idx) {
-		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
-		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
-			  ARMV6_PMCR_COUNT1_IEN;
-	} else {
-		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
-		return;
-	}
-
-	/*
-	 * Mask out the current event and set the counter to count the event
-	 * that we're interested in.
-	 */
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = armv6_pmcr_read();
-	val &= ~mask;
-	val |= evt;
-	armv6_pmcr_write(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static irqreturn_t
-armv6pmu_handle_irq(int irq_num,
-		    void *dev)
-{
-	unsigned long pmcr = armv6_pmcr_read();
-	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
-	struct pt_regs *regs;
-	int idx;
-
-	if (!armv6_pmcr_has_overflowed(pmcr))
-		return IRQ_NONE;
-
-	regs = get_irq_regs();
-
-	/*
-	 * The interrupts are cleared by writing the overflow flags back to
-	 * the control register. All of the other bits don't have any effect
-	 * if they are rewritten, so write the whole value back.
-	 */
-	armv6_pmcr_write(pmcr);
-
-	perf_sample_data_init(&data, 0);
-
-	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-		struct hw_perf_event *hwc;
-
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		/*
-		 * We have a single interrupt for all counters. Check that
-		 * each counter has overflowed before we process it.
-		 */
-		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
-			continue;
-
-		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
-		data.period = event->hw.last_period;
-		if (!armpmu_event_set_period(event, hwc, idx))
-			continue;
-
-		if (perf_event_overflow(event, 0, &data, regs))
-			armpmu->disable(hwc, idx);
-	}
-
-	/*
-	 * Handle the pending perf events.
-	 *
-	 * Note: this call *must* be run with interrupts disabled. For
-	 * platforms that can have the PMU interrupts raised as an NMI, this
-	 * will not work.
-	 */
-	irq_work_run();
-
-	return IRQ_HANDLED;
-}
-
-static void
-armv6pmu_start(void)
-{
-	unsigned long flags, val;
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = armv6_pmcr_read();
-	val |= ARMV6_PMCR_ENABLE;
-	armv6_pmcr_write(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-void
-armv6pmu_stop(void)
-{
-	unsigned long flags, val;
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = armv6_pmcr_read();
-	val &= ~ARMV6_PMCR_ENABLE;
-	armv6_pmcr_write(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static inline int
-armv6pmu_event_map(int config)
-{
-	int mapping = armv6_perf_map[config];
-	if (HW_OP_UNSUPPORTED == mapping)
-		mapping = -EOPNOTSUPP;
-	return mapping;
-}
-
-static inline int
-armv6mpcore_pmu_event_map(int config)
-{
-	int mapping = armv6mpcore_perf_map[config];
-	if (HW_OP_UNSUPPORTED == mapping)
-		mapping = -EOPNOTSUPP;
-	return mapping;
-}
-
-static u64
-armv6pmu_raw_event(u64 config)
-{
-	return config & 0xff;
-}
-
-static int
-armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
-		       struct hw_perf_event *event)
-{
-	/* Always place a cycle counter into the cycle counter. */
-	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
-		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
-			return -EAGAIN;
-
-		return ARMV6_CYCLE_COUNTER;
-	} else {
-		/*
-		 * For anything other than a cycle counter, try and use
-		 * counter0 and counter1.
-		 */
-		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
-			return ARMV6_COUNTER1;
-		}
-
-		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
-			return ARMV6_COUNTER0;
-		}
-
-		/* The counters are all in use. */
-		return -EAGAIN;
-	}
-}
-
-static void
-armv6pmu_disable_event(struct hw_perf_event *hwc,
-		       int idx)
-{
-	unsigned long val, mask, evt, flags;
-
-	if (ARMV6_CYCLE_COUNTER == idx) {
-		mask	= ARMV6_PMCR_CCOUNT_IEN;
-		evt	= 0;
-	} else if (ARMV6_COUNTER0 == idx) {
-		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
-		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
-	} else if (ARMV6_COUNTER1 == idx) {
-		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
-		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
-	} else {
-		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
-		return;
-	}
-
-	/*
-	 * Mask out the current event and set the counter to count the number
-	 * of ETM bus signal assertion cycles. The external reporting should
-	 * be disabled and so this should never increment.
-	 */
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = armv6_pmcr_read();
-	val &= ~mask;
-	val |= evt;
-	armv6_pmcr_write(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static void
-armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
-			      int idx)
-{
-	unsigned long val, mask, flags, evt = 0;
-
-	if (ARMV6_CYCLE_COUNTER == idx) {
-		mask	= ARMV6_PMCR_CCOUNT_IEN;
-	} else if (ARMV6_COUNTER0 == idx) {
-		mask	= ARMV6_PMCR_COUNT0_IEN;
-	} else if (ARMV6_COUNTER1 == idx) {
-		mask	= ARMV6_PMCR_COUNT1_IEN;
-	} else {
-		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
-		return;
-	}
-
-	/*
-	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
-	 * simply disable the interrupt reporting.
-	 */
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = armv6_pmcr_read();
-	val &= ~mask;
-	val |= evt;
-	armv6_pmcr_write(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static const struct arm_pmu armv6pmu = {
-	.id			= ARM_PERF_PMU_ID_V6,
-	.handle_irq		= armv6pmu_handle_irq,
-	.enable			= armv6pmu_enable_event,
-	.disable		= armv6pmu_disable_event,
-	.event_map		= armv6pmu_event_map,
-	.raw_event		= armv6pmu_raw_event,
-	.read_counter		= armv6pmu_read_counter,
-	.write_counter		= armv6pmu_write_counter,
-	.get_event_idx		= armv6pmu_get_event_idx,
-	.start			= armv6pmu_start,
-	.stop			= armv6pmu_stop,
-	.num_events		= 3,
-	.max_period		= (1LLU << 32) - 1,
-};
-
-/*
- * ARMv6mpcore is almost identical to single core ARMv6 with the exception
- * that some of the events have different enumerations and that there is no
- * *hack* to stop the programmable counters. To stop the counters we simply
- * disable the interrupt reporting and update the event. When unthrottling we
- * reset the period and enable the interrupt reporting.
- */
-static const struct arm_pmu armv6mpcore_pmu = {
-	.id			= ARM_PERF_PMU_ID_V6MP,
-	.handle_irq		= armv6pmu_handle_irq,
-	.enable			= armv6pmu_enable_event,
-	.disable		= armv6mpcore_pmu_disable_event,
-	.event_map		= armv6mpcore_pmu_event_map,
-	.raw_event		= armv6pmu_raw_event,
-	.read_counter		= armv6pmu_read_counter,
-	.write_counter		= armv6pmu_write_counter,
-	.get_event_idx		= armv6pmu_get_event_idx,
-	.start			= armv6pmu_start,
-	.stop			= armv6pmu_stop,
-	.num_events		= 3,
-	.max_period		= (1LLU << 32) - 1,
-};
-
-/*
- * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
- *
- * Copied from ARMv6 code, with the low level code inspired
- *  by the ARMv7 Oprofile code.
- *
- * Cortex-A8 has up to 4 configurable performance counters and
- *  a single cycle counter.
- * Cortex-A9 has up to 31 configurable performance counters and
- *  a single cycle counter.
- *
- * All counters can be enabled/disabled and IRQ masked separately. The cycle
- *  counter and all 4 performance counters together can be reset separately.
- */
-
-/* Common ARMv7 event types */
-enum armv7_perf_types {
-	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
-	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
-	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
-	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,
-	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,
-	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
-	ARMV7_PERFCTR_DREAD			= 0x06,
-	ARMV7_PERFCTR_DWRITE			= 0x07,
-
-	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
-	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
-	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
-	/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
-	 * It counts:
-	 *  - all branch instructions,
-	 *  - instructions that explicitly write the PC,
-	 *  - exception generating instructions.
-	 */
-	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
-	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
-	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
-	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
-	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,
-
-	ARMV7_PERFCTR_PC_BRANCH_MIS_USED	= 0x12,
-
-	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
-};
-
-/* ARMv7 Cortex-A8 specific event types */
-enum armv7_a8_perf_types {
-	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,
-
-	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,
-
-	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
-	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
-	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
-	ARMV7_PERFCTR_L2_ACCESS			= 0x43,
-	ARMV7_PERFCTR_L2_CACH_MISS		= 0x44,
-	ARMV7_PERFCTR_AXI_READ_CYCLES		= 0x45,
-	ARMV7_PERFCTR_AXI_WRITE_CYCLES		= 0x46,
-	ARMV7_PERFCTR_MEMORY_REPLAY		= 0x47,
-	ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY	= 0x48,
-	ARMV7_PERFCTR_L1_DATA_MISS		= 0x49,
-	ARMV7_PERFCTR_L1_INST_MISS		= 0x4A,
-	ARMV7_PERFCTR_L1_DATA_COLORING		= 0x4B,
-	ARMV7_PERFCTR_L1_NEON_DATA		= 0x4C,
-	ARMV7_PERFCTR_L1_NEON_CACH_DATA		= 0x4D,
-	ARMV7_PERFCTR_L2_NEON			= 0x4E,
-	ARMV7_PERFCTR_L2_NEON_HIT		= 0x4F,
-	ARMV7_PERFCTR_L1_INST			= 0x50,
-	ARMV7_PERFCTR_PC_RETURN_MIS_PRED	= 0x51,
-	ARMV7_PERFCTR_PC_BRANCH_FAILED		= 0x52,
-	ARMV7_PERFCTR_PC_BRANCH_TAKEN		= 0x53,
-	ARMV7_PERFCTR_PC_BRANCH_EXECUTED	= 0x54,
-	ARMV7_PERFCTR_OP_EXECUTED		= 0x55,
-	ARMV7_PERFCTR_CYCLES_INST_STALL		= 0x56,
-	ARMV7_PERFCTR_CYCLES_INST		= 0x57,
-	ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL	= 0x58,
-	ARMV7_PERFCTR_CYCLES_NEON_INST_STALL	= 0x59,
-	ARMV7_PERFCTR_NEON_CYCLES		= 0x5A,
-
-	ARMV7_PERFCTR_PMU0_EVENTS		= 0x70,
-	ARMV7_PERFCTR_PMU1_EVENTS		= 0x71,
-	ARMV7_PERFCTR_PMU_EVENTS		= 0x72,
-};
-
-/* ARMv7 Cortex-A9 specific event types */
-enum armv7_a9_perf_types {
-	ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC	= 0x40,
-	ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC	= 0x41,
-	ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC	= 0x42,
-
-	ARMV7_PERFCTR_COHERENT_LINE_MISS	= 0x50,
-	ARMV7_PERFCTR_COHERENT_LINE_HIT		= 0x51,
-
-	ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES	= 0x60,
-	ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES	= 0x61,
-	ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES	= 0x62,
-	ARMV7_PERFCTR_STREX_EXECUTED_PASSED	= 0x63,
-	ARMV7_PERFCTR_STREX_EXECUTED_FAILED	= 0x64,
-	ARMV7_PERFCTR_DATA_EVICTION		= 0x65,
-	ARMV7_PERFCTR_ISSUE_STAGE_NO_INST	= 0x66,
-	ARMV7_PERFCTR_ISSUE_STAGE_EMPTY		= 0x67,
-	ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE	= 0x68,
-
-	ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS	= 0x6E,
-
-	ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST	= 0x70,
-	ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST	= 0x71,
-	ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST	= 0x72,
-	ARMV7_PERFCTR_FP_EXECUTED_INST		= 0x73,
-	ARMV7_PERFCTR_NEON_EXECUTED_INST	= 0x74,
-
-	ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES	= 0x80,
-	ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES	= 0x81,
-	ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES	= 0x82,
-	ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES	= 0x83,
-	ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES	= 0x84,
-	ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES 	= 0x85,
-	ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES	= 0x86,
-
-	ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES	= 0x8A,
-	ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES	= 0x8B,
-
-	ARMV7_PERFCTR_ISB_INST			= 0x90,
-	ARMV7_PERFCTR_DSB_INST			= 0x91,
-	ARMV7_PERFCTR_DMB_INST			= 0x92,
-	ARMV7_PERFCTR_EXT_INTERRUPTS		= 0x93,
-
-	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED	= 0xA0,
-	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED	= 0xA1,
-	ARMV7_PERFCTR_PLE_FIFO_FLUSH		= 0xA2,
-	ARMV7_PERFCTR_PLE_RQST_COMPLETED	= 0xA3,
-	ARMV7_PERFCTR_PLE_FIFO_OVERFLOW		= 0xA4,
-	ARMV7_PERFCTR_PLE_RQST_PROG		= 0xA5
-};
-
-/*
- * Cortex-A8 HW events mapping
- *
- * The hardware events that we support. We do support cache operations but
- * we have harvard caches and no way to combine instruction and data
- * accesses/misses in hardware.
- */
-static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
-	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
-	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
-};
-
-static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-					  [PERF_COUNT_HW_CACHE_OP_MAX]
-					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-	[C(L1D)] = {
-		/*
-		 * The performance counters don't differentiate between read
-		 * and write accesses/misses so this isn't strictly correct,
-		 * but it's the best we can do. Writes and reads get
-		 * combined.
-		 */
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(L1I)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(LL)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(DTLB)] = {
-		/*
-		 * Only ITLB misses and DTLB refills are supported.
-		 * If users want the DTLB refills misses a raw counter
-		 * must be used.
-		 */
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(ITLB)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(BPU)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
-			[C(RESULT_MISS)]
-					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
-			[C(RESULT_MISS)]
-					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-};
-
-/*
- * Cortex-A9 HW events mapping
- */
-static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
-	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]	    =
-					ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
-	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
-	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_COHERENT_LINE_MISS,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
-	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
-};
-
-static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-					  [PERF_COUNT_HW_CACHE_OP_MAX]
-					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-	[C(L1D)] = {
-		/*
-		 * The performance counters don't differentiate between read
-		 * and write accesses/misses so this isn't strictly correct,
-		 * but it's the best we can do. Writes and reads get
-		 * combined.
-		 */
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(L1I)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(LL)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(DTLB)] = {
-		/*
-		 * Only ITLB misses and DTLB refills are supported.
-		 * If users want the DTLB refills misses a raw counter
-		 * must be used.
-		 */
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(ITLB)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(BPU)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
-			[C(RESULT_MISS)]
-					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
-			[C(RESULT_MISS)]
-					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-};
-
-/*
- * Perf Events counters
- */
-enum armv7_counters {
-	ARMV7_CYCLE_COUNTER 		= 1,	/* Cycle counter */
-	ARMV7_COUNTER0			= 2,	/* First event counter */
-};
-
-/*
- * The cycle counter is ARMV7_CYCLE_COUNTER.
- * The first event counter is ARMV7_COUNTER0.
- * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
- */
-#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
-
-/*
- * ARMv7 low level PMNC access
- */
-
-/*
- * Per-CPU PMNC: config reg
- */
-#define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
-#define ARMV7_PMNC_P		(1 << 1) /* Reset all counters */
-#define ARMV7_PMNC_C		(1 << 2) /* Cycle counter reset */
-#define ARMV7_PMNC_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV7_PMNC_X		(1 << 4) /* Export to ETM */
-#define ARMV7_PMNC_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
-#define	ARMV7_PMNC_N_SHIFT	11	 /* Number of counters supported */
-#define	ARMV7_PMNC_N_MASK	0x1f
-#define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
-
-/*
- * Available counters
- */
-#define ARMV7_CNT0 		0	/* First event counter */
-#define ARMV7_CCNT 		31	/* Cycle counter */
-
-/* Perf Event to low level counters mapping */
-#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)
-
-/*
- * CNTENS: counters enable reg
- */
-#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * CNTENC: counters disable reg
- */
-#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENS: counters overflow interrupt enable reg
- */
-#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENC: counters overflow interrupt disable reg
- */
-#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * EVTSEL: Event selection reg
- */
-#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */
-
-/*
- * SELECT: Counter selection reg
- */
-#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */
-
-/*
- * FLAG: counters overflow flag status reg
- */
-#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
-#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
-#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
-
-static inline unsigned long armv7_pmnc_read(void)
-{
-	u32 val;
-	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
-	return val;
-}
-
-static inline void armv7_pmnc_write(unsigned long val)
-{
-	val &= ARMV7_PMNC_MASK;
-	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
-}
-
-static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
-{
-	return pmnc & ARMV7_OVERFLOWED_MASK;
-}
-
-static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
-					enum armv7_counters counter)
-{
-	int ret = 0;
-
-	if (counter == ARMV7_CYCLE_COUNTER)
-		ret = pmnc & ARMV7_FLAG_C;
-	else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
-		ret = pmnc & ARMV7_FLAG_P(counter);
-	else
-		pr_err("CPU%u checking wrong counter %d overflow status\n",
-			smp_processor_id(), counter);
-
-	return ret;
-}
-
-static inline int armv7_pmnc_select_counter(unsigned int idx)
-{
-	u32 val;
-
-	if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
-		pr_err("CPU%u selecting wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
-	}
-
-	val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
-	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
-
-	return idx;
-}
-
-static inline u32 armv7pmu_read_counter(int idx)
-{
-	unsigned long value = 0;
-
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mrc p15, 0, %0, c9, c13, 2"
-				     : "=r" (value));
-	} else
-		pr_err("CPU%u reading wrong counter %d\n",
-			smp_processor_id(), idx);
-
-	return value;
-}
-
-static inline void armv7pmu_write_counter(int idx, u32 value)
-{
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mcr p15, 0, %0, c9, c13, 2"
-				     : : "r" (value));
-	} else
-		pr_err("CPU%u writing wrong counter %d\n",
-			smp_processor_id(), idx);
-}
-
-static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
-{
-	if (armv7_pmnc_select_counter(idx) == idx) {
-		val &= ARMV7_EVTSEL_MASK;
-		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
-	}
-}
-
-static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
-{
-	u32 val;
-
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
-	}
-
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENS_C;
-	else
-		val = ARMV7_CNTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
-
-	return idx;
-}
-
-static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
-{
-	u32 val;
-
-
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
-	}
-
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENC_C;
-	else
-		val = ARMV7_CNTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
-
-	return idx;
-}
-
-static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
-{
-	u32 val;
-
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
-	}
-
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENS_C;
-	else
-		val = ARMV7_INTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
-
-	return idx;
-}
-
-static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
-{
-	u32 val;
-
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
-	}
-
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENC_C;
-	else
-		val = ARMV7_INTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
-
-	return idx;
-}
-
-static inline u32 armv7_pmnc_getreset_flags(void)
-{
-	u32 val;
-
-	/* Read */
-	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
-
-	/* Write to clear flags */
-	val &= ARMV7_FLAG_MASK;
-	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
-
-	return val;
-}
-
-#ifdef DEBUG
-static void armv7_pmnc_dump_regs(void)
-{
-	u32 val;
-	unsigned int cnt;
-
-	printk(KERN_INFO "PMNC registers dump:\n");
-
-	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
-	printk(KERN_INFO "PMNC  =0x%08x\n", val);
-
-	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
-	printk(KERN_INFO "CNTENS=0x%08x\n", val);
-
-	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
-	printk(KERN_INFO "INTENS=0x%08x\n", val);
-
-	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
-	printk(KERN_INFO "FLAGS =0x%08x\n", val);
-
-	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
-	printk(KERN_INFO "SELECT=0x%08x\n", val);
-
-	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
-	printk(KERN_INFO "CCNT  =0x%08x\n", val);
-
-	for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
-		armv7_pmnc_select_counter(cnt);
-		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
-		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
-		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
-		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
-	}
-}
-#endif
-
-void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
-{
-	unsigned long flags;
-
-	/*
-	 * Enable counter and interrupt, and set the counter to count
-	 * the event that we're interested in.
-	 */
-	spin_lock_irqsave(&pmu_lock, flags);
-
-	/*
-	 * Disable counter
-	 */
-	armv7_pmnc_disable_counter(idx);
-
-	/*
-	 * Set event (if destined for PMNx counters)
-	 * We don't need to set the event if it's a cycle count
-	 */
-	if (idx != ARMV7_CYCLE_COUNTER)
-		armv7_pmnc_write_evtsel(idx, hwc->config_base);
-
-	/*
-	 * Enable interrupt for this counter
-	 */
-	armv7_pmnc_enable_intens(idx);
-
-	/*
-	 * Enable counter
-	 */
-	armv7_pmnc_enable_counter(idx);
-
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
-{
-	unsigned long flags;
-
-	/*
-	 * Disable counter and interrupt
-	 */
-	spin_lock_irqsave(&pmu_lock, flags);
-
-	/*
-	 * Disable counter
-	 */
-	armv7_pmnc_disable_counter(idx);
-
-	/*
-	 * Disable interrupt for this counter
-	 */
-	armv7_pmnc_disable_intens(idx);
-
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
-{
-	unsigned long pmnc;
-	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
-	struct pt_regs *regs;
-	int idx;
-
-	/*
-	 * Get and reset the IRQ flags
-	 */
-	pmnc = armv7_pmnc_getreset_flags();
-
-	/*
-	 * Did an overflow occur?
-	 */
-	if (!armv7_pmnc_has_overflowed(pmnc))
-		return IRQ_NONE;
-
-	/*
-	 * Handle the counter(s) overflow(s)
-	 */
-	regs = get_irq_regs();
-
-	perf_sample_data_init(&data, 0);
-
-	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-		struct hw_perf_event *hwc;
-
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		/*
-		 * We have a single interrupt for all counters. Check that
-		 * each counter has overflowed before we process it.
-		 */
-		if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
-			continue;
-
-		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
-		data.period = event->hw.last_period;
-		if (!armpmu_event_set_period(event, hwc, idx))
-			continue;
-
-		if (perf_event_overflow(event, 0, &data, regs))
-			armpmu->disable(hwc, idx);
-	}
-
-	/*
-	 * Handle the pending perf events.
-	 *
-	 * Note: this call *must* be run with interrupts disabled. For
-	 * platforms that can have the PMU interrupts raised as an NMI, this
-	 * will not work.
-	 */
-	irq_work_run();
-
-	return IRQ_HANDLED;
-}
-
-static void armv7pmu_start(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	/* Enable all counters */
-	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static void armv7pmu_stop(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	/* Disable all counters */
-	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static inline int armv7_a8_pmu_event_map(int config)
-{
-	int mapping = armv7_a8_perf_map[config];
-	if (HW_OP_UNSUPPORTED == mapping)
-		mapping = -EOPNOTSUPP;
-	return mapping;
-}
-
-static inline int armv7_a9_pmu_event_map(int config)
-{
-	int mapping = armv7_a9_perf_map[config];
-	if (HW_OP_UNSUPPORTED == mapping)
-		mapping = -EOPNOTSUPP;
-	return mapping;
-}
-
-static u64 armv7pmu_raw_event(u64 config)
-{
-	return config & 0xff;
-}
-
-static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
-				  struct hw_perf_event *event)
-{
-	int idx;
-
-	/* Always place a cycle counter into the cycle counter. */
-	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
-		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
-			return -EAGAIN;
-
-		return ARMV7_CYCLE_COUNTER;
-	} else {
-		/*
-		 * For anything other than a cycle counter, try and use
-		 * the events counters
-		 */
-		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
-			if (!test_and_set_bit(idx, cpuc->used_mask))
-				return idx;
-		}
-
-		/* The counters are all in use. */
-		return -EAGAIN;
-	}
-}
-
-static struct arm_pmu armv7pmu = {
-	.handle_irq		= armv7pmu_handle_irq,
-	.enable			= armv7pmu_enable_event,
-	.disable		= armv7pmu_disable_event,
-	.raw_event		= armv7pmu_raw_event,
-	.read_counter		= armv7pmu_read_counter,
-	.write_counter		= armv7pmu_write_counter,
-	.get_event_idx		= armv7pmu_get_event_idx,
-	.start			= armv7pmu_start,
-	.stop			= armv7pmu_stop,
-	.max_period		= (1LLU << 32) - 1,
-};
-
-static u32 __init armv7_reset_read_pmnc(void)
-{
-	u32 nb_cnt;
-
-	/* Initialize & Reset PMNC: C and P bits */
-	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
-
-	/* Read the nb of CNTx counters supported from PMNC */
-	nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
-
-	/* Add the CPU cycles counter and return */
-	return nb_cnt + 1;
-}
-
-/*
- * ARMv5 [xscale] Performance counter handling code.
- *
- * Based on xscale OProfile code.
- *
- * There are two variants of the xscale PMU that we support:
- * 	- xscale1pmu: 2 event counters and a cycle counter
- * 	- xscale2pmu: 4 event counters and a cycle counter
- * The two variants share event definitions, but have different
- * PMU structures.
- */
-
-enum xscale_perf_types {
-	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
-	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
-	XSCALE_PERFCTR_DATA_STALL		= 0x02,
-	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
-	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
-	XSCALE_PERFCTR_BRANCH			= 0x05,
-	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
-	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
-	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
-	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
-	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0A,
-	XSCALE_PERFCTR_DCACHE_MISS		= 0x0B,
-	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0C,
-	XSCALE_PERFCTR_PC_CHANGED		= 0x0D,
-	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
-	XSCALE_PERFCTR_BCU_FULL			= 0x11,
-	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
-	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
-	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
-	XSCALE_PERFCTR_RMW			= 0x16,
-	/* XSCALE_PERFCTR_CCNT is not hardware defined */
-	XSCALE_PERFCTR_CCNT			= 0xFE,
-	XSCALE_PERFCTR_UNUSED			= 0xFF,
-};
-
-enum xscale_counters {
-	XSCALE_CYCLE_COUNTER	= 1,
-	XSCALE_COUNTER0,
-	XSCALE_COUNTER1,
-	XSCALE_COUNTER2,
-	XSCALE_COUNTER3,
-};
-
-static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
-	[PERF_COUNT_HW_CPU_CYCLES]	    = XSCALE_PERFCTR_CCNT,
-	[PERF_COUNT_HW_INSTRUCTIONS]	    = XSCALE_PERFCTR_INSTRUCTION,
-	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
-	[PERF_COUNT_HW_BRANCH_MISSES]	    = XSCALE_PERFCTR_BRANCH_MISS,
-	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
-};
-
-static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-					   [PERF_COUNT_HW_CACHE_OP_MAX]
-					   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-	[C(L1D)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(L1I)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(LL)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(DTLB)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(ITLB)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(BPU)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-};
-
-#define	XSCALE_PMU_ENABLE	0x001
-#define XSCALE_PMN_RESET	0x002
-#define	XSCALE_CCNT_RESET	0x004
-#define	XSCALE_PMU_RESET	(CCNT_RESET | PMN_RESET)
-#define XSCALE_PMU_CNT64	0x008
-
-static inline int
-xscalepmu_event_map(int config)
-{
-	int mapping = xscale_perf_map[config];
-	if (HW_OP_UNSUPPORTED == mapping)
-		mapping = -EOPNOTSUPP;
-	return mapping;
-}
-
-static u64
-xscalepmu_raw_event(u64 config)
-{
-	return config & 0xff;
-}
-
-#define XSCALE1_OVERFLOWED_MASK	0x700
-#define XSCALE1_CCOUNT_OVERFLOW	0x400
-#define XSCALE1_COUNT0_OVERFLOW	0x100
-#define XSCALE1_COUNT1_OVERFLOW	0x200
-#define XSCALE1_CCOUNT_INT_EN	0x040
-#define XSCALE1_COUNT0_INT_EN	0x010
-#define XSCALE1_COUNT1_INT_EN	0x020
-#define XSCALE1_COUNT0_EVT_SHFT	12
-#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
-#define XSCALE1_COUNT1_EVT_SHFT	20
-#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
-
-static inline u32
-xscale1pmu_read_pmnc(void)
-{
-	u32 val;
-	asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
-	return val;
-}
-
-static inline void
-xscale1pmu_write_pmnc(u32 val)
-{
-	/* upper 4bits and 7, 11 are write-as-0 */
-	val &= 0xffff77f;
-	asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
-}
-
-static inline int
-xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
-					enum xscale_counters counter)
-{
-	int ret = 0;
-
-	switch (counter) {
-	case XSCALE_CYCLE_COUNTER:
-		ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
-		break;
-	case XSCALE_COUNTER0:
-		ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
-		break;
-	case XSCALE_COUNTER1:
-		ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
-		break;
-	default:
-		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-	}
-
-	return ret;
-}
-
-static irqreturn_t
-xscale1pmu_handle_irq(int irq_num, void *dev)
-{
-	unsigned long pmnc;
-	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
-	struct pt_regs *regs;
-	int idx;
-
-	/*
-	 * NOTE: there's an A stepping erratum that states if an overflow
-	 *       bit already exists and another occurs, the previous
-	 *       Overflow bit gets cleared. There's no workaround.
-	 *	 Fixed in B stepping or later.
-	 */
-	pmnc = xscale1pmu_read_pmnc();
-
-	/*
-	 * Write the value back to clear the overflow flags. Overflow
-	 * flags remain in pmnc for use below. We also disable the PMU
-	 * while we process the interrupt.
-	 */
-	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
-
-	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
-		return IRQ_NONE;
-
-	regs = get_irq_regs();
-
-	perf_sample_data_init(&data, 0);
-
-	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-		struct hw_perf_event *hwc;
-
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
-			continue;
-
-		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
-		data.period = event->hw.last_period;
-		if (!armpmu_event_set_period(event, hwc, idx))
-			continue;
-
-		if (perf_event_overflow(event, 0, &data, regs))
-			armpmu->disable(hwc, idx);
-	}
-
-	irq_work_run();
-
-	/*
-	 * Re-enable the PMU.
-	 */
-	pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
-	xscale1pmu_write_pmnc(pmnc);
-
-	return IRQ_HANDLED;
-}
-
-static void
-xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
-{
-	unsigned long val, mask, evt, flags;
-
-	switch (idx) {
-	case XSCALE_CYCLE_COUNTER:
-		mask = 0;
-		evt = XSCALE1_CCOUNT_INT_EN;
-		break;
-	case XSCALE_COUNTER0:
-		mask = XSCALE1_COUNT0_EVT_MASK;
-		evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
-			XSCALE1_COUNT0_INT_EN;
-		break;
-	case XSCALE_COUNTER1:
-		mask = XSCALE1_COUNT1_EVT_MASK;
-		evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
-			XSCALE1_COUNT1_INT_EN;
-		break;
-	default:
-		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
-		return;
-	}
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = xscale1pmu_read_pmnc();
-	val &= ~mask;
-	val |= evt;
-	xscale1pmu_write_pmnc(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static void
-xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
-{
-	unsigned long val, mask, evt, flags;
-
-	switch (idx) {
-	case XSCALE_CYCLE_COUNTER:
-		mask = XSCALE1_CCOUNT_INT_EN;
-		evt = 0;
-		break;
-	case XSCALE_COUNTER0:
-		mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
-		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
-		break;
-	case XSCALE_COUNTER1:
-		mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
-		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
-		break;
-	default:
-		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
-		return;
-	}
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = xscale1pmu_read_pmnc();
-	val &= ~mask;
-	val |= evt;
-	xscale1pmu_write_pmnc(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static int
-xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
-			struct hw_perf_event *event)
-{
-	if (XSCALE_PERFCTR_CCNT == event->config_base) {
-		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
-			return -EAGAIN;
-
-		return XSCALE_CYCLE_COUNTER;
-	} else {
-		if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
-			return XSCALE_COUNTER1;
-		}
-
-		if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
-			return XSCALE_COUNTER0;
-		}
-
-		return -EAGAIN;
-	}
-}
-
-static void
-xscale1pmu_start(void)
-{
-	unsigned long flags, val;
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = xscale1pmu_read_pmnc();
-	val |= XSCALE_PMU_ENABLE;
-	xscale1pmu_write_pmnc(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static void
-xscale1pmu_stop(void)
-{
-	unsigned long flags, val;
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = xscale1pmu_read_pmnc();
-	val &= ~XSCALE_PMU_ENABLE;
-	xscale1pmu_write_pmnc(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static inline u32
-xscale1pmu_read_counter(int counter)
-{
-	u32 val = 0;
-
-	switch (counter) {
-	case XSCALE_CYCLE_COUNTER:
-		asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
-		break;
-	case XSCALE_COUNTER0:
-		asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
-		break;
-	case XSCALE_COUNTER1:
-		asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
-		break;
-	}
-
-	return val;
-}
-
-static inline void
-xscale1pmu_write_counter(int counter, u32 val)
-{
-	switch (counter) {
-	case XSCALE_CYCLE_COUNTER:
-		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
-		break;
-	case XSCALE_COUNTER0:
-		asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
-		break;
-	case XSCALE_COUNTER1:
-		asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
-		break;
-	}
-}
-
-static const struct arm_pmu xscale1pmu = {
-	.id		= ARM_PERF_PMU_ID_XSCALE1,
-	.handle_irq	= xscale1pmu_handle_irq,
-	.enable		= xscale1pmu_enable_event,
-	.disable	= xscale1pmu_disable_event,
-	.event_map	= xscalepmu_event_map,
-	.raw_event	= xscalepmu_raw_event,
-	.read_counter	= xscale1pmu_read_counter,
-	.write_counter	= xscale1pmu_write_counter,
-	.get_event_idx	= xscale1pmu_get_event_idx,
-	.start		= xscale1pmu_start,
-	.stop		= xscale1pmu_stop,
-	.num_events	= 3,
-	.max_period	= (1LLU << 32) - 1,
-};
-
-#define XSCALE2_OVERFLOWED_MASK	0x01f
-#define XSCALE2_CCOUNT_OVERFLOW	0x001
-#define XSCALE2_COUNT0_OVERFLOW	0x002
-#define XSCALE2_COUNT1_OVERFLOW	0x004
-#define XSCALE2_COUNT2_OVERFLOW	0x008
-#define XSCALE2_COUNT3_OVERFLOW	0x010
-#define XSCALE2_CCOUNT_INT_EN	0x001
-#define XSCALE2_COUNT0_INT_EN	0x002
-#define XSCALE2_COUNT1_INT_EN	0x004
-#define XSCALE2_COUNT2_INT_EN	0x008
-#define XSCALE2_COUNT3_INT_EN	0x010
-#define XSCALE2_COUNT0_EVT_SHFT	0
-#define XSCALE2_COUNT0_EVT_MASK	(0xff << XSCALE2_COUNT0_EVT_SHFT)
-#define XSCALE2_COUNT1_EVT_SHFT	8
-#define XSCALE2_COUNT1_EVT_MASK	(0xff << XSCALE2_COUNT1_EVT_SHFT)
-#define XSCALE2_COUNT2_EVT_SHFT	16
-#define XSCALE2_COUNT2_EVT_MASK	(0xff << XSCALE2_COUNT2_EVT_SHFT)
-#define XSCALE2_COUNT3_EVT_SHFT	24
-#define XSCALE2_COUNT3_EVT_MASK	(0xff << XSCALE2_COUNT3_EVT_SHFT)
-
-static inline u32
-xscale2pmu_read_pmnc(void)
-{
-	u32 val;
-	asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
-	/* bits 1-2 and 4-23 are read-unpredictable */
-	return val & 0xff000009;
-}
-
-static inline void
-xscale2pmu_write_pmnc(u32 val)
-{
-	/* bits 4-23 are write-as-0, 24-31 are write ignored */
-	val &= 0xf;
-	asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
-}
-
-static inline u32
-xscale2pmu_read_overflow_flags(void)
-{
-	u32 val;
-	asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
-	return val;
-}
-
-static inline void
-xscale2pmu_write_overflow_flags(u32 val)
-{
-	asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
-}
-
-static inline u32
-xscale2pmu_read_event_select(void)
-{
-	u32 val;
-	asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
-	return val;
-}
-
-static inline void
-xscale2pmu_write_event_select(u32 val)
-{
-	asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
-}
-
-static inline u32
-xscale2pmu_read_int_enable(void)
-{
-	u32 val;
-	asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
-	return val;
-}
-
-static void
-xscale2pmu_write_int_enable(u32 val)
-{
-	asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
-}
-
-static inline int
-xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
-					enum xscale_counters counter)
-{
-	int ret = 0;
-
-	switch (counter) {
-	case XSCALE_CYCLE_COUNTER:
-		ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
-		break;
-	case XSCALE_COUNTER0:
-		ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
-		break;
-	case XSCALE_COUNTER1:
-		ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
-		break;
-	case XSCALE_COUNTER2:
-		ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
-		break;
-	case XSCALE_COUNTER3:
-		ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
-		break;
-	default:
-		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-	}
-
-	return ret;
-}
-
-static irqreturn_t
-xscale2pmu_handle_irq(int irq_num, void *dev)
-{
-	unsigned long pmnc, of_flags;
-	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
-	struct pt_regs *regs;
-	int idx;
-
-	/* Disable the PMU. */
-	pmnc = xscale2pmu_read_pmnc();
-	xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
-
-	/* Check the overflow flag register. */
-	of_flags = xscale2pmu_read_overflow_flags();
-	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
-		return IRQ_NONE;
-
-	/* Clear the overflow bits. */
-	xscale2pmu_write_overflow_flags(of_flags);
-
-	regs = get_irq_regs();
-
-	perf_sample_data_init(&data, 0);
-
-	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-		struct hw_perf_event *hwc;
-
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
-			continue;
-
-		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
-		data.period = event->hw.last_period;
-		if (!armpmu_event_set_period(event, hwc, idx))
-			continue;
-
-		if (perf_event_overflow(event, 0, &data, regs))
-			armpmu->disable(hwc, idx);
-	}
-
-	irq_work_run();
-
-	/*
-	 * Re-enable the PMU.
-	 */
-	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
-	xscale2pmu_write_pmnc(pmnc);
-
-	return IRQ_HANDLED;
-}
-
-static void
-xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
-{
-	unsigned long flags, ien, evtsel;
-
-	ien = xscale2pmu_read_int_enable();
-	evtsel = xscale2pmu_read_event_select();
-
-	switch (idx) {
-	case XSCALE_CYCLE_COUNTER:
-		ien |= XSCALE2_CCOUNT_INT_EN;
-		break;
-	case XSCALE_COUNTER0:
-		ien |= XSCALE2_COUNT0_INT_EN;
-		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
-		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
-		break;
-	case XSCALE_COUNTER1:
-		ien |= XSCALE2_COUNT1_INT_EN;
-		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
-		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
-		break;
-	case XSCALE_COUNTER2:
-		ien |= XSCALE2_COUNT2_INT_EN;
-		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
-		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
-		break;
-	case XSCALE_COUNTER3:
-		ien |= XSCALE2_COUNT3_INT_EN;
-		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
-		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
-		break;
-	default:
-		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
-		return;
-	}
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	xscale2pmu_write_event_select(evtsel);
-	xscale2pmu_write_int_enable(ien);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static void
-xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
-{
-	unsigned long flags, ien, evtsel;
-
-	ien = xscale2pmu_read_int_enable();
-	evtsel = xscale2pmu_read_event_select();
-
-	switch (idx) {
-	case XSCALE_CYCLE_COUNTER:
-		ien &= ~XSCALE2_CCOUNT_INT_EN;
-		break;
-	case XSCALE_COUNTER0:
-		ien &= ~XSCALE2_COUNT0_INT_EN;
-		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
-		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
-		break;
-	case XSCALE_COUNTER1:
-		ien &= ~XSCALE2_COUNT1_INT_EN;
-		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
-		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
-		break;
-	case XSCALE_COUNTER2:
-		ien &= ~XSCALE2_COUNT2_INT_EN;
-		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
-		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
-		break;
-	case XSCALE_COUNTER3:
-		ien &= ~XSCALE2_COUNT3_INT_EN;
-		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
-		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
-		break;
-	default:
-		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
-		return;
-	}
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	xscale2pmu_write_event_select(evtsel);
-	xscale2pmu_write_int_enable(ien);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static int
-xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
-			struct hw_perf_event *event)
-{
-	int idx = xscale1pmu_get_event_idx(cpuc, event);
-	if (idx >= 0)
-		goto out;
-
-	if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
-		idx = XSCALE_COUNTER3;
-	else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
-		idx = XSCALE_COUNTER2;
-out:
-	return idx;
-}
-
-static void
-xscale2pmu_start(void)
-{
-	unsigned long flags, val;
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
-	val |= XSCALE_PMU_ENABLE;
-	xscale2pmu_write_pmnc(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static void
-xscale2pmu_stop(void)
-{
-	unsigned long flags, val;
-
-	spin_lock_irqsave(&pmu_lock, flags);
-	val = xscale2pmu_read_pmnc();
-	val &= ~XSCALE_PMU_ENABLE;
-	xscale2pmu_write_pmnc(val);
-	spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static inline u32
-xscale2pmu_read_counter(int counter)
-{
-	u32 val = 0;
-
-	switch (counter) {
-	case XSCALE_CYCLE_COUNTER:
-		asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
-		break;
-	case XSCALE_COUNTER0:
-		asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
-		break;
-	case XSCALE_COUNTER1:
-		asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
-		break;
-	case XSCALE_COUNTER2:
-		asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
-		break;
-	case XSCALE_COUNTER3:
-		asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
-		break;
-	}
-
-	return val;
-}
-
-static inline void
-xscale2pmu_write_counter(int counter, u32 val)
-{
-	switch (counter) {
-	case XSCALE_CYCLE_COUNTER:
-		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
-		break;
-	case XSCALE_COUNTER0:
-		asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
-		break;
-	case XSCALE_COUNTER1:
-		asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
-		break;
-	case XSCALE_COUNTER2:
-		asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
-		break;
-	case XSCALE_COUNTER3:
-		asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
-		break;
-	}
-}
-
-static const struct arm_pmu xscale2pmu = {
-	.id		= ARM_PERF_PMU_ID_XSCALE2,
-	.handle_irq	= xscale2pmu_handle_irq,
-	.enable		= xscale2pmu_enable_event,
-	.disable	= xscale2pmu_disable_event,
-	.event_map	= xscalepmu_event_map,
-	.raw_event	= xscalepmu_raw_event,
-	.read_counter	= xscale2pmu_read_counter,
-	.write_counter	= xscale2pmu_write_counter,
-	.get_event_idx	= xscale2pmu_get_event_idx,
-	.start		= xscale2pmu_start,
-	.stop		= xscale2pmu_stop,
-	.num_events	= 5,
-	.max_period	= (1LLU << 32) - 1,
-};
+/* Include the PMU-specific implementations. */
+#include "perf_event_xscale.c"
+#include "perf_event_v6.c"
+#include "perf_event_v7.c"
 
 static int __init
 init_hw_perf_events(void)
@@ -2977,37 +622,16 @@
 		case 0xB360:	/* ARM1136 */
 		case 0xB560:	/* ARM1156 */
 		case 0xB760:	/* ARM1176 */
-			armpmu = &armv6pmu;
-			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
-					sizeof(armv6_perf_cache_map));
+			armpmu = armv6pmu_init();
 			break;
 		case 0xB020:	/* ARM11mpcore */
-			armpmu = &armv6mpcore_pmu;
-			memcpy(armpmu_perf_cache_map,
-			       armv6mpcore_perf_cache_map,
-			       sizeof(armv6mpcore_perf_cache_map));
+			armpmu = armv6mpcore_pmu_init();
 			break;
 		case 0xC080:	/* Cortex-A8 */
-			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
-			memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
-				sizeof(armv7_a8_perf_cache_map));
-			armv7pmu.event_map = armv7_a8_pmu_event_map;
-			armpmu = &armv7pmu;
-
-			/* Reset PMNC and read the nb of CNTx counters
-			    supported */
-			armv7pmu.num_events = armv7_reset_read_pmnc();
+			armpmu = armv7_a8_pmu_init();
 			break;
 		case 0xC090:	/* Cortex-A9 */
-			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
-			memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
-				sizeof(armv7_a9_perf_cache_map));
-			armv7pmu.event_map = armv7_a9_pmu_event_map;
-			armpmu = &armv7pmu;
-
-			/* Reset PMNC and read the nb of CNTx counters
-			    supported */
-			armv7pmu.num_events = armv7_reset_read_pmnc();
+			armpmu = armv7_a9_pmu_init();
 			break;
 		}
 	/* Intel CPUs [xscale]. */
@@ -3015,21 +639,17 @@
 		part_number = (cpuid >> 13) & 0x7;
 		switch (part_number) {
 		case 1:
-			armpmu = &xscale1pmu;
-			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
-					sizeof(xscale_perf_cache_map));
+			armpmu = xscale1pmu_init();
 			break;
 		case 2:
-			armpmu = &xscale2pmu;
-			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
-					sizeof(xscale_perf_cache_map));
+			armpmu = xscale2pmu_init();
 			break;
 		}
 	}
 
 	if (armpmu) {
 		pr_info("enabled with %s PMU driver, %d counters available\n",
-				arm_pmu_names[armpmu->id], armpmu->num_events);
+			armpmu->name, armpmu->num_events);
 	} else {
 		pr_info("no hardware support available\n");
 	}
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
new file mode 100644
index 0000000..7aeb07d
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -0,0 +1,672 @@
+/*
+ * ARMv6 Performance counter handling code.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ *
+ * ARMv6 has 2 configurable performance counters and a single cycle counter.
+ * They all share a single reset bit but can be written to zero so we can use
+ * that for a reset.
+ *
+ * The counters can't be individually enabled or disabled so when we remove
+ * one event and replace it with another we could get spurious counts from the
+ * wrong event. However, we can take advantage of the fact that the
+ * performance counters can export events to the event bus, and the event bus
+ * itself can be monitored. This requires that we *don't* export the events to
+ * the event bus. The procedure for disabling a configurable counter is:
+ *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
+ *	  effectively stops the counter from counting.
+ *	- disable the counter's interrupt generation (each counter has it's
+ *	  own interrupt enable bit).
+ * Once stopped, the counter value can be written as 0 to reset.
+ *
+ * To enable a counter:
+ *	- enable the counter's interrupt generation.
+ *	- set the new event type.
+ *
+ * Note: the dedicated cycle counter only counts cycles and can't be
+ * enabled/disabled independently of the others. When we want to disable the
+ * cycle counter, we have to just disable the interrupt reporting and start
+ * ignoring that counter. When re-enabling, we have to reset the value and
+ * enable the interrupt.
+ */
+
+#ifdef CONFIG_CPU_V6
+enum armv6_perf_types {
+	ARMV6_PERFCTR_ICACHE_MISS	    = 0x0,
+	ARMV6_PERFCTR_IBUF_STALL	    = 0x1,
+	ARMV6_PERFCTR_DDEP_STALL	    = 0x2,
+	ARMV6_PERFCTR_ITLB_MISS		    = 0x3,
+	ARMV6_PERFCTR_DTLB_MISS		    = 0x4,
+	ARMV6_PERFCTR_BR_EXEC		    = 0x5,
+	ARMV6_PERFCTR_BR_MISPREDICT	    = 0x6,
+	ARMV6_PERFCTR_INSTR_EXEC	    = 0x7,
+	ARMV6_PERFCTR_DCACHE_HIT	    = 0x9,
+	ARMV6_PERFCTR_DCACHE_ACCESS	    = 0xA,
+	ARMV6_PERFCTR_DCACHE_MISS	    = 0xB,
+	ARMV6_PERFCTR_DCACHE_WBACK	    = 0xC,
+	ARMV6_PERFCTR_SW_PC_CHANGE	    = 0xD,
+	ARMV6_PERFCTR_MAIN_TLB_MISS	    = 0xF,
+	ARMV6_PERFCTR_EXPL_D_ACCESS	    = 0x10,
+	ARMV6_PERFCTR_LSU_FULL_STALL	    = 0x11,
+	ARMV6_PERFCTR_WBUF_DRAINED	    = 0x12,
+	ARMV6_PERFCTR_CPU_CYCLES	    = 0xFF,
+	ARMV6_PERFCTR_NOP		    = 0x20,
+};
+
+enum armv6_counters {
+	ARMV6_CYCLE_COUNTER = 1,
+	ARMV6_COUNTER0,
+	ARMV6_COUNTER1,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6_PERFCTR_INSTR_EXEC,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6_PERFCTR_BR_MISPREDICT,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		/*
+		 * The performance counters don't differentiate between read
+		 * and write accesses/misses so this isn't strictly correct,
+		 * but it's the best we can do. Writes and reads get
+		 * combined.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		/*
+		 * The ARM performance counters can count micro DTLB misses,
+		 * micro ITLB misses and main TLB misses. There isn't an event
+		 * for TLB misses, so use the micro misses here and if users
+		 * want the main TLB misses they can use a raw counter.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+enum armv6mpcore_perf_types {
+	ARMV6MPCORE_PERFCTR_ICACHE_MISS	    = 0x0,
+	ARMV6MPCORE_PERFCTR_IBUF_STALL	    = 0x1,
+	ARMV6MPCORE_PERFCTR_DDEP_STALL	    = 0x2,
+	ARMV6MPCORE_PERFCTR_ITLB_MISS	    = 0x3,
+	ARMV6MPCORE_PERFCTR_DTLB_MISS	    = 0x4,
+	ARMV6MPCORE_PERFCTR_BR_EXEC	    = 0x5,
+	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
+	ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
+	ARMV6MPCORE_PERFCTR_INSTR_EXEC	    = 0x8,
+	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
+	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
+	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
+	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
+	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
+	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
+	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
+	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
+	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
+	ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
+	ARMV6MPCORE_PERFCTR_CPU_CYCLES	    = 0xFF,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  =
+				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
+			[C(RESULT_MISS)]    =
+				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  =
+				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
+			[C(RESULT_MISS)]    =
+				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		/*
+		 * The ARM performance counters can count micro DTLB misses,
+		 * micro ITLB misses and main TLB misses. There isn't an event
+		 * for TLB misses, so use the micro misses here and if users
+		 * want the main TLB misses they can use a raw counter.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+static inline unsigned long
+armv6_pmcr_read(void)
+{
+	u32 val;
+	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
+	return val;
+}
+
+static inline void
+armv6_pmcr_write(unsigned long val)
+{
+	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
+}
+
+#define ARMV6_PMCR_ENABLE		(1 << 0)
+#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
+#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
+#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
+#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
+#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
+#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
+#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
+#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
+#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
+#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
+#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
+#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
+#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
+
+#define ARMV6_PMCR_OVERFLOWED_MASK \
+	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
+	 ARMV6_PMCR_CCOUNT_OVERFLOW)
+
+static inline int
+armv6_pmcr_has_overflowed(unsigned long pmcr)
+{
+	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
+}
+
+static inline int
+armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
+				  enum armv6_counters counter)
+{
+	int ret = 0;
+
+	if (ARMV6_CYCLE_COUNTER == counter)
+		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
+	else if (ARMV6_COUNTER0 == counter)
+		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
+	else if (ARMV6_COUNTER1 == counter)
+		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+	return ret;
+}
+
+static inline u32
+armv6pmu_read_counter(int counter)
+{
+	unsigned long value = 0;
+
+	if (ARMV6_CYCLE_COUNTER == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
+	else if (ARMV6_COUNTER0 == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
+	else if (ARMV6_COUNTER1 == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+	return value;
+}
+
+static inline void
+armv6pmu_write_counter(int counter,
+		       u32 value)
+{
+	if (ARMV6_CYCLE_COUNTER == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
+	else if (ARMV6_COUNTER0 == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
+	else if (ARMV6_COUNTER1 == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+}
+
+void
+armv6pmu_enable_event(struct hw_perf_event *hwc,
+		      int idx)
+{
+	unsigned long val, mask, evt, flags;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= 0;
+		evt	= ARMV6_PMCR_CCOUNT_IEN;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
+		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
+			  ARMV6_PMCR_COUNT0_IEN;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
+		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
+			  ARMV6_PMCR_COUNT1_IEN;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Mask out the current event and set the counter to count the event
+	 * that we're interested in.
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static irqreturn_t
+armv6pmu_handle_irq(int irq_num,
+		    void *dev)
+{
+	unsigned long pmcr = armv6_pmcr_read();
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	if (!armv6_pmcr_has_overflowed(pmcr))
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	/*
+	 * The interrupts are cleared by writing the overflow flags back to
+	 * the control register. All of the other bits don't have any effect
+	 * if they are rewritten, so write the whole value back.
+	 */
+	armv6_pmcr_write(pmcr);
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	/*
+	 * Handle the pending perf events.
+	 *
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
+	 * will not work.
+	 */
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+static void
+armv6pmu_start(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val |= ARMV6_PMCR_ENABLE;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+armv6pmu_stop(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~ARMV6_PMCR_ENABLE;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int
+armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
+		       struct hw_perf_event *event)
+{
+	/* Always place a cycle counter into the cycle counter. */
+	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
+		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return ARMV6_CYCLE_COUNTER;
+	} else {
+		/*
+		 * For anything other than a cycle counter, try and use
+		 * counter0 and counter1.
+		 */
+		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
+			return ARMV6_COUNTER1;
+
+		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
+			return ARMV6_COUNTER0;
+
+		/* The counters are all in use. */
+		return -EAGAIN;
+	}
+}
+
+static void
+armv6pmu_disable_event(struct hw_perf_event *hwc,
+		       int idx)
+{
+	unsigned long val, mask, evt, flags;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= ARMV6_PMCR_CCOUNT_IEN;
+		evt	= 0;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
+		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
+		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Mask out the current event and set the counter to count the number
+	 * of ETM bus signal assertion cycles. The external reporting should
+	 * be disabled and so this should never increment.
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
+			      int idx)
+{
+	unsigned long val, mask, flags, evt = 0;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= ARMV6_PMCR_CCOUNT_IEN;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_COUNT0_IEN;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_COUNT1_IEN;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
+	 * simply disable the interrupt reporting.
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static const struct arm_pmu armv6pmu = {
+	.id			= ARM_PERF_PMU_ID_V6,
+	.name			= "v6",
+	.handle_irq		= armv6pmu_handle_irq,
+	.enable			= armv6pmu_enable_event,
+	.disable		= armv6pmu_disable_event,
+	.read_counter		= armv6pmu_read_counter,
+	.write_counter		= armv6pmu_write_counter,
+	.get_event_idx		= armv6pmu_get_event_idx,
+	.start			= armv6pmu_start,
+	.stop			= armv6pmu_stop,
+	.cache_map		= &armv6_perf_cache_map,
+	.event_map		= &armv6_perf_map,
+	.raw_event_mask		= 0xFF,
+	.num_events		= 3,
+	.max_period		= (1LLU << 32) - 1,
+};
+
+const struct arm_pmu *__init armv6pmu_init(void)
+{
+	return &armv6pmu;
+}
+
+/*
+ * ARMv6mpcore is almost identical to single core ARMv6 with the exception
+ * that some of the events have different enumerations and that there is no
+ * *hack* to stop the programmable counters. To stop the counters we simply
+ * disable the interrupt reporting and update the event. When unthrottling we
+ * reset the period and enable the interrupt reporting.
+ */
+static const struct arm_pmu armv6mpcore_pmu = {
+	.id			= ARM_PERF_PMU_ID_V6MP,
+	.name			= "v6mpcore",
+	.handle_irq		= armv6pmu_handle_irq,
+	.enable			= armv6pmu_enable_event,
+	.disable		= armv6mpcore_pmu_disable_event,
+	.read_counter		= armv6pmu_read_counter,
+	.write_counter		= armv6pmu_write_counter,
+	.get_event_idx		= armv6pmu_get_event_idx,
+	.start			= armv6pmu_start,
+	.stop			= armv6pmu_stop,
+	.cache_map		= &armv6mpcore_perf_cache_map,
+	.event_map		= &armv6mpcore_perf_map,
+	.raw_event_mask		= 0xFF,
+	.num_events		= 3,
+	.max_period		= (1LLU << 32) - 1,
+};
+
+const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+{
+	return &armv6mpcore_pmu;
+}
+#else
+const struct arm_pmu *__init armv6pmu_init(void)
+{
+	return NULL;
+}
+
+const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+{
+	return NULL;
+}
+#endif	/* CONFIG_CPU_V6 */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
new file mode 100644
index 0000000..4d04239
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -0,0 +1,906 @@
+/*
+ * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
+ *
+ * ARMv7 support: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ *
+ * Copied from ARMv6 code, with the low level code inspired
+ *  by the ARMv7 Oprofile code.
+ *
+ * Cortex-A8 has up to 4 configurable performance counters and
+ *  a single cycle counter.
+ * Cortex-A9 has up to 31 configurable performance counters and
+ *  a single cycle counter.
+ *
+ * All counters can be enabled/disabled and IRQ masked separately. The cycle
+ *  counter and all 4 performance counters together can be reset separately.
+ */
+
+#ifdef CONFIG_CPU_V7
+/* Common ARMv7 event types */
+enum armv7_perf_types {
+	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
+	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
+	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
+	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,
+	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,
+	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
+	ARMV7_PERFCTR_DREAD			= 0x06,
+	ARMV7_PERFCTR_DWRITE			= 0x07,
+
+	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
+	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
+	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
+	/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
+	 * It counts:
+	 *  - all branch instructions,
+	 *  - instructions that explicitly write the PC,
+	 *  - exception generating instructions.
+	 */
+	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
+	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
+	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
+	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
+	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,
+
+	ARMV7_PERFCTR_PC_BRANCH_MIS_USED	= 0x12,
+
+	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
+};
+
+/* ARMv7 Cortex-A8 specific event types */
+enum armv7_a8_perf_types {
+	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,
+
+	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,
+
+	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
+	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
+	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
+	ARMV7_PERFCTR_L2_ACCESS			= 0x43,
+	ARMV7_PERFCTR_L2_CACH_MISS		= 0x44,
+	ARMV7_PERFCTR_AXI_READ_CYCLES		= 0x45,
+	ARMV7_PERFCTR_AXI_WRITE_CYCLES		= 0x46,
+	ARMV7_PERFCTR_MEMORY_REPLAY		= 0x47,
+	ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY	= 0x48,
+	ARMV7_PERFCTR_L1_DATA_MISS		= 0x49,
+	ARMV7_PERFCTR_L1_INST_MISS		= 0x4A,
+	ARMV7_PERFCTR_L1_DATA_COLORING		= 0x4B,
+	ARMV7_PERFCTR_L1_NEON_DATA		= 0x4C,
+	ARMV7_PERFCTR_L1_NEON_CACH_DATA		= 0x4D,
+	ARMV7_PERFCTR_L2_NEON			= 0x4E,
+	ARMV7_PERFCTR_L2_NEON_HIT		= 0x4F,
+	ARMV7_PERFCTR_L1_INST			= 0x50,
+	ARMV7_PERFCTR_PC_RETURN_MIS_PRED	= 0x51,
+	ARMV7_PERFCTR_PC_BRANCH_FAILED		= 0x52,
+	ARMV7_PERFCTR_PC_BRANCH_TAKEN		= 0x53,
+	ARMV7_PERFCTR_PC_BRANCH_EXECUTED	= 0x54,
+	ARMV7_PERFCTR_OP_EXECUTED		= 0x55,
+	ARMV7_PERFCTR_CYCLES_INST_STALL		= 0x56,
+	ARMV7_PERFCTR_CYCLES_INST		= 0x57,
+	ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL	= 0x58,
+	ARMV7_PERFCTR_CYCLES_NEON_INST_STALL	= 0x59,
+	ARMV7_PERFCTR_NEON_CYCLES		= 0x5A,
+
+	ARMV7_PERFCTR_PMU0_EVENTS		= 0x70,
+	ARMV7_PERFCTR_PMU1_EVENTS		= 0x71,
+	ARMV7_PERFCTR_PMU_EVENTS		= 0x72,
+};
+
+/* ARMv7 Cortex-A9 specific event types */
+enum armv7_a9_perf_types {
+	ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC	= 0x40,
+	ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC	= 0x41,
+	ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC	= 0x42,
+
+	ARMV7_PERFCTR_COHERENT_LINE_MISS	= 0x50,
+	ARMV7_PERFCTR_COHERENT_LINE_HIT		= 0x51,
+
+	ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES	= 0x60,
+	ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES	= 0x61,
+	ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES	= 0x62,
+	ARMV7_PERFCTR_STREX_EXECUTED_PASSED	= 0x63,
+	ARMV7_PERFCTR_STREX_EXECUTED_FAILED	= 0x64,
+	ARMV7_PERFCTR_DATA_EVICTION		= 0x65,
+	ARMV7_PERFCTR_ISSUE_STAGE_NO_INST	= 0x66,
+	ARMV7_PERFCTR_ISSUE_STAGE_EMPTY		= 0x67,
+	ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE	= 0x68,
+
+	ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS	= 0x6E,
+
+	ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST	= 0x70,
+	ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST	= 0x71,
+	ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST	= 0x72,
+	ARMV7_PERFCTR_FP_EXECUTED_INST		= 0x73,
+	ARMV7_PERFCTR_NEON_EXECUTED_INST	= 0x74,
+
+	ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES	= 0x80,
+	ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES	= 0x81,
+	ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES	= 0x82,
+	ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES	= 0x83,
+	ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES	= 0x84,
+	ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES	= 0x85,
+	ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES	= 0x86,
+
+	ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES	= 0x8A,
+	ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES	= 0x8B,
+
+	ARMV7_PERFCTR_ISB_INST			= 0x90,
+	ARMV7_PERFCTR_DSB_INST			= 0x91,
+	ARMV7_PERFCTR_DMB_INST			= 0x92,
+	ARMV7_PERFCTR_EXT_INTERRUPTS		= 0x93,
+
+	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED	= 0xA0,
+	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED	= 0xA1,
+	ARMV7_PERFCTR_PLE_FIFO_FLUSH		= 0xA2,
+	ARMV7_PERFCTR_PLE_RQST_COMPLETED	= 0xA3,
+	ARMV7_PERFCTR_PLE_FIFO_OVERFLOW		= 0xA4,
+	ARMV7_PERFCTR_PLE_RQST_PROG		= 0xA5
+};
+
+/*
+ * Cortex-A8 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		/*
+		 * The performance counters don't differentiate between read
+		 * and write accesses/misses so this isn't strictly correct,
+		 * but it's the best we can do. Writes and reads get
+		 * combined.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		/*
+		 * Only ITLB misses and DTLB refills are supported.
+		 * If users want the DTLB refills misses a raw counter
+		 * must be used.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+/*
+ * Cortex-A9 HW events mapping
+ */
+static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    =
+					ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_COHERENT_LINE_MISS,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		/*
+		 * The performance counters don't differentiate between read
+		 * and write accesses/misses so this isn't strictly correct,
+		 * but it's the best we can do. Writes and reads get
+		 * combined.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		/*
+		 * Only ITLB misses and DTLB refills are supported.
+		 * If users want the DTLB refills misses a raw counter
+		 * must be used.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+/*
+ * Perf Events counters
+ */
+enum armv7_counters {
+	ARMV7_CYCLE_COUNTER		= 1,	/* Cycle counter */
+	ARMV7_COUNTER0			= 2,	/* First event counter */
+};
+
+/*
+ * The cycle counter is ARMV7_CYCLE_COUNTER.
+ * The first event counter is ARMV7_COUNTER0.
+ * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
+ */
+#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
+
+/*
+ * ARMv7 low level PMNC access
+ */
+
+/*
+ * Per-CPU PMNC: config reg
+ */
+#define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
+#define ARMV7_PMNC_P		(1 << 1) /* Reset all counters */
+#define ARMV7_PMNC_C		(1 << 2) /* Cycle counter reset */
+#define ARMV7_PMNC_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV7_PMNC_X		(1 << 4) /* Export to ETM */
+#define ARMV7_PMNC_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
+#define	ARMV7_PMNC_N_SHIFT	11	 /* Number of counters supported */
+#define	ARMV7_PMNC_N_MASK	0x1f
+#define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
+
+/*
+ * Available counters
+ */
+#define ARMV7_CNT0		0	/* First event counter */
+#define ARMV7_CCNT		31	/* Cycle counter */
+
+/* Perf Event to low level counters mapping */
+#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)
+
+/*
+ * CNTENS: counters enable reg
+ */
+#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
+
+/*
+ * CNTENC: counters disable reg
+ */
+#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
+
+/*
+ * INTENS: counters overflow interrupt enable reg
+ */
+#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
+
+/*
+ * INTENC: counters overflow interrupt disable reg
+ */
+#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
+
+/*
+ * EVTSEL: Event selection reg
+ */
+#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */
+
+/*
+ * SELECT: Counter selection reg
+ */
+#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */
+
+/*
+ * FLAG: counters overflow flag status reg
+ */
+#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
+#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
+
+static inline unsigned long armv7_pmnc_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
+	return val;
+}
+
+static inline void armv7_pmnc_write(unsigned long val)
+{
+	val &= ARMV7_PMNC_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+}
+
+static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
+{
+	return pmnc & ARMV7_OVERFLOWED_MASK;
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
+					enum armv7_counters counter)
+{
+	int ret = 0;
+
+	if (counter == ARMV7_CYCLE_COUNTER)
+		ret = pmnc & ARMV7_FLAG_C;
+	else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
+		ret = pmnc & ARMV7_FLAG_P(counter);
+	else
+		pr_err("CPU%u checking wrong counter %d overflow status\n",
+			smp_processor_id(), counter);
+
+	return ret;
+}
+
+static inline int armv7_pmnc_select_counter(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
+		pr_err("CPU%u selecting wrong PMNC counter"
+			" %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7pmu_read_counter(int idx)
+{
+	unsigned long value = 0;
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
+		if (armv7_pmnc_select_counter(idx) == idx)
+			asm volatile("mrc p15, 0, %0, c9, c13, 2"
+				     : "=r" (value));
+	} else
+		pr_err("CPU%u reading wrong counter %d\n",
+			smp_processor_id(), idx);
+
+	return value;
+}
+
+static inline void armv7pmu_write_counter(int idx, u32 value)
+{
+	if (idx == ARMV7_CYCLE_COUNTER)
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
+		if (armv7_pmnc_select_counter(idx) == idx)
+			asm volatile("mcr p15, 0, %0, c9, c13, 2"
+				     : : "r" (value));
+	} else
+		pr_err("CPU%u writing wrong counter %d\n",
+			smp_processor_id(), idx);
+}
+
+static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
+{
+	if (armv7_pmnc_select_counter(idx) == idx) {
+		val &= ARMV7_EVTSEL_MASK;
+		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+	}
+}
+
+static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx != ARMV7_CYCLE_COUNTER) &&
+	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
+		pr_err("CPU%u enabling wrong PMNC counter"
+			" %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		val = ARMV7_CNTENS_C;
+	else
+		val = ARMV7_CNTENS_P(idx);
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
+{
+	u32 val;
+
+
+	if ((idx != ARMV7_CYCLE_COUNTER) &&
+	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
+		pr_err("CPU%u disabling wrong PMNC counter"
+			" %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		val = ARMV7_CNTENC_C;
+	else
+		val = ARMV7_CNTENC_P(idx);
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx != ARMV7_CYCLE_COUNTER) &&
+	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
+		pr_err("CPU%u enabling wrong PMNC counter"
+			" interrupt enable %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		val = ARMV7_INTENS_C;
+	else
+		val = ARMV7_INTENS_P(idx);
+
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx != ARMV7_CYCLE_COUNTER) &&
+	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
+		pr_err("CPU%u disabling wrong PMNC counter"
+			" interrupt enable %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		val = ARMV7_INTENC_C;
+	else
+		val = ARMV7_INTENC_P(idx);
+
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7_pmnc_getreset_flags(void)
+{
+	u32 val;
+
+	/* Read */
+	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+
+	/* Write to clear flags */
+	val &= ARMV7_FLAG_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
+
+	return val;
+}
+
+#ifdef DEBUG
+static void armv7_pmnc_dump_regs(void)
+{
+	u32 val;
+	unsigned int cnt;
+
+	printk(KERN_INFO "PMNC registers dump:\n");
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+	printk(KERN_INFO "PMNC  =0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
+	printk(KERN_INFO "CNTENS=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
+	printk(KERN_INFO "INTENS=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+	printk(KERN_INFO "FLAGS =0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
+	printk(KERN_INFO "SELECT=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+	printk(KERN_INFO "CCNT  =0x%08x\n", val);
+
+	for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
+		armv7_pmnc_select_counter(cnt);
+		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
+			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
+		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
+			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+	}
+}
+#endif
+
+void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags;
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We don't need to set the event if it's a cycle count
+	 */
+	if (idx != ARMV7_CYCLE_COUNTER)
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/*
+	 * Enable interrupt for this counter
+	 */
+	armv7_pmnc_enable_intens(idx);
+
+	/*
+	 * Enable counter
+	 */
+	armv7_pmnc_enable_counter(idx);
+
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags;
+
+	/*
+	 * Disable counter and interrupt
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Disable interrupt for this counter
+	 */
+	armv7_pmnc_disable_intens(idx);
+
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * Get and reset the IRQ flags
+	 */
+	pmnc = armv7_pmnc_getreset_flags();
+
+	/*
+	 * Did an overflow occur?
+	 */
+	if (!armv7_pmnc_has_overflowed(pmnc))
+		return IRQ_NONE;
+
+	/*
+	 * Handle the counter(s) overflow(s)
+	 */
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	/*
+	 * Handle the pending perf events.
+	 *
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
+	 * will not work.
+	 */
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+static void armv7pmu_start(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	/* Enable all counters */
+	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void armv7pmu_stop(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	/* Disable all counters */
+	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
+				  struct hw_perf_event *event)
+{
+	int idx;
+
+	/* Always place a cycle counter into the cycle counter. */
+	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
+		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return ARMV7_CYCLE_COUNTER;
+	} else {
+		/*
+		 * For anything other than a cycle counter, try and use
+		 * the events counters
+		 */
+		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
+			if (!test_and_set_bit(idx, cpuc->used_mask))
+				return idx;
+		}
+
+		/* The counters are all in use. */
+		return -EAGAIN;
+	}
+}
+
+static struct arm_pmu armv7pmu = {
+	.handle_irq		= armv7pmu_handle_irq,
+	.enable			= armv7pmu_enable_event,
+	.disable		= armv7pmu_disable_event,
+	.read_counter		= armv7pmu_read_counter,
+	.write_counter		= armv7pmu_write_counter,
+	.get_event_idx		= armv7pmu_get_event_idx,
+	.start			= armv7pmu_start,
+	.stop			= armv7pmu_stop,
+	.raw_event_mask		= 0xFF,
+	.max_period		= (1LLU << 32) - 1,
+};
+
+static u32 __init armv7_reset_read_pmnc(void)
+{
+	u32 nb_cnt;
+
+	/* Initialize & Reset PMNC: C and P bits */
+	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+
+	/* Read the nb of CNTx counters supported from PMNC */
+	nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+
+	/* Add the CPU cycles counter and return */
+	return nb_cnt + 1;
+}
+
+const struct arm_pmu *__init armv7_a8_pmu_init(void)
+{
+	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
+	armv7pmu.name		= "ARMv7 Cortex-A8";
+	armv7pmu.cache_map	= &armv7_a8_perf_cache_map;
+	armv7pmu.event_map	= &armv7_a8_perf_map;
+	armv7pmu.num_events	= armv7_reset_read_pmnc();
+	return &armv7pmu;
+}
+
+const struct arm_pmu *__init armv7_a9_pmu_init(void)
+{
+	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
+	armv7pmu.name		= "ARMv7 Cortex-A9";
+	armv7pmu.cache_map	= &armv7_a9_perf_cache_map;
+	armv7pmu.event_map	= &armv7_a9_perf_map;
+	armv7pmu.num_events	= armv7_reset_read_pmnc();
+	return &armv7pmu;
+}
+#else
+const struct arm_pmu *__init armv7_a8_pmu_init(void)
+{
+	return NULL;
+}
+
+const struct arm_pmu *__init armv7_a9_pmu_init(void)
+{
+	return NULL;
+}
+#endif	/* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
new file mode 100644
index 0000000..4e95927
--- /dev/null
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -0,0 +1,807 @@
+/*
+ * ARMv5 [xscale] Performance counter handling code.
+ *
+ * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * Based on the previous xscale OProfile code.
+ *
+ * There are two variants of the xscale PMU that we support:
+ * 	- xscale1pmu: 2 event counters and a cycle counter
+ * 	- xscale2pmu: 4 event counters and a cycle counter
+ * The two variants share event definitions, but have different
+ * PMU structures.
+ */
+
+#ifdef CONFIG_CPU_XSCALE
+enum xscale_perf_types {
+	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
+	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
+	XSCALE_PERFCTR_DATA_STALL		= 0x02,
+	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
+	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
+	XSCALE_PERFCTR_BRANCH			= 0x05,
+	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
+	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
+	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
+	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
+	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0A,
+	XSCALE_PERFCTR_DCACHE_MISS		= 0x0B,
+	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0C,
+	XSCALE_PERFCTR_PC_CHANGED		= 0x0D,
+	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
+	XSCALE_PERFCTR_BCU_FULL			= 0x11,
+	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
+	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
+	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
+	XSCALE_PERFCTR_RMW			= 0x16,
+	/* XSCALE_PERFCTR_CCNT is not hardware defined */
+	XSCALE_PERFCTR_CCNT			= 0xFE,
+	XSCALE_PERFCTR_UNUSED			= 0xFF,
+};
+
+enum xscale_counters {
+	XSCALE_CYCLE_COUNTER	= 1,
+	XSCALE_COUNTER0,
+	XSCALE_COUNTER1,
+	XSCALE_COUNTER2,
+	XSCALE_COUNTER3,
+};
+
+static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = XSCALE_PERFCTR_CCNT,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = XSCALE_PERFCTR_INSTRUCTION,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = XSCALE_PERFCTR_BRANCH_MISS,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					   [PERF_COUNT_HW_CACHE_OP_MAX]
+					   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+#define	XSCALE_PMU_ENABLE	0x001
+#define XSCALE_PMN_RESET	0x002
+#define	XSCALE_CCNT_RESET	0x004
+#define	XSCALE_PMU_RESET	(CCNT_RESET | PMN_RESET)
+#define XSCALE_PMU_CNT64	0x008
+
+#define XSCALE1_OVERFLOWED_MASK	0x700
+#define XSCALE1_CCOUNT_OVERFLOW	0x400
+#define XSCALE1_COUNT0_OVERFLOW	0x100
+#define XSCALE1_COUNT1_OVERFLOW	0x200
+#define XSCALE1_CCOUNT_INT_EN	0x040
+#define XSCALE1_COUNT0_INT_EN	0x010
+#define XSCALE1_COUNT1_INT_EN	0x020
+#define XSCALE1_COUNT0_EVT_SHFT	12
+#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
+#define XSCALE1_COUNT1_EVT_SHFT	20
+#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
+
+static inline u32
+xscale1pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale1pmu_write_pmnc(u32 val)
+{
+	/* upper 4bits and 7, 11 are write-as-0 */
+	val &= 0xffff77f;
+	asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
+}
+
+static inline int
+xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+xscale1pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * NOTE: there's an A stepping erratum that states if an overflow
+	 *       bit already exists and another occurs, the previous
+	 *       Overflow bit gets cleared. There's no workaround.
+	 *	 Fixed in B stepping or later.
+	 */
+	pmnc = xscale1pmu_read_pmnc();
+
+	/*
+	 * Write the value back to clear the overflow flags. Overflow
+	 * flags remain in pmnc for use below. We also disable the PMU
+	 * while we process the interrupt.
+	 */
+	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
+static void
+xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long val, mask, evt, flags;
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		mask = 0;
+		evt = XSCALE1_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		mask = XSCALE1_COUNT0_EVT_MASK;
+		evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
+			XSCALE1_COUNT0_INT_EN;
+		break;
+	case XSCALE_COUNTER1:
+		mask = XSCALE1_COUNT1_EVT_MASK;
+		evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
+			XSCALE1_COUNT1_INT_EN;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~mask;
+	val |= evt;
+	xscale1pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long val, mask, evt, flags;
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		mask = XSCALE1_CCOUNT_INT_EN;
+		evt = 0;
+		break;
+	case XSCALE_COUNTER0:
+		mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
+		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
+		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~mask;
+	val |= evt;
+	xscale1pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int
+xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
+			struct hw_perf_event *event)
+{
+	if (XSCALE_PERFCTR_CCNT == event->config_base) {
+		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return XSCALE_CYCLE_COUNTER;
+	} else {
+		if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
+			return XSCALE_COUNTER1;
+
+		if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
+			return XSCALE_COUNTER0;
+
+		return -EAGAIN;
+	}
+}
+
+static void
+xscale1pmu_start(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val |= XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale1pmu_stop(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static inline u32
+xscale1pmu_read_counter(int counter)
+{
+	u32 val = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
+		break;
+	}
+
+	return val;
+}
+
+static inline void
+xscale1pmu_write_counter(int counter, u32 val)
+{
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
+		break;
+	}
+}
+
+static const struct arm_pmu xscale1pmu = {
+	.id		= ARM_PERF_PMU_ID_XSCALE1,
+	.name		= "xscale1",
+	.handle_irq	= xscale1pmu_handle_irq,
+	.enable		= xscale1pmu_enable_event,
+	.disable	= xscale1pmu_disable_event,
+	.read_counter	= xscale1pmu_read_counter,
+	.write_counter	= xscale1pmu_write_counter,
+	.get_event_idx	= xscale1pmu_get_event_idx,
+	.start		= xscale1pmu_start,
+	.stop		= xscale1pmu_stop,
+	.cache_map	= &xscale_perf_cache_map,
+	.event_map	= &xscale_perf_map,
+	.raw_event_mask	= 0xFF,
+	.num_events	= 3,
+	.max_period	= (1LLU << 32) - 1,
+};
+
+const struct arm_pmu *__init xscale1pmu_init(void)
+{
+	return &xscale1pmu;
+}
+
+#define XSCALE2_OVERFLOWED_MASK	0x01f
+#define XSCALE2_CCOUNT_OVERFLOW	0x001
+#define XSCALE2_COUNT0_OVERFLOW	0x002
+#define XSCALE2_COUNT1_OVERFLOW	0x004
+#define XSCALE2_COUNT2_OVERFLOW	0x008
+#define XSCALE2_COUNT3_OVERFLOW	0x010
+#define XSCALE2_CCOUNT_INT_EN	0x001
+#define XSCALE2_COUNT0_INT_EN	0x002
+#define XSCALE2_COUNT1_INT_EN	0x004
+#define XSCALE2_COUNT2_INT_EN	0x008
+#define XSCALE2_COUNT3_INT_EN	0x010
+#define XSCALE2_COUNT0_EVT_SHFT	0
+#define XSCALE2_COUNT0_EVT_MASK	(0xff << XSCALE2_COUNT0_EVT_SHFT)
+#define XSCALE2_COUNT1_EVT_SHFT	8
+#define XSCALE2_COUNT1_EVT_MASK	(0xff << XSCALE2_COUNT1_EVT_SHFT)
+#define XSCALE2_COUNT2_EVT_SHFT	16
+#define XSCALE2_COUNT2_EVT_MASK	(0xff << XSCALE2_COUNT2_EVT_SHFT)
+#define XSCALE2_COUNT3_EVT_SHFT	24
+#define XSCALE2_COUNT3_EVT_MASK	(0xff << XSCALE2_COUNT3_EVT_SHFT)
+
+static inline u32
+xscale2pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
+	/* bits 1-2 and 4-23 are read-unpredictable */
+	return val & 0xff000009;
+}
+
+static inline void
+xscale2pmu_write_pmnc(u32 val)
+{
+	/* bits 4-23 are write-as-0, 24-31 are write ignored */
+	val &= 0xf;
+	asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_overflow_flags(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale2pmu_write_overflow_flags(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_event_select(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale2pmu_write_event_select(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
+}
+
+static inline u32
+xscale2pmu_read_int_enable(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
+	return val;
+}
+
+static void
+xscale2pmu_write_int_enable(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
+}
+
+static inline int
+xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
+		break;
+	case XSCALE_COUNTER2:
+		ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
+		break;
+	case XSCALE_COUNTER3:
+		ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+xscale2pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc, of_flags;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	/* Disable the PMU. */
+	pmnc = xscale2pmu_read_pmnc();
+	xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+	/* Check the overflow flag register. */
+	of_flags = xscale2pmu_read_overflow_flags();
+	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	/* Clear the overflow bits. */
+	xscale2pmu_write_overflow_flags(of_flags);
+
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
+static void
+xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags, ien, evtsel;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien |= XSCALE2_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		ien |= XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		ien |= XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER2:
+		ien |= XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER3:
+		ien |= XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags, ien, evtsel;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien &= ~XSCALE2_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		ien &= ~XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		ien &= ~XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER2:
+		ien &= ~XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER3:
+		ien &= ~XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int
+xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
+			struct hw_perf_event *event)
+{
+	int idx = xscale1pmu_get_event_idx(cpuc, event);
+	if (idx >= 0)
+		goto out;
+
+	if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
+		idx = XSCALE_COUNTER3;
+	else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
+		idx = XSCALE_COUNTER2;
+out:
+	return idx;
+}
+
+static void
+xscale2pmu_start(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
+	val |= XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale2pmu_stop(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale2pmu_read_pmnc();
+	val &= ~XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static inline u32
+xscale2pmu_read_counter(int counter)
+{
+	u32 val = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER2:
+		asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER3:
+		asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
+		break;
+	}
+
+	return val;
+}
+
+static inline void
+xscale2pmu_write_counter(int counter, u32 val)
+{
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER2:
+		asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER3:
+		asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
+		break;
+	}
+}
+
+static const struct arm_pmu xscale2pmu = {
+	.id		= ARM_PERF_PMU_ID_XSCALE2,
+	.name		= "xscale2",
+	.handle_irq	= xscale2pmu_handle_irq,
+	.enable		= xscale2pmu_enable_event,
+	.disable	= xscale2pmu_disable_event,
+	.read_counter	= xscale2pmu_read_counter,
+	.write_counter	= xscale2pmu_write_counter,
+	.get_event_idx	= xscale2pmu_get_event_idx,
+	.start		= xscale2pmu_start,
+	.stop		= xscale2pmu_stop,
+	.cache_map	= &xscale_perf_cache_map,
+	.event_map	= &xscale_perf_map,
+	.raw_event_mask	= 0xFF,
+	.num_events	= 5,
+	.max_period	= (1LLU << 32) - 1,
+};
+
+const struct arm_pmu *__init xscale2pmu_init(void)
+{
+	return &xscale2pmu;
+}
+#else
+const struct arm_pmu *__init xscale1pmu_init(void)
+{
+	return NULL;
+}
+
+const struct arm_pmu *__init xscale2pmu_init(void)
+{
+	return NULL;
+}
+#endif	/* CONFIG_CPU_XSCALE */
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 1e4cbd4..64f6bc1 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -174,8 +174,8 @@
  */
 .L_found:
 #if __LINUX_ARM_ARCH__ >= 5
-		rsb	r1, r3, #0
-		and	r3, r3, r1
+		rsb	r0, r3, #0
+		and	r3, r3, r0
 		clz	r3, r3
 		rsb	r3, r3, #31
 		add	r0, r2, r3
@@ -190,5 +190,7 @@
 		addeq	r2, r2, #1
 		mov	r0, r2
 #endif
+		cmp	r1, r0			@ Clamp to maxbit
+		movlo	r0, r1
 		mov	pc, lr
 
diff --git a/arch/arm/mach-aaec2000/include/mach/vmalloc.h b/arch/arm/mach-aaec2000/include/mach/vmalloc.h
index cff4e0a..a6299e8 100644
--- a/arch/arm/mach-aaec2000/include/mach/vmalloc.h
+++ b/arch/arm/mach-aaec2000/include/mach/vmalloc.h
@@ -11,6 +11,6 @@
 #ifndef __ASM_ARCH_VMALLOC_H
 #define __ASM_ARCH_VMALLOC_H
 
-#define VMALLOC_END		0xd0000000
+#define VMALLOC_END		0xd0000000UL
 
 #endif /* __ASM_ARCH_VMALLOC_H */
diff --git a/arch/arm/mach-bcmring/include/mach/vmalloc.h b/arch/arm/mach-bcmring/include/mach/vmalloc.h
index 3db3a09..7397bd7 100644
--- a/arch/arm/mach-bcmring/include/mach/vmalloc.h
+++ b/arch/arm/mach-bcmring/include/mach/vmalloc.h
@@ -22,4 +22,4 @@
  * 0xe0000000 to 0xefffffff. This gives us 256 MB of vm space and handles
  * larger physical memory designs better.
  */
-#define VMALLOC_END       0xf0000000
+#define VMALLOC_END       0xf0000000UL
diff --git a/arch/arm/mach-clps711x/include/mach/vmalloc.h b/arch/arm/mach-clps711x/include/mach/vmalloc.h
index 30b3a287..467b961 100644
--- a/arch/arm/mach-clps711x/include/mach/vmalloc.h
+++ b/arch/arm/mach-clps711x/include/mach/vmalloc.h
@@ -17,4 +17,4 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#define VMALLOC_END       0xd0000000
+#define VMALLOC_END       0xd0000000UL
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 9be261b..2652af1 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -359,8 +359,8 @@
 	CLK(NULL, "uart1", &uart1_clk),
 	CLK(NULL, "uart2", &uart2_clk),
 	CLK("i2c_davinci.1", NULL, &i2c_clk),
-	CLK("davinci-asp.0", NULL, &asp0_clk),
-	CLK("davinci-asp.1", NULL, &asp1_clk),
+	CLK("davinci-mcbsp.0", NULL, &asp0_clk),
+	CLK("davinci-mcbsp.1", NULL, &asp1_clk),
 	CLK("davinci_mmc.0", NULL, &mmcsd0_clk),
 	CLK("davinci_mmc.1", NULL, &mmcsd1_clk),
 	CLK("spi_davinci.0", NULL, &spi0_clk),
@@ -664,7 +664,7 @@
 };
 
 static struct platform_device dm355_asp1_device = {
-	.name		= "davinci-asp",
+	.name		= "davinci-mcbsp",
 	.id		= 1,
 	.num_resources	= ARRAY_SIZE(dm355_asp1_resources),
 	.resource	= dm355_asp1_resources,
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index a12065e..c466d71 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -459,7 +459,7 @@
 	CLK(NULL, "usb", &usb_clk),
 	CLK("davinci_emac.1", NULL, &emac_clk),
 	CLK("davinci_voicecodec", NULL, &voicecodec_clk),
-	CLK("davinci-asp.0", NULL, &asp0_clk),
+	CLK("davinci-mcbsp", NULL, &asp0_clk),
 	CLK(NULL, "rto", &rto_clk),
 	CLK(NULL, "mjcp", &mjcp_clk),
 	CLK(NULL, NULL, NULL),
@@ -922,8 +922,8 @@
 };
 
 static struct platform_device dm365_asp_device = {
-	.name		= "davinci-asp",
-	.id		= 0,
+	.name		= "davinci-mcbsp",
+	.id		= -1,
 	.num_resources	= ARRAY_SIZE(dm365_asp_resources),
 	.resource	= dm365_asp_resources,
 };
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 0608dd7..9a2376b 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -302,7 +302,7 @@
 	CLK("davinci_emac.1", NULL, &emac_clk),
 	CLK("i2c_davinci.1", NULL, &i2c_clk),
 	CLK("palm_bk3710", NULL, &ide_clk),
-	CLK("davinci-asp", NULL, &asp_clk),
+	CLK("davinci-mcbsp", NULL, &asp_clk),
 	CLK("davinci_mmc.0", NULL, &mmcsd_clk),
 	CLK(NULL, "spi", &spi_clk),
 	CLK(NULL, "gpio", &gpio_clk),
@@ -580,7 +580,7 @@
 };
 
 static struct platform_device dm644x_asp_device = {
-	.name		= "davinci-asp",
+	.name		= "davinci-mcbsp",
 	.id		= -1,
 	.num_resources	= ARRAY_SIZE(dm644x_asp_resources),
 	.resource	= dm644x_asp_resources,
diff --git a/arch/arm/mach-ebsa110/include/mach/vmalloc.h b/arch/arm/mach-ebsa110/include/mach/vmalloc.h
index 60bde56..ea141b7a 100644
--- a/arch/arm/mach-ebsa110/include/mach/vmalloc.h
+++ b/arch/arm/mach-ebsa110/include/mach/vmalloc.h
@@ -7,4 +7,4 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#define VMALLOC_END       0xdf000000
+#define VMALLOC_END       0xdf000000UL
diff --git a/arch/arm/mach-footbridge/include/mach/vmalloc.h b/arch/arm/mach-footbridge/include/mach/vmalloc.h
index 0ffbb7c..40ba78e 100644
--- a/arch/arm/mach-footbridge/include/mach/vmalloc.h
+++ b/arch/arm/mach-footbridge/include/mach/vmalloc.h
@@ -7,4 +7,4 @@
  */
 
 
-#define VMALLOC_END       0xf0000000
+#define VMALLOC_END       0xf0000000UL
diff --git a/arch/arm/mach-h720x/include/mach/vmalloc.h b/arch/arm/mach-h720x/include/mach/vmalloc.h
index a45915b..8520b4a 100644
--- a/arch/arm/mach-h720x/include/mach/vmalloc.h
+++ b/arch/arm/mach-h720x/include/mach/vmalloc.h
@@ -5,6 +5,6 @@
 #ifndef __ARCH_ARM_VMALLOC_H
 #define __ARCH_ARM_VMALLOC_H
 
-#define VMALLOC_END       0xd0000000
+#define VMALLOC_END       0xd0000000UL
 
 #endif
diff --git a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c
index 026263c..7e1e9dc 100644
--- a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c
+++ b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c
@@ -250,9 +250,6 @@
 	.flags = IMXUART_HAVE_RTSCTS,
 };
 
-#if defined(CONFIG_TOUCHSCREEN_ADS7846) \
-	|| defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE)
-
 #define ADS7846_PENDOWN (GPIO_PORTD | 25)
 
 static void ads7846_dev_init(void)
@@ -273,9 +270,7 @@
 	.get_pendown_state	= ads7846_get_pendown_state,
 	.keep_vref_on		= 1,
 };
-#endif
 
-#if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE)
 static struct spi_board_info eukrea_mbimx27_spi_board_info[] __initdata = {
 	[0] = {
 		.modalias	= "ads7846",
@@ -294,7 +289,6 @@
 	.chipselect	= eukrea_mbimx27_spi_cs,
 	.num_chipselect = ARRAY_SIZE(eukrea_mbimx27_spi_cs),
 };
-#endif
 
 static struct i2c_board_info eukrea_mbimx27_i2c_devices[] = {
 	{
diff --git a/arch/arm/mach-integrator/include/mach/vmalloc.h b/arch/arm/mach-integrator/include/mach/vmalloc.h
index e056e7c..2f5a2baf 100644
--- a/arch/arm/mach-integrator/include/mach/vmalloc.h
+++ b/arch/arm/mach-integrator/include/mach/vmalloc.h
@@ -17,4 +17,4 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#define VMALLOC_END       0xd0000000
+#define VMALLOC_END       0xd0000000UL
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig
index dbbcfeb..31e5fd6 100644
--- a/arch/arm/mach-msm/Kconfig
+++ b/arch/arm/mach-msm/Kconfig
@@ -49,6 +49,8 @@
 
 config MSM_SOC_REV_A
 	bool
+config  ARCH_MSM_SCORPIONMP
+	bool
 
 config  ARCH_MSM_ARM11
 	bool
diff --git a/arch/arm/mach-msm/include/mach/vmalloc.h b/arch/arm/mach-msm/include/mach/vmalloc.h
index 31a32ad..d138448 100644
--- a/arch/arm/mach-msm/include/mach/vmalloc.h
+++ b/arch/arm/mach-msm/include/mach/vmalloc.h
@@ -16,7 +16,7 @@
 #ifndef __ASM_ARCH_MSM_VMALLOC_H
 #define __ASM_ARCH_MSM_VMALLOC_H
 
-#define VMALLOC_END	  0xd0000000
+#define VMALLOC_END	  0xd0000000UL
 
 #endif
 
diff --git a/arch/arm/mach-mx25/devices-imx25.h b/arch/arm/mach-mx25/devices-imx25.h
index 93afa10..d94d282 100644
--- a/arch/arm/mach-mx25/devices-imx25.h
+++ b/arch/arm/mach-mx25/devices-imx25.h
@@ -42,9 +42,9 @@
 #define imx25_add_mxc_nand(pdata)	\
 	imx_add_mxc_nand(&imx25_mxc_nand_data, pdata)
 
-extern const struct imx_spi_imx_data imx25_spi_imx_data[] __initconst;
+extern const struct imx_spi_imx_data imx25_cspi_data[] __initconst;
 #define imx25_add_spi_imx(id, pdata)	\
-	imx_add_spi_imx(&imx25_spi_imx_data[id], pdata)
+	imx_add_spi_imx(&imx25_cspi_data[id], pdata)
 #define imx25_add_spi_imx0(pdata)	imx25_add_spi_imx(0, pdata)
 #define imx25_add_spi_imx1(pdata)	imx25_add_spi_imx(1, pdata)
 #define imx25_add_spi_imx2(pdata)	imx25_add_spi_imx(2, pdata)
diff --git a/arch/arm/mach-mx3/mach-pcm037_eet.c b/arch/arm/mach-mx3/mach-pcm037_eet.c
index 99e0894..fda5654 100644
--- a/arch/arm/mach-mx3/mach-pcm037_eet.c
+++ b/arch/arm/mach-mx3/mach-pcm037_eet.c
@@ -14,6 +14,7 @@
 
 #include <mach/common.h>
 #include <mach/iomux-mx3.h>
+#include <mach/spi.h>
 
 #include <asm/mach-types.h>
 
@@ -59,14 +60,12 @@
 };
 
 /* Platform Data for MXC CSPI */
-#if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE)
 static int pcm037_spi1_cs[] = {MXC_SPI_CS(1), IOMUX_TO_GPIO(MX31_PIN_KEY_COL7)};
 
 static const struct spi_imx_master pcm037_spi1_pdata __initconst = {
 	.chipselect = pcm037_spi1_cs,
 	.num_chipselect = ARRAY_SIZE(pcm037_spi1_cs),
 };
-#endif
 
 /* GPIO-keys input device */
 static struct gpio_keys_button pcm037_gpio_keys[] = {
@@ -171,7 +170,7 @@
 	},
 };
 
-static int eet_init_devices(void)
+static int __init eet_init_devices(void)
 {
 	if (!machine_is_pcm037() || pcm037_variant() != PCM037_EET)
 		return 0;
diff --git a/arch/arm/mach-netx/include/mach/vmalloc.h b/arch/arm/mach-netx/include/mach/vmalloc.h
index 7cca357..871f1ef 100644
--- a/arch/arm/mach-netx/include/mach/vmalloc.h
+++ b/arch/arm/mach-netx/include/mach/vmalloc.h
@@ -16,4 +16,4 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#define VMALLOC_END       0xd0000000
+#define VMALLOC_END       0xd0000000UL
diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c
index ea0d80a..e7f9ee6 100644
--- a/arch/arm/mach-omap1/devices.c
+++ b/arch/arm/mach-omap1/devices.c
@@ -321,10 +321,9 @@
 static int __init omap_init_wdt(void)
 {
 	if (!cpu_is_omap16xx())
-		return;
+		return -ENODEV;
 
-	platform_device_register(&omap_wdt_device);
-	return 0;
+	return platform_device_register(&omap_wdt_device);
 }
 subsys_initcall(omap_init_wdt);
 #endif
diff --git a/arch/arm/mach-omap1/include/mach/camera.h b/arch/arm/mach-omap1/include/mach/camera.h
index fd54b45..847d00f 100644
--- a/arch/arm/mach-omap1/include/mach/camera.h
+++ b/arch/arm/mach-omap1/include/mach/camera.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_ARCH_CAMERA_H_
 #define __ASM_ARCH_CAMERA_H_
 
+#include <media/omap1_camera.h>
+
 void omap1_camera_init(void *);
 
 static inline void omap1_set_camera_info(struct omap1_cam_platform_data *info)
diff --git a/arch/arm/mach-omap1/include/mach/vmalloc.h b/arch/arm/mach-omap1/include/mach/vmalloc.h
index b001f67..22ec4a4 100644
--- a/arch/arm/mach-omap1/include/mach/vmalloc.h
+++ b/arch/arm/mach-omap1/include/mach/vmalloc.h
@@ -17,4 +17,4 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
-#define VMALLOC_END	0xd8000000
+#define VMALLOC_END	0xd8000000UL
diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c
index 067f437..53ac762 100644
--- a/arch/arm/mach-omap2/board-devkit8000.c
+++ b/arch/arm/mach-omap2/board-devkit8000.c
@@ -242,9 +242,6 @@
 	mmc[0].gpio_cd = gpio + 0;
 	omap2_hsmmc_init(mmc);
 
-	/* link regulators to MMC adapters */
-	devkit8000_vmmc1_supply.dev = mmc[0].dev;
-
 	/* TWL4030_GPIO_MAX + 1 == ledB, PMU_STAT (out, active low LED) */
 	gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1;
 
diff --git a/arch/arm/mach-omap2/include/mach/vmalloc.h b/arch/arm/mach-omap2/include/mach/vmalloc.h
index 4da31e9..8663199 100644
--- a/arch/arm/mach-omap2/include/mach/vmalloc.h
+++ b/arch/arm/mach-omap2/include/mach/vmalloc.h
@@ -17,4 +17,4 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
-#define VMALLOC_END	  0xf8000000
+#define VMALLOC_END	  0xf8000000UL
diff --git a/arch/arm/mach-pnx4008/include/mach/vmalloc.h b/arch/arm/mach-pnx4008/include/mach/vmalloc.h
index 31b65ee..184913c 100644
--- a/arch/arm/mach-pnx4008/include/mach/vmalloc.h
+++ b/arch/arm/mach-pnx4008/include/mach/vmalloc.h
@@ -17,4 +17,4 @@
  * The vmalloc() routines leaves a hole of 4kB between each vmalloced
  * area for the same reason. ;)
  */
-#define VMALLOC_END       0xd0000000
+#define VMALLOC_END       0xd0000000UL
diff --git a/arch/arm/mach-rpc/include/mach/vmalloc.h b/arch/arm/mach-rpc/include/mach/vmalloc.h
index 3bcd86f..fb70022 100644
--- a/arch/arm/mach-rpc/include/mach/vmalloc.h
+++ b/arch/arm/mach-rpc/include/mach/vmalloc.h
@@ -7,4 +7,4 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#define VMALLOC_END       0xdc000000
+#define VMALLOC_END       0xdc000000UL
diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig
index 1ca7bdc..579d2f0 100644
--- a/arch/arm/mach-s3c64xx/Kconfig
+++ b/arch/arm/mach-s3c64xx/Kconfig
@@ -143,7 +143,7 @@
 	select S3C_DEV_USB_HSOTG
 	select S3C_DEV_WDT
 	select SAMSUNG_DEV_KEYPAD
-	select HAVE_S3C2410_WATCHDOG
+	select HAVE_S3C2410_WATCHDOG if WATCHDOG
 	select S3C64XX_SETUP_SDHCI
 	select S3C64XX_SETUP_I2C1
 	select S3C64XX_SETUP_IDE
diff --git a/arch/arm/mach-shark/include/mach/vmalloc.h b/arch/arm/mach-shark/include/mach/vmalloc.h
index 8e845b6..b10df98 100644
--- a/arch/arm/mach-shark/include/mach/vmalloc.h
+++ b/arch/arm/mach-shark/include/mach/vmalloc.h
@@ -1,4 +1,4 @@
 /*
  * arch/arm/mach-shark/include/mach/vmalloc.h
  */
-#define VMALLOC_END       0xd0000000
+#define VMALLOC_END       0xd0000000UL
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index 32d9e28..d326054 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -163,11 +163,13 @@
 		.name		= "loader",
 		.offset		= 0x00000000,
 		.size		= 512 * 1024,
+		.mask_flags	= MTD_WRITEABLE,
 	},
 	{
 		.name		= "bootenv",
 		.offset		= MTDPART_OFS_APPEND,
 		.size		= 512 * 1024,
+		.mask_flags	= MTD_WRITEABLE,
 	},
 	{
 		.name		= "kernel_ro",
@@ -581,6 +583,10 @@
 		return -EINVAL;
 
 	switch (rate) {
+	case 44100:
+		clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 11283000));
+		ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64;
+		break;
 	case 48000:
 		clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000));
 		clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000));
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index 7db31e6..b25ce90 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -220,8 +220,7 @@
 	__raw_writel(__raw_readl(PLLC2CR) & ~0x80000000, PLLC2CR);
 }
 
-static int pllc2_set_rate(struct clk *clk,
-			  unsigned long rate, int algo_id)
+static int pllc2_set_rate(struct clk *clk, unsigned long rate)
 {
 	unsigned long value;
 	int idx;
@@ -463,8 +462,7 @@
 	return 0;
 }
 
-static int fsidiv_set_rate(struct clk *clk,
-			   unsigned long rate, int algo_id)
+static int fsidiv_set_rate(struct clk *clk, unsigned long rate)
 {
 	int idx;
 
diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c
index 4cd3cae..30b2f40 100644
--- a/arch/arm/mach-shmobile/intc-sh7372.c
+++ b/arch/arm/mach-shmobile/intc-sh7372.c
@@ -98,7 +98,7 @@
 	INTC_VECT(IRQ14A, 0x03c0), INTC_VECT(IRQ15A, 0x03e0),
 	INTC_VECT(IRQ16A, 0x3200), INTC_VECT(IRQ17A, 0x3220),
 	INTC_VECT(IRQ18A, 0x3240), INTC_VECT(IRQ19A, 0x3260),
-	INTC_VECT(IRQ20A, 0x3280), INTC_VECT(IRQ31A, 0x32a0),
+	INTC_VECT(IRQ20A, 0x3280), INTC_VECT(IRQ21A, 0x32a0),
 	INTC_VECT(IRQ22A, 0x32c0), INTC_VECT(IRQ23A, 0x32e0),
 	INTC_VECT(IRQ24A, 0x3300), INTC_VECT(IRQ25A, 0x3320),
 	INTC_VECT(IRQ26A, 0x3340), INTC_VECT(IRQ27A, 0x3360),
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index 73fb1a5..608a137 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -75,14 +75,14 @@
 static inline void ux500_cache_wait(void __iomem *reg, unsigned long mask)
 {
 	/* wait for the operation to complete */
-	while (readl(reg) & mask)
+	while (readl_relaxed(reg) & mask)
 		;
 }
 
 static inline void ux500_cache_sync(void)
 {
 	void __iomem *base = __io_address(UX500_L2CC_BASE);
-	writel(0, base + L2X0_CACHE_SYNC);
+	writel_relaxed(0, base + L2X0_CACHE_SYNC);
 	ux500_cache_wait(base + L2X0_CACHE_SYNC, 1);
 }
 
@@ -107,7 +107,7 @@
 	uint32_t l2x0_way_mask = (1<<16) - 1;	/* Bitmask of active ways */
 
 	/* invalidate all ways */
-	writel(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
+	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
 	ux500_cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);
 	ux500_cache_sync();
 }
diff --git a/arch/arm/mach-versatile/include/mach/vmalloc.h b/arch/arm/mach-versatile/include/mach/vmalloc.h
index ebd8a25..7d8e069 100644
--- a/arch/arm/mach-versatile/include/mach/vmalloc.h
+++ b/arch/arm/mach-versatile/include/mach/vmalloc.h
@@ -18,4 +18,4 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#define VMALLOC_END		0xd8000000
+#define VMALLOC_END		0xd8000000UL
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 17e7b0b..55c17a6 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -206,8 +206,8 @@
 	 */
 	if (pfn_valid(pfn)) {
 		printk(KERN_WARNING "BUG: Your driver calls ioremap() on system memory.  This leads\n"
-		       KERN_WARNING "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n"
-		       KERN_WARNING "will fail in the next kernel release.  Please fix your driver.\n");
+		       "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n"
+		       "will fail in the next kernel release.  Please fix your driver.\n");
 		WARN_ON(1);
 	}
 
diff --git a/arch/arm/plat-mxc/devices/platform-imx-dma.c b/arch/arm/plat-mxc/devices/platform-imx-dma.c
index 02d9890..3a705c7 100644
--- a/arch/arm/plat-mxc/devices/platform-imx-dma.c
+++ b/arch/arm/plat-mxc/devices/platform-imx-dma.c
@@ -12,15 +12,7 @@
 
 #include <mach/hardware.h>
 #include <mach/devices-common.h>
-#ifdef SDMA_IS_MERGED
 #include <mach/sdma.h>
-#else
-struct sdma_platform_data {
-	int sdma_version;
-	char *cpu_name;
-	int to_version;
-};
-#endif
 
 struct imx_imx_sdma_data {
 	resource_size_t iobase;
diff --git a/arch/arm/plat-mxc/devices/platform-spi_imx.c b/arch/arm/plat-mxc/devices/platform-spi_imx.c
index e48340e..17f724c 100644
--- a/arch/arm/plat-mxc/devices/platform-spi_imx.c
+++ b/arch/arm/plat-mxc/devices/platform-spi_imx.c
@@ -27,6 +27,7 @@
 	imx_spi_imx_data_entry(MX21, CSPI, "imx21-cspi", _id, _hwid, SZ_4K)
 	imx21_cspi_data_entry(0, 1),
 	imx21_cspi_data_entry(1, 2),
+};
 #endif
 
 #ifdef CONFIG_ARCH_MX25
diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c
index aedf9c1..63cdc60 100644
--- a/arch/arm/plat-nomadik/timer.c
+++ b/arch/arm/plat-nomadik/timer.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2008 STMicroelectronics
  * Copyright (C) 2010 Alessandro Rubini
+ * Copyright (C) 2010 Linus Walleij for ST-Ericsson
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2, as
@@ -16,11 +17,13 @@
 #include <linux/clk.h>
 #include <linux/jiffies.h>
 #include <linux/err.h>
+#include <linux/cnt32_to_63.h>
+#include <linux/timer.h>
 #include <asm/mach/time.h>
 
 #include <plat/mtu.h>
 
-void __iomem *mtu_base; /* ssigned by machine code */
+void __iomem *mtu_base; /* Assigned by machine code */
 
 /*
  * Kernel assumes that sched_clock can be called early
@@ -48,16 +51,82 @@
 /*
  * Override the global weak sched_clock symbol with this
  * local implementation which uses the clocksource to get some
- * better resolution when scheduling the kernel. We accept that
- * this wraps around for now, since it is just a relative time
- * stamp. (Inspired by OMAP implementation.)
+ * better resolution when scheduling the kernel.
+ *
+ * Because the hardware timer period may be quite short
+ * (32.3 secs on the 133 MHz MTU timer selection on ux500)
+ * and because cnt32_to_63() needs to be called at least once per
+ * half period to work properly, a kernel keepwarm() timer is set up
+ * to ensure this requirement is always met.
+ *
+ * Also the sched_clock timer will wrap around at some point,
+ * here we set it to run continously for a year.
  */
+#define SCHED_CLOCK_MIN_WRAP 3600*24*365
+static struct timer_list cnt32_to_63_keepwarm_timer;
+static u32 sched_mult;
+static u32 sched_shift;
+
 unsigned long long notrace sched_clock(void)
 {
-	return clocksource_cyc2ns(nmdk_clksrc.read(
-				  &nmdk_clksrc),
-				  nmdk_clksrc.mult,
-				  nmdk_clksrc.shift);
+	u64 cycles;
+
+	if (unlikely(!mtu_base))
+		return 0;
+
+	cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0)));
+	/*
+	 * sched_mult is guaranteed to be even so will
+	 * shift out bit 63
+	 */
+	return (cycles * sched_mult) >> sched_shift;
+}
+
+/* Just kick sched_clock every so often */
+static void cnt32_to_63_keepwarm(unsigned long data)
+{
+	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
+	(void) sched_clock();
+}
+
+/*
+ * Set up a timer to keep sched_clock():s 32_to_63 algorithm warm
+ * once in half a 32bit timer wrap interval.
+ */
+static void __init nmdk_sched_clock_init(unsigned long rate)
+{
+	u32 v;
+	unsigned long delta;
+	u64 days;
+
+	/* Find the apropriate mult and shift factors */
+	clocks_calc_mult_shift(&sched_mult, &sched_shift,
+			       rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP);
+	/* We need to multiply by an even number to get rid of bit 63 */
+	if (sched_mult & 1)
+		sched_mult++;
+
+	/* Let's see what we get, take max counter and scale it */
+	days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift;
+	do_div(days, NSEC_PER_SEC);
+	do_div(days, (3600*24));
+
+	pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n",
+		(64 - sched_shift), rate, (unsigned long) days);
+
+	/*
+	 * Program a timer to kick us at half 32bit wraparound
+	 * Formula: seconds per wrap = (2^32) / f
+	 */
+	v = 0xFFFFFFFFUL / rate;
+	/* We want half of the wrap time to keep cnt32_to_63 warm */
+	v /= 2;
+	pr_debug("sched_clock: prescaled timer rate: %lu Hz, "
+		 "initialize keepwarm timer every %d seconds\n", rate, v);
+	/* Convert seconds to jiffies */
+	delta = msecs_to_jiffies(v*1000);
+	setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta);
+	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta));
 }
 
 /* Clockevent device: use one-shot mode */
@@ -161,13 +230,15 @@
 	writel(0, mtu_base + MTU_BGLR(0));
 	writel(cr | MTU_CRn_ENA, mtu_base + MTU_CR(0));
 
-	/* Now the scheduling clock is ready */
+	/* Now the clock source is ready */
 	nmdk_clksrc.read = nmdk_read_timer;
 
 	if (clocksource_register(&nmdk_clksrc))
 		pr_err("timer: failed to initialize clock source %s\n",
 		       nmdk_clksrc.name);
 
+	nmdk_sched_clock_init(rate);
+
 	/* Timer 1 is used for events */
 
 	clockevents_calc_mult_shift(&nmdk_clkevt, rate, MTU_MIN_RANGE);
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index f5c5b8d..2c28265 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -1983,6 +1983,8 @@
 
 	dma_write(OMAP2_DMA_CSR_CLEAR_MASK, CSR(ch));
 	dma_write(1 << ch, IRQSTATUS_L0);
+	/* read back the register to flush the write */
+	dma_read(IRQSTATUS_L0);
 
 	/* If the ch is not chained then chain_id will be -1 */
 	if (dma_chan[ch].chain_id != -1) {
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index cd0c090..b407bc8 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -7,7 +7,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/unistd.h>
 #include <linux/user.h>
 #include <linux/uaccess.h>
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 2b63b01..efad120 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -16,7 +16,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/ptrace.h>
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 9747813..933bd38 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -28,7 +28,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/ptrace.h>
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index 3a078ad..331de72 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -202,7 +202,7 @@
 }
 
 static int
-simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
+simscsi_queuecommand_lck (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
 	unsigned int target_id = sc->device->id;
 	char fname[MAX_ROOT_LEN+16];
@@ -326,6 +326,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(simscsi_queuecommand)
+
 static int
 simscsi_host_reset (struct scsi_cmnd *sc)
 {
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 18732ab..c2a1fc2 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/ptrace.h>
diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c
index 6d33905..e2a63af 100644
--- a/arch/m68knommu/kernel/process.c
+++ b/arch/m68knommu/kernel/process.c
@@ -19,7 +19,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/ptrace.h>
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 0d0f804..e1b14a6 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -14,7 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/ptrace.h>
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index ba430a0..3039408 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -28,7 +28,6 @@
 #include <linux/namei.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/utsname.h>
 #include <linux/vfs.h>
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 9779ece..88a0ad1 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -20,7 +20,6 @@
 #include <linux/times.h>
 #include <linux/time.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/sem.h>
 #include <linux/msg.h>
 #include <linux/shm.h>
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b644719..e625e9e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -4,6 +4,10 @@
 	bool
 	default y if !PPC64
 
+config 32BIT
+	bool
+	default y if PPC32
+
 config 64BIT
 	bool
 	default y if PPC64
diff --git a/arch/powerpc/boot/div64.S b/arch/powerpc/boot/div64.S
index 722f360..d271ab5 100644
--- a/arch/powerpc/boot/div64.S
+++ b/arch/powerpc/boot/div64.S
@@ -33,9 +33,10 @@
 	cntlzw	r0,r5		# we are shifting the dividend right
 	li	r10,-1		# to make it < 2^32, and shifting
 	srw	r10,r10,r0	# the divisor right the same amount,
-	add	r9,r4,r10	# rounding up (so the estimate cannot
+	addc	r9,r4,r10	# rounding up (so the estimate cannot
 	andc	r11,r6,r10	# ever be too large, only too small)
 	andc	r9,r9,r10
+	addze	r9,r9
 	or	r11,r5,r11
 	rotlw	r9,r9,r0
 	rotlw	r11,r11,r0
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 7a9db64..42850ee 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -337,7 +337,7 @@
 		/* FP registers 32 -> 63 */
 #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
 		if (current)
-			memcpy(mem, current->thread.evr[regno-32],
+			memcpy(mem, &current->thread.evr[regno-32],
 					dbg_reg_def[regno].size);
 #else
 		/* fp registers not used by kernel, leave zero */
@@ -362,7 +362,7 @@
 	if (regno >= 32 && regno < 64) {
 		/* FP registers 32 -> 63 */
 #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
-		memcpy(current->thread.evr[regno-32], mem,
+		memcpy(&current->thread.evr[regno-32], mem,
 				dbg_reg_def[regno].size);
 #else
 		/* fp registers not used by kernel, leave zero */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 2a178b0..ce6f61c 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -497,9 +497,8 @@
 }
 
 /*
- * Called into from start_kernel, after lock_kernel has been called.
- * Initializes bootmem, which is unsed to manage page allocation until
- * mem_init is called.
+ * Called into from start_kernel this initializes bootmem, which is used
+ * to manage page allocation until mem_init is called.
  */
 void __init setup_arch(char **cmdline_p)
 {
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index b1b6043..4e5bf1e 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -23,7 +23,6 @@
 #include <linux/resource.h>
 #include <linux/times.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/sem.h>
 #include <linux/msg.h>
 #include <linux/shm.h>
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 83f534d..5e95844 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1123,7 +1123,7 @@
 	else
 #endif /* CONFIG_PPC_HAS_HASH_64K */
 		rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
-				    subpage_protection(pgdir, ea));
+				    subpage_protection(mm, ea));
 
 	/* Dump some info in case of hash insertion failure, they should
 	 * never happen so it is really useful to know if/when they do
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 8b04c54..8526bd9 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -138,8 +138,11 @@
 	cmpldi	cr0,r15,0			/* Check for user region */
 	std	r14,EX_TLB_ESR(r12)		/* write crazy -1 to frame */
 	beq	normal_tlb_miss
+
+	li	r11,_PAGE_PRESENT|_PAGE_BAP_SX	/* Base perm */
+	oris	r11,r11,_PAGE_ACCESSED@h
 	/* XXX replace the RMW cycles with immediate loads + writes */
-1:	mfspr	r10,SPRN_MAS1
+	mfspr	r10,SPRN_MAS1
 	cmpldi	cr0,r15,8			/* Check for vmalloc region */
 	rlwinm	r10,r10,0,16,1			/* Clear TID */
 	mtspr	SPRN_MAS1,r10
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 36c0c44..2a030d8 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -585,6 +585,6 @@
 	ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
 
 	/* Finally limit subsequent allocations */
-	memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size);
+	memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
 }
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index c667f0f..3139814 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -47,6 +47,12 @@
 config PPC_PSERIES_DEBUG
 	depends on PPC_PSERIES && PPC_EARLY_DEBUG
 	bool "Enable extra debug logging in platforms/pseries"
+        help
+	  Say Y here if you want the pseries core to produce a bunch of
+	  debug messages to the system log. Select this if you are having a
+	  problem with the pseries core and want to see more of what is
+	  going on. This does not enable debugging in lpar.c, which must
+	  be manually done due to its verbosity.
 	default y
 
 config PPC_SMLPAR
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 34b7dc1..17a11c8 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -21,8 +21,6 @@
  * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  */
 
-#undef DEBUG
-
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/list.h>
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 4b7a062..5fcc92a 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -25,8 +25,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#undef DEBUG
-
 #include <linux/pci.h>
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 45e0c61..05221b1 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -6,6 +6,18 @@
 
 source "lib/Kconfig.debug"
 
+config STRICT_DEVMEM
+	def_bool y
+	prompt "Filter access to /dev/mem"
+	---help---
+	  This option restricts access to /dev/mem.  If this option is
+	  disabled, you allow userspace access to all memory, including
+	  kernel and userspace memory. Accidental memory access is likely
+	  to be disastrous.
+	  Memory access is required for experts who want to debug the kernel.
+
+	  If you are unsure, say Y.
+
 config DEBUG_STRICT_USER_COPY_CHECKS
 	bool "Strict user copy size checks"
 	---help---
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index a8729ea..3c987e9 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -130,6 +130,11 @@
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
 
+static inline int devmem_is_allowed(unsigned long pfn)
+{
+	return 0;
+}
+
 #define HAVE_ARCH_FREE_PAGE
 #define HAVE_ARCH_ALLOC_PAGE
 
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 1e6449c..53acaa8 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -25,7 +25,6 @@
 #include <linux/resource.h>
 #include <linux/times.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/sem.h>
 #include <linux/msg.h>
 #include <linux/shm.h>
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index d60fc43..2564793 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -30,6 +30,7 @@
 #include <asm/sections.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/hardirq.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -212,7 +213,7 @@
 	/* Set the PER control regs, turns on single step for this address */
 	__ctl_load(kprobe_per_regs, 9, 11);
 	regs->psw.mask |= PSW_MASK_PER;
-	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK);
+	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 }
 
 static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
@@ -239,7 +240,7 @@
 	__get_cpu_var(current_kprobe) = p;
 	/* Save the interrupt and per flags */
 	kcb->kprobe_saved_imask = regs->psw.mask &
-	    (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK);
+		(PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
 	/* Save the control regs that govern PER */
 	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
 }
@@ -316,8 +317,6 @@
 		return 1;
 
 ss_probe:
-	if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO))
-		local_irq_disable();
 	prepare_singlestep(p, regs);
 	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;
@@ -350,6 +349,7 @@
 	struct hlist_node *node, *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+	kprobe_opcode_t *correct_ret_addr = NULL;
 
 	INIT_HLIST_HEAD(&empty_rp);
 	kretprobe_hash_lock(current, &head, &flags);
@@ -372,10 +372,32 @@
 			/* another task is sharing our hash bucket */
 			continue;
 
-		if (ri->rp && ri->rp->handler)
-			ri->rp->handler(ri, regs);
+		orig_ret_address = (unsigned long)ri->ret_addr;
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	correct_ret_addr = ri->ret_addr;
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
 
 		orig_ret_address = (unsigned long)ri->ret_addr;
+
+		if (ri->rp && ri->rp->handler) {
+			ri->ret_addr = correct_ret_addr;
+			ri->rp->handler(ri, regs);
+		}
+
 		recycle_rp_inst(ri, &empty_rp);
 
 		if (orig_ret_address != trampoline_address) {
@@ -387,7 +409,7 @@
 			break;
 		}
 	}
-	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
 	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
 
 	reset_current_kprobe();
@@ -465,8 +487,6 @@
 		goto out;
 	}
 	reset_current_kprobe();
-	if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO))
-		local_irq_enable();
 out:
 	preempt_enable_no_resched();
 
@@ -482,7 +502,7 @@
 	return 1;
 }
 
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -508,8 +528,6 @@
 			restore_previous_kprobe(kcb);
 		else {
 			reset_current_kprobe();
-			if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO))
-				local_irq_enable();
 		}
 		preempt_enable_no_resched();
 		break;
@@ -553,6 +571,18 @@
 	return 0;
 }
 
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	int ret;
+
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_disable();
+	ret = kprobe_trap_handler(regs, trapnr);
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+	return ret;
+}
+
 /*
  * Wrapper routine to for handling exceptions.
  */
@@ -560,8 +590,12 @@
 				       unsigned long val, void *data)
 {
 	struct die_args *args = (struct die_args *)data;
+	struct pt_regs *regs = args->regs;
 	int ret = NOTIFY_DONE;
 
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_disable();
+
 	switch (val) {
 	case DIE_BPT:
 		if (kprobe_handler(args->regs))
@@ -572,16 +606,17 @@
 			ret = NOTIFY_STOP;
 		break;
 	case DIE_TRAP:
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() &&
-		    kprobe_fault_handler(args->regs, args->trapnr))
+		if (!preemptible() && kprobe_running() &&
+		    kprobe_trap_handler(args->regs, args->trapnr))
 			ret = NOTIFY_STOP;
-		preempt_enable();
 		break;
 	default:
 		break;
 	}
+
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+
 	return ret;
 }
 
@@ -595,6 +630,7 @@
 
 	/* setup return addr to the jprobe handler routine */
 	regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE;
+	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 
 	/* r14 is the function return address */
 	kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14];
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 38e641c..45b405c 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -20,18 +20,17 @@
 static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
-	unsigned long mask, result;
+	unsigned long mask;
 	pte_t *ptep, pte;
 	struct page *page;
 
-	result = write ? 0 : _PAGE_RO;
-	mask = result | _PAGE_INVALID | _PAGE_SPECIAL;
+	mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
 
 	ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
 	do {
 		pte = *ptep;
 		barrier();
-		if ((pte_val(pte) & mask) != result)
+		if ((pte_val(pte) & mask) != 0)
 			return 0;
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 46d5179..e3c73cd 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -199,10 +199,13 @@
 #define ARCH_HAS_PREFETCHW
 static inline void prefetch(void *x)
 {
-	__asm__ __volatile__ ("pref @%0\n\t" : : "r" (x) : "memory");
+	__builtin_prefetch(x, 0, 3);
 }
 
-#define prefetchw(x)	prefetch(x)
+static inline void prefetchw(void *x)
+{
+	__builtin_prefetch(x, 1, 3);
+}
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index 4eabc68c..b601fa3 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -110,7 +110,7 @@
 	return 0;
 }
 
-static int shoc_clk_set_rate(struct clk *clk, unsigned long rate, int algo_id)
+static int shoc_clk_set_rate(struct clk *clk, unsigned long rate)
 {
 	unsigned long frqcr3;
 	unsigned int tmp;
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index 81f5837..8c6a350 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -88,7 +88,7 @@
 	}
 
 	if (op & CACHEFLUSH_I)
-		flush_cache_all();
+		flush_icache_range(addr, addr+len);
 
 	up_read(&current->mm->mmap_sem);
 	return 0;
diff --git a/arch/sh/kernel/vsyscall/vsyscall-trapa.S b/arch/sh/kernel/vsyscall/vsyscall-trapa.S
index 3b6eb34..3e70f85 100644
--- a/arch/sh/kernel/vsyscall/vsyscall-trapa.S
+++ b/arch/sh/kernel/vsyscall/vsyscall-trapa.S
@@ -8,9 +8,9 @@
 	 * fill out .eh_frame -- PFM. */
 .LEND_vsyscall:
 	.size __kernel_vsyscall,.-.LSTART_vsyscall
-	.previous
 
 	.section .eh_frame,"a",@progbits
+	.previous
 .LCIE:
 	.ualong	.LCIE_end - .LCIE_start
 .LCIE_start:
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 7524689..16582d8 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -12,7 +12,6 @@
 #include <linux/sched.h>
 #include <linux/threads.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/init.h>
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index e6375a7..6db18c6 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -17,7 +17,6 @@
 #include <linux/resource.h>
 #include <linux/times.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/sem.h>
 #include <linux/msg.h>
 #include <linux/shm.h>
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 675c9e1..42b282f 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -19,7 +19,6 @@
 #include <linux/mman.h>
 #include <linux/utsname.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/ipc.h>
 
 #include <asm/uaccess.h>
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 12b9f35..4491f4c 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -16,7 +16,6 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/perf_event.h>
 
 enum direction {
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
index b351770..3107381 100644
--- a/arch/sparc/kernel/windows.c
+++ b/arch/sparc/kernel/windows.c
@@ -9,7 +9,6 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 
 #include <asm/uaccess.h>
 
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 07ec8a86..e11b5fc 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -329,6 +329,18 @@
 
 menu "Bus options"
 
+config PCI
+	bool "PCI support"
+	default y
+	select PCI_DOMAINS
+	---help---
+	  Enable PCI root complex support, so PCIe endpoint devices can
+	  be attached to the Tile chip.  Many, but not all, PCI devices
+	  are supported under Tilera's root complex driver.
+
+config PCI_DOMAINS
+	bool
+
 config NO_IOMEM
 	def_bool !PCI
 
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index c5741da4..14a3f85 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -137,4 +137,56 @@
 	mb_incoherent();
 }
 
+/*
+ * Flush & invalidate a VA range that is homed remotely on a single core,
+ * waiting until the memory controller holds the flushed values.
+ */
+static inline void finv_buffer_remote(void *buffer, size_t size)
+{
+	char *p;
+	int i;
+
+	/*
+	 * Flush and invalidate the buffer out of the local L1/L2
+	 * and request the home cache to flush and invalidate as well.
+	 */
+	__finv_buffer(buffer, size);
+
+	/*
+	 * Wait for the home cache to acknowledge that it has processed
+	 * all the flush-and-invalidate requests.  This does not mean
+	 * that the flushed data has reached the memory controller yet,
+	 * but it does mean the home cache is processing the flushes.
+	 */
+	__insn_mf();
+
+	/*
+	 * Issue a load to the last cache line, which can't complete
+	 * until all the previously-issued flushes to the same memory
+	 * controller have also completed.  If we weren't striping
+	 * memory, that one load would be sufficient, but since we may
+	 * be, we also need to back up to the last load issued to
+	 * another memory controller, which would be the point where
+	 * we crossed an 8KB boundary (the granularity of striping
+	 * across memory controllers).  Keep backing up and doing this
+	 * until we are before the beginning of the buffer, or have
+	 * hit all the controllers.
+	 */
+	for (i = 0, p = (char *)buffer + size - 1;
+	     i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer;
+	     ++i) {
+		const unsigned long STRIPE_WIDTH = 8192;
+
+		/* Force a load instruction to issue. */
+		*(volatile char *)p;
+
+		/* Jump to end of previous stripe. */
+		p -= STRIPE_WIDTH;
+		p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1));
+	}
+
+	/* Wait for the loads (and thus flushes) to have completed. */
+	__insn_mf();
+}
+
 #endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index ee43328..d3cbb9b 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -55,9 +55,6 @@
 #define ioremap_writethrough(physaddr, size)	ioremap(physaddr, size)
 #define ioremap_fullcache(physaddr, size)	ioremap(physaddr, size)
 
-void __iomem *ioport_map(unsigned long port, unsigned int len);
-extern inline void ioport_unmap(void __iomem *addr) {}
-
 #define mmiowb()
 
 /* Conversion between virtual and physical mappings.  */
@@ -189,12 +186,22 @@
  * we never run, uses them unconditionally.
  */
 
-static inline int ioport_panic(void)
+static inline long ioport_panic(void)
 {
 	panic("inb/outb and friends do not exist on tile");
 	return 0;
 }
 
+static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+	return (void __iomem *) ioport_panic();
+}
+
+static inline void ioport_unmap(void __iomem *addr)
+{
+	ioport_panic();
+}
+
 static inline u8 inb(unsigned long addr)
 {
 	return ioport_panic();
diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h
deleted file mode 100644
index e853b0e..0000000
--- a/arch/tile/include/asm/pci-bridge.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- *   NON INFRINGEMENT.  See the GNU General Public License for
- *   more details.
- */
-
-#ifndef _ASM_TILE_PCI_BRIDGE_H
-#define _ASM_TILE_PCI_BRIDGE_H
-
-#include <linux/ioport.h>
-#include <linux/pci.h>
-
-struct device_node;
-struct pci_controller;
-
-/*
- * pci_io_base returns the memory address at which you can access
- * the I/O space for PCI bus number `bus' (or NULL on error).
- */
-extern void __iomem *pci_bus_io_base(unsigned int bus);
-extern unsigned long pci_bus_io_base_phys(unsigned int bus);
-extern unsigned long pci_bus_mem_base_phys(unsigned int bus);
-
-/* Allocate a new PCI host bridge structure */
-extern struct pci_controller *pcibios_alloc_controller(void);
-
-/* Helper function for setting up resources */
-extern void pci_init_resource(struct resource *res, unsigned long start,
-			      unsigned long end, int flags, char *name);
-
-/* Get the PCI host controller for a bus */
-extern struct pci_controller *pci_bus_to_hose(int bus);
-
-/*
- * Structure of a PCI controller (host bridge)
- */
-struct pci_controller {
-	int index;		/* PCI domain number */
-	struct pci_bus *root_bus;
-
-	int first_busno;
-	int last_busno;
-
-	int hv_cfg_fd[2];	/* config{0,1} fds for this PCIe controller */
-	int hv_mem_fd;		/* fd to Hypervisor for MMIO operations */
-
-	struct pci_ops *ops;
-
-	int irq_base;		/* Base IRQ from the Hypervisor	*/
-	int plx_gen1;		/* flag for PLX Gen 1 configuration */
-
-	/* Address ranges that are routed to this controller/bridge. */
-	struct resource mem_resources[3];
-};
-
-static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus)
-{
-	return bus->sysdata;
-}
-
-extern void setup_indirect_pci_nomap(struct pci_controller *hose,
-			       void __iomem *cfg_addr, void __iomem *cfg_data);
-extern void setup_indirect_pci(struct pci_controller *hose,
-			       u32 cfg_addr, u32 cfg_data);
-extern void setup_grackle(struct pci_controller *hose);
-
-extern unsigned char common_swizzle(struct pci_dev *, unsigned char *);
-
-/*
- *   The following code swizzles for exactly one bridge.  The routine
- *   common_swizzle below handles multiple bridges.  But there are a
- *   some boards that don't follow the PCI spec's suggestion so we
- *   break this piece out separately.
- */
-static inline unsigned char bridge_swizzle(unsigned char pin,
-		unsigned char idsel)
-{
-	return (((pin-1) + idsel) % 4) + 1;
-}
-
-/*
- * The following macro is used to lookup irqs in a standard table
- * format for those PPC systems that do not already have PCI
- * interrupts properly routed.
- */
-/* FIXME - double check this */
-#define PCI_IRQ_TABLE_LOOKUP ({ \
-	long _ctl_ = -1; \
-	if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \
-		_ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \
-	_ctl_; \
-})
-
-/*
- * Scan the buses below a given PCI host bridge and assign suitable
- * resources to all devices found.
- */
-extern int pciauto_bus_scan(struct pci_controller *, int);
-
-#ifdef CONFIG_PCI
-extern unsigned long pci_address_to_pio(phys_addr_t address);
-#else
-static inline unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	return (unsigned long)-1;
-}
-#endif
-
-#endif /* _ASM_TILE_PCI_BRIDGE_H */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index b0c15da..c3fc458 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,7 +15,29 @@
 #ifndef _ASM_TILE_PCI_H
 #define _ASM_TILE_PCI_H
 
-#include <asm/pci-bridge.h>
+#include <linux/pci.h>
+
+/*
+ * Structure of a PCI controller (host bridge)
+ */
+struct pci_controller {
+	int index;		/* PCI domain number */
+	struct pci_bus *root_bus;
+
+	int first_busno;
+	int last_busno;
+
+	int hv_cfg_fd[2];	/* config{0,1} fds for this PCIe controller */
+	int hv_mem_fd;		/* fd to Hypervisor for MMIO operations */
+
+	struct pci_ops *ops;
+
+	int irq_base;		/* Base IRQ from the Hypervisor	*/
+	int plx_gen1;		/* flag for PLX Gen 1 configuration */
+
+	/* Address ranges that are routed to this controller/bridge. */
+	struct resource mem_resources[3];
+};
 
 /*
  * The hypervisor maps the entirety of CPA-space as bus addresses, so
@@ -24,57 +46,13 @@
  */
 #define PCI_DMA_BUS_IS_PHYS     1
 
-struct pci_controller *pci_bus_to_hose(int bus);
-unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp);
 int __init tile_pci_init(void);
-void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
+
 void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
+
 void __devinit pcibios_fixup_bus(struct pci_bus *bus);
 
-int __devinit _tile_cfg_read(struct pci_controller *hose,
-				    int bus,
-				    int slot,
-				    int function,
-				    int offset,
-				    int size,
-				    u32 *val);
-int __devinit _tile_cfg_write(struct pci_controller *hose,
-				     int bus,
-				     int slot,
-				     int function,
-				     int offset,
-				     int size,
-				     u32 val);
-
-/*
- * These are used to to config reads and writes in the early stages of
- * setup before the driver infrastructure has been set up enough to be
- * able to do config reads and writes.
- */
-#define early_cfg_read(where, size, value) \
-	_tile_cfg_read(controller, \
-		       current_bus, \
-		       pci_slot, \
-		       pci_fn, \
-		       where, \
-		       size, \
-		       value)
-
-#define early_cfg_write(where, size, value) \
-	_tile_cfg_write(controller, \
-		       current_bus, \
-		       pci_slot, \
-		       pci_fn, \
-		       where, \
-		       size, \
-		       value)
-
-
-
-#define PCICFG_BYTE	1
-#define PCICFG_WORD	2
-#define PCICFG_DWORD	4
-
 #define	TILE_NUM_PCIE	2
 
 #define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index)
@@ -88,33 +66,33 @@
 }
 
 /*
- * I/O space is currently not supported.
+ * pcibios_assign_all_busses() tells whether or not the bus numbers
+ * should be reassigned, in case the BIOS didn't do it correctly, or
+ * in case we don't have a BIOS and we want to let Linux do it.
  */
+static inline int pcibios_assign_all_busses(void)
+{
+	return 1;
+}
 
-#define TILE_PCIE_LOWER_IO		0x0
-#define TILE_PCIE_UPPER_IO		0x10000
-#define TILE_PCIE_PCIE_IO_SIZE		0x0000FFFF
-
-#define _PAGE_NO_CACHE		0
-#define _PAGE_GUARDED		0
-
-
-#define pcibios_assign_all_busses()    pci_assign_all_buses
-extern int pci_assign_all_buses;
-
+/*
+ * No special bus mastering setup handling.
+ */
 static inline void pcibios_set_master(struct pci_dev *dev)
 {
-	/* No special bus mastering setup handling */
 }
 
 #define PCIBIOS_MIN_MEM		0
-#define PCIBIOS_MIN_IO		TILE_PCIE_LOWER_IO
+#define PCIBIOS_MIN_IO		0
 
 /*
  * This flag tells if the platform is TILEmpower that needs
  * special configuration for the PLX switch chip.
  */
-extern int blade_pci;
+extern int tile_plx_gen1;
+
+/* Use any cpu for PCI. */
+#define cpumask_of_pcibus(bus) cpu_online_mask
 
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
 #include <asm-generic/pci-dma-compat.h>
@@ -122,7 +100,4 @@
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 
-/* Use any cpu for PCI. */
-#define cpumask_of_pcibus(bus) cpu_online_mask
-
 #endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 1747ff3..a9e7c87 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -292,8 +292,18 @@
 /* Are we using huge pages in the TLB for kernel data? */
 extern int kdata_huge;
 
+/* Support standard Linux prefetching. */
+#define ARCH_HAS_PREFETCH
+#define prefetch(x) __builtin_prefetch(x)
 #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE()
 
+/* Bring a value into the L1D, faulting the TLB if necessary. */
+#ifdef __tilegx__
+#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x))
+#else
+#define prefetch_L1(x) __insn_prefetch_L1((void *)(x))
+#endif
+
 #else /* __ASSEMBLY__ */
 
 /* Do some slow action (e.g. read a slow SPR). */
diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h
new file mode 100644
index 0000000..3a73b2b
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_impl.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drivers/xgbe/impl.h
+ * Implementation details for the NetIO library.
+ */
+
+#ifndef __DRV_XGBE_IMPL_H__
+#define __DRV_XGBE_IMPL_H__
+
+#include <hv/netio_errors.h>
+#include <hv/netio_intf.h>
+#include <hv/drv_xgbe_intf.h>
+
+
+/** How many groups we have (log2). */
+#define LOG2_NUM_GROUPS (12)
+/** How many groups we have. */
+#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)
+
+/** Number of output requests we'll buffer per tile. */
+#define EPP_REQS_PER_TILE (32)
+
+/** Words used in an eDMA command without checksum acceleration. */
+#define EDMA_WDS_NO_CSUM      8
+/** Words used in an eDMA command with checksum acceleration. */
+#define EDMA_WDS_CSUM        10
+/** Total available words in the eDMA command FIFO. */
+#define EDMA_WDS_TOTAL      128
+
+
+/*
+ * FIXME: These definitions are internal and should have underscores!
+ * NOTE: The actual numeric values here are intentional and allow us to
+ * optimize the concept "if small ... else if large ... else ...", by
+ * checking for the low bit being set, and then for non-zero.
+ * These are used as array indices, so they must have the values (0, 1, 2)
+ * in some order.
+ */
+#define SIZE_SMALL (1)       /**< Small packet queue. */
+#define SIZE_LARGE (2)       /**< Large packet queue. */
+#define SIZE_JUMBO (0)       /**< Jumbo packet queue. */
+
+/** The number of "SIZE_xxx" values. */
+#define NETIO_NUM_SIZES 3
+
+
+/*
+ * Default numbers of packets for IPP drivers.  These values are chosen
+ * such that CIPP1 will not overflow its L2 cache.
+ */
+
+/** The default number of small packets. */
+#define NETIO_DEFAULT_SMALL_PACKETS 2750
+/** The default number of large packets. */
+#define NETIO_DEFAULT_LARGE_PACKETS 2500
+/** The default number of jumbo packets. */
+#define NETIO_DEFAULT_JUMBO_PACKETS 250
+
+
+/** Log2 of the size of a memory arena. */
+#define NETIO_ARENA_SHIFT      24      /* 16 MB */
+/** Size of a memory arena. */
+#define NETIO_ARENA_SIZE       (1 << NETIO_ARENA_SHIFT)
+
+
+/** A queue of packets.
+ *
+ * This structure partially defines a queue of packets waiting to be
+ * processed.  The queue as a whole is written to by an interrupt handler and
+ * read by non-interrupt code; this data structure is what's touched by the
+ * interrupt handler.  The other part of the queue state, the read offset, is
+ * kept in user space, not in hypervisor space, so it is in a separate data
+ * structure.
+ *
+ * The read offset (__packet_receive_read in the user part of the queue
+ * structure) points to the next packet to be read. When the read offset is
+ * equal to the write offset, the queue is empty; therefore the queue must
+ * contain one more slot than the required maximum queue size.
+ *
+ * Here's an example of all 3 state variables and what they mean.  All
+ * pointers move left to right.
+ *
+ * @code
+ *   I   I   V   V   V   V   I   I   I   I
+ *   0   1   2   3   4   5   6   7   8   9  10
+ *           ^       ^       ^               ^
+ *           |               |               |
+ *           |               |               __last_packet_plus_one
+ *           |               __buffer_write
+ *           __packet_receive_read
+ * @endcode
+ *
+ * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one
+ * = 10).  The read pointer is at 2, and the write pointer is at 6; thus,
+ * there are valid, unread packets in slots 2, 3, 4, and 5.  The remaining
+ * slots are invalid (do not contain a packet).
+ */
+typedef struct {
+  /** Byte offset of the next notify packet to be written: zero for the first
+   *  packet on the queue, sizeof (netio_pkt_t) for the second packet on the
+   *  queue, etc. */
+  volatile uint32_t __packet_write;
+
+  /** Offset of the packet after the last valid packet (i.e., when any
+   *  pointer is incremented to this value, it wraps back to zero). */
+  uint32_t __last_packet_plus_one;
+}
+__netio_packet_queue_t;
+
+
+/** A queue of buffers.
+ *
+ * This structure partially defines a queue of empty buffers which have been
+ * obtained via requests to the IPP.  (The elements of the queue are packet
+ * handles, which are transformed into a full netio_pkt_t when the buffer is
+ * retrieved.)  The queue as a whole is written to by an interrupt handler and
+ * read by non-interrupt code; this data structure is what's touched by the
+ * interrupt handler.  The other parts of the queue state, the read offset and
+ * requested write offset, are kept in user space, not in hypervisor space, so
+ * they are in a separate data structure.
+ *
+ * The read offset (__buffer_read in the user part of the queue structure)
+ * points to the next buffer to be read. When the read offset is equal to the
+ * write offset, the queue is empty; therefore the queue must contain one more
+ * slot than the required maximum queue size.
+ *
+ * The requested write offset (__buffer_requested_write in the user part of
+ * the queue structure) points to the slot which will hold the next buffer we
+ * request from the IPP, once we get around to sending such a request.  When
+ * the requested write offset is equal to the write offset, no requests for
+ * new buffers are outstanding; when the requested write offset is one greater
+ * than the read offset, no more requests may be sent.
+ *
+ * Note that, unlike the packet_queue, the buffer_queue places incoming
+ * buffers at decreasing addresses.  This makes the check for "is it time to
+ * wrap the buffer pointer" cheaper in the assembly code which receives new
+ * buffers, and means that the value which defines the queue size,
+ * __last_buffer, is different than in the packet queue.  Also, the offset
+ * used in the packet_queue is already scaled by the size of a packet; here we
+ * use unscaled slot indices for the offsets.  (These differences are
+ * historical, and in the future it's possible that the packet_queue will look
+ * more like this queue.)
+ *
+ * @code
+ * Here's an example of all 4 state variables and what they mean.  Remember:
+ * all pointers move right to left.
+ *
+ *   V   V   V   I   I   R   R   V   V   V
+ *   0   1   2   3   4   5   6   7   8   9
+ *           ^       ^       ^           ^
+ *           |       |       |           |
+ *           |       |       |           __last_buffer
+ *           |       |       __buffer_write
+ *           |       __buffer_requested_write
+ *           __buffer_read
+ * @endcode
+ *
+ * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9).
+ * The read pointer is at 2, and the write pointer is at 6; thus, there are
+ * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7.  The requested write
+ * pointer is at 4; thus, requests have been made to the IPP for buffers which
+ * will be placed in slots 6 and 5 when they arrive.  Finally, the remaining
+ * slots are invalid (do not contain a buffer).
+ */
+typedef struct
+{
+  /** Ordinal number of the next buffer to be written: 0 for the first slot in
+   *  the queue, 1 for the second slot in the queue, etc. */
+  volatile uint32_t __buffer_write;
+
+  /** Ordinal number of the last buffer (i.e., when any pointer is decremented
+   *  below zero, it is reloaded with this value). */
+  uint32_t __last_buffer;
+}
+__netio_buffer_queue_t;
+
+
+/**
+ * An object for providing Ethernet packets to a process.
+ */
+typedef struct __netio_queue_impl_t
+{
+  /** The queue of packets waiting to be received. */
+  __netio_packet_queue_t __packet_receive_queue;
+  /** The intr bit mask that IDs this device. */
+  unsigned int __intr_id;
+  /** Offset to queues of empty buffers, one per size. */
+  uint32_t __buffer_queue[NETIO_NUM_SIZES];
+  /** The address of the first EPP tile, or -1 if no EPP. */
+  /* ISSUE: Actually this is always "0" or "~0". */
+  uint32_t __epp_location;
+  /** The queue ID that this queue represents. */
+  unsigned int __queue_id;
+  /** Number of acknowledgements received. */
+  volatile uint32_t __acks_received;
+  /** Last completion number received for packet_sendv. */
+  volatile uint32_t __last_completion_rcv;
+  /** Number of packets allowed to be outstanding. */
+  uint32_t __max_outstanding;
+  /** First VA available for packets. */
+  void* __va_0;
+  /** First VA in second range available for packets. */
+  void* __va_1;
+  /** Padding to align the "__packets" field to the size of a netio_pkt_t. */
+  uint32_t __padding[3];
+  /** The packets themselves. */
+  netio_pkt_t __packets[0];
+}
+netio_queue_impl_t;
+
+
+/**
+ * An object for managing the user end of a NetIO queue.
+ */
+typedef struct __netio_queue_user_impl_t
+{
+  /** The next incoming packet to be read. */
+  uint32_t __packet_receive_read;
+  /** The next empty buffers to be read, one index per size. */
+  uint8_t __buffer_read[NETIO_NUM_SIZES];
+  /** Where the empty buffer we next request from the IPP will go, one index
+   * per size. */
+  uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
+  /** PCIe interface flag. */
+  uint8_t __pcie;
+  /** Number of packets left to be received before we send a credit update. */
+  uint32_t __receive_credit_remaining;
+  /** Value placed in __receive_credit_remaining when it reaches zero. */
+  uint32_t __receive_credit_interval;
+  /** First fast I/O routine index. */
+  uint32_t __fastio_index;
+  /** Number of acknowledgements expected. */
+  uint32_t __acks_outstanding;
+  /** Last completion number requested. */
+  uint32_t __last_completion_req;
+  /** File descriptor for driver. */
+  int __fd;
+}
+netio_queue_user_impl_t;
+
+
+#define NETIO_GROUP_CHUNK_SIZE   64   /**< Max # groups in one IPP request */
+#define NETIO_BUCKET_CHUNK_SIZE  64   /**< Max # buckets in one IPP request */
+
+
+/** Internal structure used to convey packet send information to the
+ * hypervisor.  FIXME: Actually, it's not used for that anymore, but
+ * netio_packet_send() still uses it internally.
+ */
+typedef struct
+{
+  uint16_t flags;              /**< Packet flags (__NETIO_SEND_FLG_xxx) */
+  uint16_t transfer_size;      /**< Size of packet */
+  uint32_t va;                 /**< VA of start of packet */
+  __netio_pkt_handle_t handle; /**< Packet handle */
+  uint32_t csum0;              /**< First checksum word */
+  uint32_t csum1;              /**< Second checksum word */
+}
+__netio_send_cmd_t;
+
+
+/** Flags used in two contexts:
+ *  - As the "flags" member in the __netio_send_cmd_t, above; used only
+ *    for netio_pkt_send_{prepare,commit}.
+ *  - As part of the flags passed to the various send packet fast I/O calls.
+ */
+
+/** Need acknowledgement on this packet.  Note that some code in the
+ *  normal send_pkt fast I/O handler assumes that this is equal to 1. */
+#define __NETIO_SEND_FLG_ACK    0x1
+
+/** Do checksum on this packet.  (Only used with the __netio_send_cmd_t;
+ *  normal packet sends use a special fast I/O index to denote checksumming,
+ *  and multi-segment sends test the checksum descriptor.) */
+#define __NETIO_SEND_FLG_CSUM   0x2
+
+/** Get a completion on this packet.  Only used with multi-segment sends.  */
+#define __NETIO_SEND_FLG_COMPLETION 0x4
+
+/** Position of the number-of-extra-segments value in the flags word.
+    Only used with multi-segment sends. */
+#define __NETIO_SEND_FLG_XSEG_SHIFT 3
+
+/** Width of the number-of-extra-segments value in the flags word. */
+#define __NETIO_SEND_FLG_XSEG_WIDTH 2
+
+#endif /* __DRV_XGBE_IMPL_H__ */
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h
new file mode 100644
index 0000000..146e47d
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_intf.h
@@ -0,0 +1,615 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_xgbe_intf.h
+ * Interface to the hypervisor XGBE driver.
+ */
+
+#ifndef __DRV_XGBE_INTF_H__
+#define __DRV_XGBE_INTF_H__
+
+/**
+ * An object for forwarding VAs and PAs to the hypervisor.
+ * @ingroup types
+ *
+ * This allows the supervisor to specify a number of areas of memory to
+ * store packet buffers.
+ */
+typedef struct
+{
+  /** The physical address of the memory. */
+  HV_PhysAddr pa;
+  /** Page table entry for the memory.  This is only used to derive the
+   *  memory's caching mode; the PA bits are ignored. */
+  HV_PTE pte;
+  /** The virtual address of the memory. */
+  HV_VirtAddr va;
+  /** Size (in bytes) of the memory area. */
+  int size;
+
+}
+netio_ipp_address_t;
+
+/** The various pread/pwrite offsets into the hypervisor-level driver.
+ * @ingroup types
+ */
+typedef enum
+{
+  /** Inform the Linux driver of the address of the NetIO arena memory.
+   *  This offset is actually only used to convey information from netio
+   *  to the Linux driver; it never makes it from there to the hypervisor.
+   *  Write-only; takes a uint32_t specifying the VA address. */
+  NETIO_FIXED_ADDR               = 0x5000000000000000ULL,
+
+  /** Inform the Linux driver of the size of the NetIO arena memory.
+   *  This offset is actually only used to convey information from netio
+   *  to the Linux driver; it never makes it from there to the hypervisor.
+   *  Write-only; takes a uint32_t specifying the VA size. */
+  NETIO_FIXED_SIZE               = 0x5100000000000000ULL,
+
+  /** Register current tile with IPP.  Write then read: write, takes a
+   *  netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */
+  NETIO_IPP_INPUT_REGISTER_OFF   = 0x6000000000000000ULL,
+
+  /** Unregister current tile from IPP.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL,
+
+  /** Start packets flowing.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_INIT_OFF       = 0x6200000000000000ULL,
+
+  /** Stop packets flowing.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_UNINIT_OFF     = 0x6300000000000000ULL,
+
+  /** Configure group (typically we group on VLAN).  Write-only: takes an
+   *  array of netio_group_t's, low 24 bits of the offset is the base group
+   *  number times the size of a netio_group_t. */
+  NETIO_IPP_INPUT_GROUP_CFG_OFF  = 0x6400000000000000ULL,
+
+  /** Configure bucket.  Write-only: takes an array of netio_bucket_t's, low
+   *  24 bits of the offset is the base bucket number times the size of a
+   *  netio_bucket_t. */
+  NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL,
+
+  /** Get/set a parameter.  Read or write: read or write data is the parameter
+   *  value, low 32 bits of the offset is a __netio_getset_offset_t. */
+  NETIO_IPP_PARAM_OFF            = 0x6600000000000000ULL,
+
+  /** Get fast I/O index.  Read-only; returns a 4-byte base index value. */
+  NETIO_IPP_GET_FASTIO_OFF       = 0x6700000000000000ULL,
+
+  /** Configure hijack IP address.  Packets with this IPv4 dest address
+   *  go to bucket NETIO_NUM_BUCKETS - 1.  Write-only: takes an IP address
+   *  in some standard form.  FIXME: Define the form! */
+  NETIO_IPP_INPUT_HIJACK_CFG_OFF  = 0x6800000000000000ULL,
+
+  /**
+   * Offsets beyond this point are reserved for the supervisor (although that
+   * enforcement must be done by the supervisor driver itself).
+   */
+  NETIO_IPP_USER_MAX_OFF         = 0x6FFFFFFFFFFFFFFFULL,
+
+  /** Register I/O memory.  Write-only, takes a netio_ipp_address_t. */
+  NETIO_IPP_IOMEM_REGISTER_OFF   = 0x7000000000000000ULL,
+
+  /** Unregister I/O memory.  Write-only, takes a netio_ipp_address_t. */
+  NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL,
+
+  /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux
+   * userspace code due to limitations in the pread/pwrite syscalls. */
+
+  /** Drain LIPP buffers. */
+  NETIO_IPP_DRAIN_OFF              = 0xFA00000000000000ULL,
+
+  /** Supply a netio_ipp_address_t to be used as shared memory for the
+   *  LEPP command queue. */
+  NETIO_EPP_SHM_OFF              = 0xFB00000000000000ULL,
+
+  /* 0xFC... is currently unused. */
+
+  /** Stop IPP/EPP tiles.  Write-only, takes a dummy argument.  */
+  NETIO_IPP_STOP_SHIM_OFF        = 0xFD00000000000000ULL,
+
+  /** Start IPP/EPP tiles.  Write-only, takes a dummy argument.  */
+  NETIO_IPP_START_SHIM_OFF       = 0xFE00000000000000ULL,
+
+  /** Supply packet arena.  Write-only, takes an array of
+    * netio_ipp_address_t values. */
+  NETIO_IPP_ADDRESS_OFF          = 0xFF00000000000000ULL,
+} netio_hv_offset_t;
+
+/** Extract the base offset from an offset */
+#define NETIO_BASE_OFFSET(off)    ((off) & 0xFF00000000000000ULL)
+/** Extract the local offset from an offset */
+#define NETIO_LOCAL_OFFSET(off)   ((off) & 0x00FFFFFFFFFFFFFFULL)
+
+
+/**
+ * Get/set offset.
+ */
+typedef union
+{
+  struct
+  {
+    uint64_t addr:48;        /**< Class-specific address */
+    unsigned int class:8;    /**< Class (e.g., NETIO_PARAM) */
+    unsigned int opcode:8;   /**< High 8 bits of NETIO_IPP_PARAM_OFF */
+  }
+  bits;                      /**< Bitfields */
+  uint64_t word;             /**< Aggregated value to use as the offset */
+}
+__netio_getset_offset_t;
+
+/**
+ * Fast I/O index offsets (must be contiguous).
+ */
+typedef enum
+{
+  NETIO_FASTIO_ALLOCATE         = 0, /**< Get empty packet buffer */
+  NETIO_FASTIO_FREE_BUFFER      = 1, /**< Give buffer back to IPP */
+  NETIO_FASTIO_RETURN_CREDITS   = 2, /**< Give credits to IPP */
+  NETIO_FASTIO_SEND_PKT_NOCK    = 3, /**< Send a packet, no checksum */
+  NETIO_FASTIO_SEND_PKT_CK      = 4, /**< Send a packet, with checksum */
+  NETIO_FASTIO_SEND_PKT_VEC     = 5, /**< Send a vector of packets */
+  NETIO_FASTIO_SENDV_PKT        = 6, /**< Sendv one packet */
+  NETIO_FASTIO_NUM_INDEX        = 7, /**< Total number of fast I/O indices */
+} netio_fastio_index_t;
+
+/** 3-word return type for Fast I/O call. */
+typedef struct
+{
+  int err;            /**< Error code. */
+  uint32_t val0;      /**< Value.  Meaning depends upon the specific call. */
+  uint32_t val1;      /**< Value.  Meaning depends upon the specific call. */
+} netio_fastio_rv3_t;
+
+/** 0-argument fast I/O call */
+int __netio_fastio0(uint32_t fastio_index);
+/** 1-argument fast I/O call */
+int __netio_fastio1(uint32_t fastio_index, uint32_t arg0);
+/** 3-argument fast I/O call, 2-word return value */
+netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0,
+                                       uint32_t arg1, uint32_t arg2);
+/** 4-argument fast I/O call */
+int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3);
+/** 6-argument fast I/O call */
+int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5);
+/** 9-argument fast I/O call */
+int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5,
+                    uint32_t arg6, uint32_t arg7, uint32_t arg8);
+
+/** Allocate an empty packet.
+ * @param fastio_index Fast I/O index.
+ * @param size Size of the packet to allocate.
+ */
+#define __netio_fastio_allocate(fastio_index, size) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size)
+
+/** Free a buffer.
+ * @param fastio_index Fast I/O index.
+ * @param handle Handle for the packet to free.
+ */
+#define __netio_fastio_free_buffer(fastio_index, handle) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle)
+
+/** Increment our receive credits.
+ * @param fastio_index Fast I/O index.
+ * @param credits Number of credits to add.
+ */
+#define __netio_fastio_return_credits(fastio_index, credits) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits)
+
+/** Send packet, no checksum.
+ * @param fastio_index Fast I/O index.
+ * @param ackflag Nonzero if we want an ack.
+ * @param size Size of the packet.
+ * @param va Virtual address of start of packet.
+ * @param handle Packet handle.
+ */
+#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \
+  __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \
+                  size, va, handle)
+
+/** Send packet, calculate checksum.
+ * @param fastio_index Fast I/O index.
+ * @param ackflag Nonzero if we want an ack.
+ * @param size Size of the packet.
+ * @param va Virtual address of start of packet.
+ * @param handle Packet handle.
+ * @param csum0 Shim checksum header.
+ * @param csum1 Checksum seed.
+ */
+#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \
+                                   csum0, csum1) \
+  __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \
+                  size, va, handle, csum0, csum1)
+
+
+/** Format for the "csum0" argument to the __netio_fastio_send routines
+ * and LEPP.  Note that this is currently exactly identical to the
+ * ShimProtocolOffloadHeader.
+ */
+typedef union
+{
+  struct
+  {
+    unsigned int start_byte:7;       /**< The first byte to be checksummed */
+    unsigned int count:14;           /**< Number of bytes to be checksummed. */
+    unsigned int destination_byte:7; /**< The byte to write the checksum to. */
+    unsigned int reserved:4;         /**< Reserved. */
+  } bits;                            /**< Decomposed method of access. */
+  unsigned int word;                 /**< To send out the IDN. */
+} __netio_checksum_header_t;
+
+
+/** Sendv packet with 1 or 2 segments.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
+ *        1 in next 2 bits; expected checksum in high 16 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; if zero, no checksum.
+ * @param va_F Virtual address of first segment.
+ * @param va_L Virtual address of last segment, if 2 segments.
+ * @param len_F_L Length of first segment in low 16 bits; length of last
+ *        segment, if 2 segments, in high 16 bits.
+ */
+#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \
+                                     va_F, va_L, len_F_L) \
+  __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L)
+
+/** Send packet on PCIe interface.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe.
+ * @param va_F Virtual address of the packet buffer.
+ * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0.
+ * @param len_F_L Length of the packet buffer in low 16 bits.
+ */
+#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \
+                                     va_F, va_L, len_F_L) \
+  __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L)
+
+/** Sendv packet with 3 or 4 segments.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
+ *        1 in next 2 bits; expected checksum in high 16 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; if zero, no checksum.
+ * @param va_F Virtual address of first segment.
+ * @param va_L Virtual address of last segment (third segment if 3 segments,
+ *        fourth segment if 4 segments).
+ * @param len_F_L Length of first segment in low 16 bits; length of last
+ *        segment in high 16 bits.
+ * @param va_M0 Virtual address of "middle 0" segment; this segment is sent
+ *        second when there are three segments, and third if there are four.
+ * @param va_M1 Virtual address of "middle 1" segment; this segment is sent
+ *        second when there are four segments.
+ * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle
+ *        1 segment, if 4 segments, in high 16 bits.
+ */
+#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \
+                                     va_L, len_F_L, va_M0, va_M1, len_M0_M1) \
+  __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1)
+
+/** Send vector of packets.
+ * @param fastio_index Fast I/O index.
+ * @param seqno Number of packets transmitted so far on this interface;
+ *        used to decide which packets should be acknowledged.
+ * @param nentries Number of entries in vector.
+ * @param va Virtual address of start of vector entry array.
+ * @return 3-word netio_fastio_rv3_t structure.  The structure's err member
+ *         is an error code, or zero if no error.  The val0 member is the
+ *         updated value of seqno; it has been incremented by 1 for each
+ *         packet sent.  That increment may be less than nentries if an
+ *         error occured, or if some of the entries in the vector contain
+ *         handles equal to NETIO_PKT_HANDLE_NONE.  The val1 member is the
+ *         updated value of nentries; it has been decremented by 1 for each
+ *         vector entry processed.  Again, that decrement may be less than
+ *         nentries (leaving the returned value positive) if an error
+ *         occurred.
+ */
+#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \
+  __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \
+                      nentries, va)
+
+
+/** An egress DMA command for LEPP. */
+typedef struct
+{
+  /** Is this a TSO transfer?
+   *
+   * NOTE: This field is always 0, to distinguish it from
+   * lepp_tso_cmd_t.  It must come first!
+   */
+  uint8_t tso               : 1;
+
+  /** Unused padding bits. */
+  uint8_t _unused           : 3;
+
+  /** Should this packet be sent directly from caches instead of DRAM,
+   * using hash-for-home to locate the packet data?
+   */
+  uint8_t hash_for_home     : 1;
+
+  /** Should we compute a checksum? */
+  uint8_t compute_checksum  : 1;
+
+  /** Is this the final buffer for this packet?
+   *
+   * A single packet can be split over several input buffers (a "gather"
+   * operation).  This flag indicates that this is the last buffer
+   * in a packet.
+   */
+  uint8_t end_of_packet     : 1;
+
+  /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */
+  uint8_t send_completion   : 1;
+
+  /** High bits of Client Physical Address of the start of the buffer
+   *  to be egressed.
+   *
+   *  NOTE: Only 6 bits are actually needed here, as CPAs are
+   *  currently 38 bits.  So two bits could be scavenged from this.
+   */
+  uint8_t cpa_hi;
+
+  /** The number of bytes to be egressed. */
+  uint16_t length;
+
+  /** Low 32 bits of Client Physical Address of the start of the buffer
+   *  to be egressed.
+   */
+  uint32_t cpa_lo;
+
+  /** Checksum information (only used if 'compute_checksum'). */
+  __netio_checksum_header_t checksum_data;
+
+} lepp_cmd_t;
+
+
+/** A chunk of physical memory for a TSO egress. */
+typedef struct
+{
+  /** The low bits of the CPA. */
+  uint32_t cpa_lo;
+  /** The high bits of the CPA. */
+  uint16_t cpa_hi		: 15;
+  /** Should this packet be sent directly from caches instead of DRAM,
+   *  using hash-for-home to locate the packet data?
+   */
+  uint16_t hash_for_home	: 1;
+  /** The length in bytes. */
+  uint16_t length;
+} lepp_frag_t;
+
+
+/** An LEPP command that handles TSO. */
+typedef struct
+{
+  /** Is this a TSO transfer?
+   *
+   *  NOTE: This field is always 1, to distinguish it from
+   *  lepp_cmd_t.  It must come first!
+   */
+  uint8_t tso             : 1;
+
+  /** Unused padding bits. */
+  uint8_t _unused         : 7;
+
+  /** Size of the header[] array in bytes.  It must be in the range
+   *  [40, 127], which are the smallest header for a TCP packet over
+   *  Ethernet and the maximum possible prepend size supported by
+   *  hardware, respectively.  Note that the array storage must be
+   *  padded out to a multiple of four bytes so that the following
+   *  LEPP command is aligned properly.
+   */
+  uint8_t header_size;
+
+  /** Byte offset of the IP header in header[]. */
+  uint8_t ip_offset;
+
+  /** Byte offset of the TCP header in header[]. */
+  uint8_t tcp_offset;
+
+  /** The number of bytes to use for the payload of each packet,
+   *  except of course the last one, which may not have enough bytes.
+   *  This means that each Ethernet packet except the last will have a
+   *  size of header_size + payload_size.
+   */
+  uint16_t payload_size;
+
+  /** The length of the 'frags' array that follows this struct. */
+  uint16_t num_frags;
+
+  /** The actual frags. */
+  lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */];
+
+  /*
+   * The packet header template logically follows frags[],
+   * but you can't declare that in C.
+   *
+   * uint32_t header[header_size_in_words_rounded_up];
+   */
+
+} lepp_tso_cmd_t;
+
+
+/** An LEPP completion ring entry. */
+typedef void* lepp_comp_t;
+
+
+/** Maximum number of frags for one TSO command.  This is adapted from
+ *  linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for
+ *  our page size of exactly 65536.  We add one for a "body" fragment.
+ */
+#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1)
+
+/** Total number of bytes needed for an lepp_tso_cmd_t. */
+#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
+  (sizeof(lepp_tso_cmd_t) + \
+   (num_frags) * sizeof(lepp_frag_t) + \
+   (((header_size) + 3) & -4))
+
+/** The size of the lepp "cmd" queue. */
+#define LEPP_CMD_QUEUE_BYTES \
+ (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \
+  (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t))
+
+/** The largest possible command that can go in lepp_queue_t::cmds[]. */
+#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128)
+
+/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive).
+ */
+#define LEPP_CMD_LIMIT \
+  (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE)
+
+/** The maximum number of completions in an LEPP queue. */
+#define LEPP_COMP_QUEUE_SIZE \
+  ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t))
+
+/** Increment an index modulo the queue size. */
+#define LEPP_QINC(var) \
+  (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1))
+
+/** A queue used to convey egress commands from the client to LEPP. */
+typedef struct
+{
+  /** Index of first completion not yet processed by user code.
+   *  If this is equal to comp_busy, there are no such completions.
+   *
+   *  NOTE: This is only read/written by the user.
+   */
+  unsigned int comp_head;
+
+  /** Index of first completion record not yet completed.
+   *  If this is equal to comp_tail, there are no such completions.
+   *  This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever
+   *  a command with the 'completion' bit set is finished.
+   *
+   *  NOTE: This is only written by LEPP, only read by the user.
+   */
+  volatile unsigned int comp_busy;
+
+  /** Index of the first empty slot in the completion ring.
+   *  Entries from this up to but not including comp_head (in ring order)
+   *  can be filled in with completion data.
+   *
+   *  NOTE: This is only read/written by the user.
+   */
+  unsigned int comp_tail;
+
+  /** Byte index of first command enqueued for LEPP but not yet processed.
+   *
+   *  This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
+   *
+   *  NOTE: LEPP advances this counter as soon as it no longer needs
+   *  the cmds[] storage for this entry, but the transfer is not actually
+   *  complete (i.e. the buffer pointed to by the command is no longer
+   *  needed) until comp_busy advances.
+   *
+   *  If this is equal to cmd_tail, the ring is empty.
+   *
+   *  NOTE: This is only written by LEPP, only read by the user.
+   */
+  volatile unsigned int cmd_head;
+
+  /** Byte index of first empty slot in the command ring.  This field can
+   *  be incremented up to but not equal to cmd_head (because that would
+   *  mean the ring is empty).
+   *
+   *  This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
+   *
+   *  NOTE: This is read/written by the user, only read by LEPP.
+   */
+  volatile unsigned int cmd_tail;
+
+  /** A ring of variable-sized egress DMA commands.
+   *
+   *  NOTE: Only written by the user, only read by LEPP.
+   */
+  char cmds[LEPP_CMD_QUEUE_BYTES]
+    __attribute__((aligned(CHIP_L2_LINE_SIZE())));
+
+  /** A ring of user completion data.
+   *  NOTE: Only read/written by the user.
+   */
+  lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE]
+    __attribute__((aligned(CHIP_L2_LINE_SIZE())));
+} lepp_queue_t;
+
+
+/** An internal helper function for determining the number of entries
+ *  available in a ring buffer, given that there is one sentinel.
+ */
+static inline unsigned int
+_lepp_num_free_slots(unsigned int head, unsigned int tail)
+{
+  /*
+   * One entry is reserved for use as a sentinel, to distinguish
+   * "empty" from "full".  So we compute
+   * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation.
+   */
+  return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0);
+}
+
+
+/** Returns how many new comp entries can be enqueued. */
+static inline unsigned int
+lepp_num_free_comp_slots(const lepp_queue_t* q)
+{
+  return _lepp_num_free_slots(q->comp_head, q->comp_tail);
+}
+
+static inline int
+lepp_qsub(int v1, int v2)
+{
+  int delta = v1 - v2;
+  return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE);
+}
+
+
+/** FIXME: Check this from linux, via a new "pwrite()" call. */
+#define LIPP_VERSION 1
+
+
+/** We use exactly two bytes of alignment padding. */
+#define LIPP_PACKET_PADDING 2
+
+/** The minimum size of a "small" buffer (including the padding). */
+#define LIPP_SMALL_PACKET_SIZE 128
+
+/*
+ * NOTE: The following two values should total to less than around
+ * 13582, to keep the total size used for "lipp_state_t" below 64K.
+ */
+
+/** The maximum number of "small" buffers.
+ *  This is enough for 53 network cpus with 128 credits.  Note that
+ *  if these are exhausted, we will fall back to using large buffers.
+ */
+#define LIPP_SMALL_BUFFERS 6785
+
+/** The maximum number of "large" buffers.
+ *  This is enough for 53 network cpus with 128 credits.
+ */
+#define LIPP_LARGE_BUFFERS 6785
+
+#endif /* __DRV_XGBE_INTF_H__ */
diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h
new file mode 100644
index 0000000..e1591bf
--- /dev/null
+++ b/arch/tile/include/hv/netio_errors.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * Error codes returned from NetIO routines.
+ */
+
+#ifndef __NETIO_ERRORS_H__
+#define __NETIO_ERRORS_H__
+
+/**
+ * @addtogroup error
+ *
+ * @brief The error codes returned by NetIO functions.
+ *
+ * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and
+ * a negative value if an error occurs.
+ *
+ * In cases where a NetIO function failed due to a error reported by
+ * system libraries, the error code will be the negation of the
+ * system errno at the time of failure.  The @ref netio_strerror()
+ * function will deliver error strings for both NetIO and system error
+ * codes.
+ *
+ * @{
+ */
+
+/** The set of all NetIO errors. */
+typedef enum
+{
+  /** Operation successfully completed. */
+  NETIO_NO_ERROR        = 0,
+
+  /** A packet was successfully retrieved from an input queue. */
+  NETIO_PKT             = 0,
+
+  /** Largest NetIO error number. */
+  NETIO_ERR_MAX         = -701,
+
+  /** The tile is not registered with the IPP. */
+  NETIO_NOT_REGISTERED  = -701,
+
+  /** No packet was available to retrieve from the input queue. */
+  NETIO_NOPKT           = -702,
+
+  /** The requested function is not implemented. */
+  NETIO_NOT_IMPLEMENTED = -703,
+
+  /** On a registration operation, the target queue already has the maximum
+   *  number of tiles registered for it, and no more may be added.  On a
+   *  packet send operation, the output queue is full and nothing more can
+   *  be queued until some of the queued packets are actually transmitted. */
+  NETIO_QUEUE_FULL      = -704,
+
+  /** The calling process or thread is not bound to exactly one CPU. */
+  NETIO_BAD_AFFINITY    = -705,
+
+  /** Cannot allocate memory on requested controllers. */
+  NETIO_CANNOT_HOME     = -706,
+
+  /** On a registration operation, the IPP specified is not configured
+   *  to support the options requested; for instance, the application
+   *  wants a specific type of tagged headers which the configured IPP
+   *  doesn't support.  Or, the supplied configuration information is
+   *  not self-consistent, or is out of range; for instance, specifying
+   *  both NETIO_RECV and NETIO_NO_RECV, or asking for more than
+   *  NETIO_MAX_SEND_BUFFERS to be preallocated.  On a VLAN or bucket
+   *  configure operation, the number of items, or the base item, was
+   *  out of range.
+   */
+  NETIO_BAD_CONFIG      = -707,
+
+  /** Too many tiles have registered to transmit packets. */
+  NETIO_TOOMANY_XMIT    = -708,
+
+  /** Packet transmission was attempted on a queue which was registered
+      with transmit disabled. */
+  NETIO_UNREG_XMIT      = -709,
+
+  /** This tile is already registered with the IPP. */
+  NETIO_ALREADY_REGISTERED = -710,
+
+  /** The Ethernet link is down. The application should try again later. */
+  NETIO_LINK_DOWN       = -711,
+
+  /** An invalid memory buffer has been specified.  This may be an unmapped
+   * virtual address, or one which does not meet alignment requirements.
+   * For netio_input_register(), this error may be returned when multiple
+   * processes specify different memory regions to be used for NetIO
+   * buffers.  That can happen if these processes specify explicit memory
+   * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init()
+   * has not been called by a common ancestor of the processes.
+   */
+  NETIO_FAULT           = -712,
+
+  /** Cannot combine user-managed shared memory and cache coherence. */
+  NETIO_BAD_CACHE_CONFIG = -713,
+
+  /** Smallest NetIO error number. */
+  NETIO_ERR_MIN         = -713,
+
+#ifndef __DOXYGEN__
+  /** Used internally to mean that no response is needed; never returned to
+   *  an application. */
+  NETIO_NO_RESPONSE     = 1
+#endif
+} netio_error_t;
+
+/** @} */
+
+#endif /* __NETIO_ERRORS_H__ */
diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h
new file mode 100644
index 0000000..8d20972
--- /dev/null
+++ b/arch/tile/include/hv/netio_intf.h
@@ -0,0 +1,2975 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * NetIO interface structures and macros.
+ */
+
+#ifndef __NETIO_INTF_H__
+#define __NETIO_INTF_H__
+
+#include <hv/netio_errors.h>
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__)
+#include <assert.h>
+#define netio_assert assert  /**< Enable assertions from macros */
+#else
+#define netio_assert(...) ((void)(0))  /**< Disable assertions from macros */
+#endif
+
+/*
+ * If none of these symbols are defined, we're building libnetio in an
+ * environment where we have pthreads, so we'll enable locking.
+ */
+#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \
+    !defined(__NEWLIB__)
+#define _NETIO_PTHREAD       /**< Include a mutex in netio_queue_t below */
+
+/*
+ * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on
+ * per-packet NetIO operations.  We still do pthread locking on things
+ * like netio_input_register, though.  This is used for building
+ * libnetio_unlocked.
+ */
+#ifndef NETIO_UNLOCKED
+
+/* Avoid PLT overhead by using our own inlined per-cpu lock. */
+#include <sched.h>
+typedef int _netio_percpu_mutex_t;
+
+static __inline int
+_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock)
+{
+  *lock = 0;
+  return 0;
+}
+
+static __inline int
+_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock)
+{
+  while (__builtin_expect(__insn_tns(lock), 0))
+    sched_yield();
+  return 0;
+}
+
+static __inline int
+_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock)
+{
+  *lock = 0;
+  return 0;
+}
+
+#else /* NETIO_UNLOCKED */
+
+/* Don't do any locking for per-packet NetIO operations. */
+typedef int _netio_percpu_mutex_t;
+#define _netio_percpu_mutex_init(L)
+#define _netio_percpu_mutex_lock(L)
+#define _netio_percpu_mutex_unlock(L)
+
+#endif /* NETIO_UNLOCKED */
+#endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */
+
+/** How many tiles can register for a given queue.
+ *  @ingroup setup */
+#define NETIO_MAX_TILES_PER_QUEUE  64
+
+
+/** Largest permissible queue identifier.
+ *  @ingroup setup  */
+#define NETIO_MAX_QUEUE_ID        255
+
+
+#ifndef __DOXYGEN__
+
+/* Metadata packet checksum/ethertype flags. */
+
+/** The L4 checksum has not been calculated. */
+#define _NETIO_PKT_NO_L4_CSUM_SHIFT           0
+#define _NETIO_PKT_NO_L4_CSUM_RMASK           1
+#define _NETIO_PKT_NO_L4_CSUM_MASK \
+         (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT)
+
+/** The L3 checksum has not been calculated. */
+#define _NETIO_PKT_NO_L3_CSUM_SHIFT           1
+#define _NETIO_PKT_NO_L3_CSUM_RMASK           1
+#define _NETIO_PKT_NO_L3_CSUM_MASK \
+         (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT)
+
+/** The L3 checksum is incorrect (or perhaps has not been calculated). */
+#define _NETIO_PKT_BAD_L3_CSUM_SHIFT          2
+#define _NETIO_PKT_BAD_L3_CSUM_RMASK          1
+#define _NETIO_PKT_BAD_L3_CSUM_MASK \
+         (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT)
+
+/** The Ethernet packet type is unrecognized. */
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT    3
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK    1
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \
+         (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \
+          _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT)
+
+/* Metadata packet type flags. */
+
+/** Where the packet type bits are; this field is the index into
+ *  _netio_pkt_info. */
+#define _NETIO_PKT_TYPE_SHIFT        4
+#define _NETIO_PKT_TYPE_RMASK        0x3F
+
+/** How many VLAN tags the packet has, and, if we have two, which one we
+ *  actually grouped on.  A VLAN within a proprietary (Marvell or Broadcom)
+ *  tag is counted here. */
+#define _NETIO_PKT_VLAN_SHIFT        4
+#define _NETIO_PKT_VLAN_RMASK        0x3
+#define _NETIO_PKT_VLAN_MASK \
+         (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT)
+#define _NETIO_PKT_VLAN_NONE         0   /* No VLAN tag. */
+#define _NETIO_PKT_VLAN_ONE          1   /* One VLAN tag. */
+#define _NETIO_PKT_VLAN_TWO_OUTER    2   /* Two VLAN tags, outer one used. */
+#define _NETIO_PKT_VLAN_TWO_INNER    3   /* Two VLAN tags, inner one used. */
+
+/** Which proprietary tags the packet has. */
+#define _NETIO_PKT_TAG_SHIFT         6
+#define _NETIO_PKT_TAG_RMASK         0x3
+#define _NETIO_PKT_TAG_MASK \
+          (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT)
+#define _NETIO_PKT_TAG_NONE          0   /* No proprietary tags. */
+#define _NETIO_PKT_TAG_MRVL          1   /* Marvell HyperG.Stack tags. */
+#define _NETIO_PKT_TAG_MRVL_EXT      2   /* HyperG.Stack extended tags. */
+#define _NETIO_PKT_TAG_BRCM          3   /* Broadcom HiGig tags. */
+
+/** Whether a packet has an LLC + SNAP header. */
+#define _NETIO_PKT_SNAP_SHIFT        8
+#define _NETIO_PKT_SNAP_RMASK        0x1
+#define _NETIO_PKT_SNAP_MASK \
+          (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT)
+
+/* NOTE: Bits 9 and 10 are unused. */
+
+/** Length of any custom data before the L2 header, in words. */
+#define _NETIO_PKT_CUSTOM_LEN_SHIFT  11
+#define _NETIO_PKT_CUSTOM_LEN_RMASK  0x1F
+#define _NETIO_PKT_CUSTOM_LEN_MASK \
+          (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT)
+
+/** The L4 checksum is incorrect (or perhaps has not been calculated). */
+#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16
+#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1
+#define _NETIO_PKT_BAD_L4_CSUM_MASK \
+          (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT)
+
+/** Length of the L2 header, in words. */
+#define _NETIO_PKT_L2_LEN_SHIFT  17
+#define _NETIO_PKT_L2_LEN_RMASK  0x1F
+#define _NETIO_PKT_L2_LEN_MASK \
+          (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT)
+
+
+/* Flags in minimal packet metadata. */
+
+/** We need an eDMA checksum on this packet. */
+#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT            0
+#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK            1
+#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \
+         (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT)
+
+/* Data within the packet information table. */
+
+/* Note that, for efficiency, code which uses these fields assumes that none
+ * of the shift values below are zero.  See uses below for an explanation. */
+
+/** Offset within the L2 header of the innermost ethertype (in halfwords). */
+#define _NETIO_PKT_INFO_ETYPE_SHIFT       6
+#define _NETIO_PKT_INFO_ETYPE_RMASK    0x1F
+
+/** Offset within the L2 header of the VLAN tag (in halfwords). */
+#define _NETIO_PKT_INFO_VLAN_SHIFT       11
+#define _NETIO_PKT_INFO_VLAN_RMASK     0x1F
+
+#endif
+
+
+/** The size of a memory buffer representing a small packet.
+ *  @ingroup egress */
+#define SMALL_PACKET_SIZE 256
+
+/** The size of a memory buffer representing a large packet.
+ *  @ingroup egress */
+#define LARGE_PACKET_SIZE 2048
+
+/** The size of a memory buffer representing a jumbo packet.
+ *  @ingroup egress */
+#define JUMBO_PACKET_SIZE (12 * 1024)
+
+
+/* Common ethertypes.
+ * @ingroup ingress */
+/** @{ */
+/** The ethertype of IPv4. */
+#define ETHERTYPE_IPv4 (0x0800)
+/** The ethertype of ARP. */
+#define ETHERTYPE_ARP (0x0806)
+/** The ethertype of VLANs. */
+#define ETHERTYPE_VLAN (0x8100)
+/** The ethertype of a Q-in-Q header. */
+#define ETHERTYPE_Q_IN_Q (0x9100)
+/** The ethertype of IPv6. */
+#define ETHERTYPE_IPv6 (0x86DD)
+/** The ethertype of MPLS. */
+#define ETHERTYPE_MPLS (0x8847)
+/** @} */
+
+
+/** The possible return values of NETIO_PKT_STATUS.
+ * @ingroup ingress
+ */
+typedef enum
+{
+  /** No problems were detected with this packet. */
+  NETIO_PKT_STATUS_OK,
+  /** The packet is undersized; this is expected behavior if the packet's
+    * ethertype is unrecognized, but otherwise the packet is likely corrupt. */
+  NETIO_PKT_STATUS_UNDERSIZE,
+  /** The packet is oversized and some trailing bytes have been discarded.
+      This is expected behavior for short packets, since it's impossible to
+      precisely determine the amount of padding which may have been added to
+      them to make them meet the minimum Ethernet packet size. */
+  NETIO_PKT_STATUS_OVERSIZE,
+  /** The packet was judged to be corrupt by hardware (for instance, it had
+      a bad CRC, or part of it was discarded due to lack of buffer space in
+      the I/O shim) and should be discarded. */
+  NETIO_PKT_STATUS_BAD
+} netio_pkt_status_t;
+
+
+/** Log2 of how many buckets we have. */
+#define NETIO_LOG2_NUM_BUCKETS (10)
+
+/** How many buckets we have.
+ * @ingroup ingress */
+#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS)
+
+
+/**
+ * @brief A group-to-bucket identifier.
+ *
+ * @ingroup setup
+ *
+ * This tells us what to do with a given group.
+ */
+typedef union {
+  /** The header broken down into bits. */
+  struct {
+    /** Whether we should balance on L4, if available */
+    unsigned int __balance_on_l4:1;
+    /** Whether we should balance on L3, if available */
+    unsigned int __balance_on_l3:1;
+    /** Whether we should balance on L2, if available */
+    unsigned int __balance_on_l2:1;
+    /** Reserved for future use */
+    unsigned int __reserved:1;
+    /** The base bucket to use to send traffic */
+    unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS;
+    /** The mask to apply to the balancing value. This must be one less
+     * than a power of two, e.g. 0x3 or 0xFF.
+     */
+    unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS;
+    /** Pad to 32 bits */
+    unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS);
+  } bits;
+  /** To send out the IDN. */
+  unsigned int word;
+}
+netio_group_t;
+
+
+/**
+ * @brief A VLAN-to-bucket identifier.
+ *
+ * @ingroup setup
+ *
+ * This tells us what to do with a given VLAN.
+ */
+typedef netio_group_t netio_vlan_t;
+
+
+/**
+ * A bucket-to-queue mapping.
+ * @ingroup setup
+ */
+typedef unsigned char netio_bucket_t;
+
+
+/**
+ * A packet size can always fit in a netio_size_t.
+ * @ingroup setup
+ */
+typedef unsigned int netio_size_t;
+
+
+/**
+ * @brief Ethernet standard (ingress) packet metadata.
+ *
+ * @ingroup ingress
+ *
+ * This is additional data associated with each packet.
+ * This structure is opaque and accessed through the @ref ingress.
+ *
+ * Also, the buffer population operation currently assumes that standard
+ * metadata is at least as large as minimal metadata, and will need to be
+ * modified if that is no longer the case.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque. */
+  unsigned char opaque[24];
+#else
+  /** The overall ordinal of the packet */
+  unsigned int __packet_ordinal;
+  /** The ordinal of the packet within the group */
+  unsigned int __group_ordinal;
+  /** The best flow hash IPP could compute. */
+  unsigned int __flow_hash;
+  /** Flags pertaining to checksum calculation, packet type, etc. */
+  unsigned int __flags;
+  /** The first word of "user data". */
+  unsigned int __user_data_0;
+  /** The second word of "user data". */
+  unsigned int __user_data_1;
+#endif
+}
+netio_pkt_metadata_t;
+
+
+/** To ensure that the L3 header is aligned mod 4, the L2 header should be
+ * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes
+ * long.  The standard way to do this is to simply add 2 bytes of padding
+ * before the L2 header.
+ */
+#define NETIO_PACKET_PADDING 2
+
+
+
+/**
+ * @brief Ethernet minimal (egress) packet metadata.
+ *
+ * @ingroup egress
+ *
+ * This structure represents information about packets which have
+ * been processed by @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer().  This structure is opaque
+ * and accessed through the @ref egress.
+ *
+ * @internal This structure is actually copied into the memory used by
+ * standard metadata, which is assumed to be large enough.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque. */
+  unsigned char opaque[14];
+#else
+  /** The offset of the L2 header from the start of the packet data. */
+  unsigned short l2_offset;
+  /** The offset of the L3 header from the start of the packet data. */
+  unsigned short l3_offset;
+  /** Where to write the checksum. */
+  unsigned char csum_location;
+  /** Where to start checksumming from. */
+  unsigned char csum_start;
+  /** Flags pertaining to checksum calculation etc. */
+  unsigned short flags;
+  /** The L2 length of the packet. */
+  unsigned short l2_length;
+  /** The checksum with which to seed the checksum generator. */
+  unsigned short csum_seed;
+  /** How much to checksum. */
+  unsigned short csum_length;
+#endif
+}
+netio_pkt_minimal_metadata_t;
+
+
+#ifndef __DOXYGEN__
+
+/**
+ * @brief An I/O notification header.
+ *
+ * This is the first word of data received from an I/O shim in a notification
+ * packet. It contains framing and status information.
+ */
+typedef union
+{
+  unsigned int word; /**< The whole word. */
+  /** The various fields. */
+  struct
+  {
+    unsigned int __channel:7;    /**< Resource channel. */
+    unsigned int __type:4;       /**< Type. */
+    unsigned int __ack:1;        /**< Whether an acknowledgement is needed. */
+    unsigned int __reserved:1;   /**< Reserved. */
+    unsigned int __protocol:1;   /**< A protocol-specific word is added. */
+    unsigned int __status:2;     /**< Status of the transfer. */
+    unsigned int __framing:2;    /**< Framing of the transfer. */
+    unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */
+  } bits;
+}
+__netio_pkt_notif_t;
+
+
+/**
+ * Returns the base address of the packet.
+ */
+#define _NETIO_PKT_HANDLE_BASE(p) \
+  ((unsigned char*)((p).word & 0xFFFFFFC0))
+
+/**
+ * Returns the base address of the packet.
+ */
+#define _NETIO_PKT_BASE(p) \
+  _NETIO_PKT_HANDLE_BASE(p->__packet)
+
+/**
+ * @brief An I/O notification packet (second word)
+ *
+ * This is the second word of data received from an I/O shim in a notification
+ * packet.  This is the virtual address of the packet buffer, plus some flag
+ * bits.  (The virtual address of the packet is always 256-byte aligned so we
+ * have room for 8 bits' worth of flags in the low 8 bits.)
+ *
+ * @internal
+ * NOTE: The low two bits must contain "__queue", so the "packet size"
+ * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly.
+ *
+ * If __addr or __offset are moved, _NETIO_PKT_BASE
+ * (defined right below this) must be changed.
+ */
+typedef union
+{
+  unsigned int word; /**< The whole word. */
+  /** The various fields. */
+  struct
+  {
+    /** Which queue the packet will be returned to once it is sent back to
+        the IPP.  This is one of the SIZE_xxx values. */
+    unsigned int __queue:2;
+
+    /** The IPP handle of the sending IPP. */
+    unsigned int __ipp_handle:2;
+
+    /** Reserved for future use. */
+    unsigned int __reserved:1;
+
+    /** If 1, this packet has minimal (egress) metadata; otherwise, it
+        has standard (ingress) metadata. */
+    unsigned int __minimal:1;
+
+    /** Offset of the metadata within the packet.  This value is multiplied
+     *  by 64 and added to the base packet address to get the metadata
+     *  address.  Note that this field is aligned within the word such that
+     *  you can easily extract the metadata address with a 26-bit mask. */
+    unsigned int __offset:2;
+
+    /** The top 24 bits of the packet's virtual address. */
+    unsigned int __addr:24;
+  } bits;
+}
+__netio_pkt_handle_t;
+
+#endif /* !__DOXYGEN__ */
+
+
+/**
+ * @brief A handle for an I/O packet's storage.
+ * @ingroup ingress
+ *
+ * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its
+ * packet metadata removed.  It is a much smaller type that exists to
+ * facilitate applications where the full ::netio_pkt_t type is too
+ * large, such as those that cache enormous numbers of packets or wish
+ * to transmit packet descriptors over the UDN.
+ *
+ * Because there is no metadata, most ::netio_pkt_t operations cannot be
+ * performed on a netio_pkt_handle_t.  It supports only
+ * netio_free_handle() (to free the buffer) and
+ * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents).
+ * The application must acquire any additional metadata it wants from the
+ * original ::netio_pkt_t and record it separately.
+ *
+ * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling
+ * NETIO_PKT_HANDLE().  An invalid handle (analogous to NULL) can be
+ * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can
+ * be tested for validity with NETIO_PKT_HANDLE_IS_VALID().
+ */
+typedef struct
+{
+  unsigned int word; /**< Opaque bits. */
+} netio_pkt_handle_t;
+
+/**
+ * @brief A packet descriptor.
+ *
+ * @ingroup ingress
+ * @ingroup egress
+ *
+ * This data structure represents a packet.  The structure is manipulated
+ * through the @ref ingress and the @ref egress.
+ *
+ * While the contents of a netio_pkt_t are opaque, the structure itself is
+ * portable.  This means that it may be shared between all tiles which have
+ * done a netio_input_register() call for the interface on which the pkt_t
+ * was initially received (via netio_get_packet()) or retrieved (via
+ * netio_get_buffer()).  The contents of a netio_pkt_t can be transmitted to
+ * another tile via shared memory, or via a UDN message, or by other means.
+ * The destination tile may then use the pkt_t as if it had originally been
+ * received locally; it may read or write the packet's data, read its
+ * metadata, free the packet, send the packet, transfer the netio_pkt_t to
+ * yet another tile, and so forth.
+ *
+ * Once a netio_pkt_t has been transferred to a second tile, the first tile
+ * should not reference the original copy; in particular, if more than one
+ * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will
+ * become corrupted.  Note also that each tile which reads or modifies
+ * packet data must obey the memory coherency rules outlined in @ref input.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque. */
+  unsigned char opaque[32];
+#else
+  /** For an ingress packet (one with standard metadata), this is the
+   *  notification header we got from the I/O shim.  For an egress packet
+   *  (one with minimal metadata), this word is zero if the packet has not
+   *  been populated, and nonzero if it has. */
+  __netio_pkt_notif_t __notif_header;
+
+  /** Virtual address of the packet buffer, plus state flags. */
+  __netio_pkt_handle_t __packet;
+
+  /** Metadata associated with the packet. */
+  netio_pkt_metadata_t __metadata;
+#endif
+}
+netio_pkt_t;
+
+
+#ifndef __DOXYGEN__
+
+#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header)
+#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle)
+#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue)
+#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header)
+#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle)
+#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal)
+#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue)
+#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags)
+
+/* Packet information table, used by the attribute access functions below. */
+extern const uint16_t _netio_pkt_info[];
+
+#endif /* __DOXYGEN__ */
+
+
+#ifndef __DOXYGEN__
+/* These macros are deprecated and will disappear in a future MDE release. */
+#define NETIO_PKT_GOOD_CHECKSUM(pkt) \
+  NETIO_PKT_L4_CSUM_CORRECT(pkt)
+#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \
+  NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt)
+#endif /* __DOXYGEN__ */
+
+
+/* Packet attribute access functions. */
+
+/** Return a pointer to the metadata for a packet.
+ * @ingroup ingress
+ *
+ * Calling this function once and passing the result to other retrieval
+ * functions with a "_M" suffix usually improves performance.  This
+ * function must be called on an 'ingress' packet (i.e. one retrieved
+ * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer have not been called). Use of this
+ * function on an 'egress' packet will cause an assertion failure.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's standard metadata.
+ */
+static __inline netio_pkt_metadata_t*
+NETIO_PKT_METADATA(netio_pkt_t* pkt)
+{
+  netio_assert(!pkt->__packet.bits.__minimal);
+  return &pkt->__metadata;
+}
+
+
+/** Return a pointer to the minimal metadata for a packet.
+ * @ingroup egress
+ *
+ * Calling this function once and passing the result to other retrieval
+ * functions with a "_MM" suffix usually improves performance.  This
+ * function must be called on an 'egress' packet (i.e. one on which
+ * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer()
+ * have been called, or one retrieved by @ref netio_get_buffer()). Use of
+ * this function on an 'ingress' packet will cause an assertion failure.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's standard metadata.
+ */
+static __inline netio_pkt_minimal_metadata_t*
+NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+  netio_assert(pkt->__packet.bits.__minimal);
+  return (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
+}
+
+
+/** Determine whether a packet has 'minimal' metadata.
+ * @ingroup pktfuncs
+ *
+ * This function will return nonzero if the packet is an 'egress'
+ * packet (i.e. one on which @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer() have been called, or one
+ * retrieved by @ref netio_get_buffer()), and zero if the packet
+ * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(),
+ * which has not been converted into an 'egress' packet).
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet has minimal metadata.
+ */
+static __inline unsigned int
+NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt)
+{
+  return pkt->__packet.bits.__minimal;
+}
+
+
+/** Return a handle for a packet's storage.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A handle for the packet's storage.
+ */
+static __inline netio_pkt_handle_t
+NETIO_PKT_HANDLE(netio_pkt_t* pkt)
+{
+  netio_pkt_handle_t h;
+  h.word = pkt->__packet.word;
+  return h;
+}
+
+
+/** A special reserved value indicating the absence of a packet handle.
+ *
+ * @ingroup pktfuncs
+ */
+#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 })
+
+
+/** Test whether a packet handle is valid.
+ *
+ * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE
+ * to indicate no packet at all.  This function tests to see if a packet
+ * handle is a real handle, not this special reserved value.
+ *
+ * @ingroup pktfuncs
+ *
+ * @param[in] handle Handle on which to operate.
+ * @return One if the packet handle is valid, else zero.
+ */
+static __inline unsigned int
+NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle)
+{
+  return handle.word != 0;
+}
+
+
+
+/** Return a pointer to the start of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] handle Handle on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle)
+{
+  return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING;
+}
+
+
+/** Return the length of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's custom header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /*
+   * Note that we effectively need to extract a quantity from the flags word
+   * which is measured in words, and then turn it into bytes by shifting
+   * it left by 2.  We do this all at once by just shifting right two less
+   * bits, and shifting the mask up two bits.
+   */
+  return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) &
+          (_NETIO_PKT_CUSTOM_LEN_RMASK << 2));
+}
+
+
+/** Return the length of the packet, starting with the custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size -
+          NETIO_PACKET_PADDING);
+}
+
+
+/** Return a pointer to the start of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt));
+}
+
+
+/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /*
+   * Note that we effectively need to extract a quantity from the flags word
+   * which is measured in words, and then turn it into bytes by shifting
+   * it left by 2.  We do this all at once by just shifting right two less
+   * bits, and shifting the mask up two bits.  We then add two bytes.
+   */
+  return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) &
+          (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2;
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) -
+          NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt));
+}
+
+
+/** Return a pointer to the start of the packet's L2 (Ethernet) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) +
+          NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt));
+}
+
+
+/** Retrieve the length of the packet, starting with the L3 (generally,
+ *  the IP) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (NETIO_PKT_L2_LENGTH_M(mda, pkt) -
+          NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt));
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup ingress
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (NETIO_PKT_L2_DATA_M(mda, pkt) +
+          NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt));
+}
+
+
+/** Return the ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given an ordinal number when it is delivered by the IPP.
+ * In the medium term, the ordinal is unique and monotonically increasing,
+ * being incremented by 1 for each packet; the ordinal of the first packet
+ * delivered after the IPP starts is zero.  (Since the ordinal is of finite
+ * size, given enough input packets, it will eventually wrap around to zero;
+ * in the long term, therefore, ordinals are not unique.)  The ordinals
+ * handed out by different IPPs are not disjoint, so two packets from
+ * different IPPs may have identical ordinals.  Packets dropped by the
+ * IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP packet ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__packet_ordinal;
+}
+
+
+/** Return the per-group ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given a per-group ordinal number when it is
+ * delivered by the IPP. By default, the group is the packet's VLAN,
+ * although IPP can be recompiled to use different values.  In
+ * the medium term, the ordinal is unique and monotonically
+ * increasing, being incremented by 1 for each packet; the ordinal of
+ * the first packet distributed to a particular group is zero.
+ * (Since the ordinal is of finite size, given enough input packets,
+ * it will eventually wrap around to zero; in the long term,
+ * therefore, ordinals are not unique.)  The ordinals handed out by
+ * different IPPs are not disjoint, so two packets from different IPPs
+ * may have identical ordinals; similarly, packets distributed to
+ * different groups may have identical ordinals.  Packets dropped by
+ * the IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP, per-group ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__group_ordinal;
+}
+
+
+/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress
+ *
+ * This value is usually contained within the packet header.
+ *
+ * This value will be zero if the packet does not have a VLAN tag, or if
+ * this value was not extracted from the packet.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's VLAN ID.
+ */
+static __inline unsigned short
+NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK;
+  unsigned short* pkt_p;
+  int index;
+  unsigned short val;
+
+  if (vl == _NETIO_PKT_VLAN_NONE)
+    return 0;
+
+  pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
+  index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
+
+  val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) &
+              _NETIO_PKT_INFO_VLAN_RMASK];
+
+#ifdef __TILECC__
+  return (__insn_bytex(val) >> 16) & 0xFFF;
+#else
+  return (__builtin_bswap32(val) >> 16) & 0xFFF;
+#endif
+}
+
+
+/** Return the ethertype of the packet.
+ * @ingroup ingress
+ *
+ * This value is usually contained within the packet header.
+ *
+ * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M()
+ * returns true, and otherwise, may not be well defined.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's ethertype.
+ */
+static __inline unsigned short
+NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
+  int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
+
+  unsigned short val =
+    pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) &
+          _NETIO_PKT_INFO_ETYPE_RMASK];
+
+  return __builtin_bswap32(val) >> 16;
+}
+
+
+/** Return the flow hash computed on the packet.
+ * @ingroup ingress
+ *
+ * For TCP and UDP packets, this hash is calculated by hashing together
+ * the "5-tuple" values, specifically the source IP address, destination
+ * IP address, protocol type, source port and destination port.
+ * The hash value is intended to be helpful for millions of distinct
+ * flows.
+ *
+ * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
+ * derived by hashing together the source and destination IP addresses.
+ *
+ * For MPLS-encapsulated packets, the flow hash is derived by hashing
+ * the first MPLS label.
+ *
+ * For all other packets the flow hash is computed from the source
+ * and destination Ethernet addresses.
+ *
+ * The hash is symmetric, meaning it produces the same value if the
+ * source and destination are swapped. The only exceptions are
+ * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
+ * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
+ * (Encap Security Payload), which use only the destination address
+ * since the source address is not meaningful.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's 32-bit flow hash.
+ */
+static __inline unsigned int
+NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__flow_hash;
+}
+
+
+/** Return the first word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
+ * word of user data contains the least significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_low()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's first word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__user_data_0;
+}
+
+
+/** Return the second word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
+ * word of user data contains the most significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_high()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's second word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__user_data_1;
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L4 checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
+ *  be correct.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return !(mda->__flags &
+           (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK));
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L3 (IP) checksum was calculated.
+*/
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated and found to be
+ *  correct.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return !(mda->__flags &
+           (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK));
+}
+
+
+/** Determine whether the ethertype was recognized and L3 packet data was
+ *  processed.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the ethertype was recognized and L3 packet data was
+ *   processed.
+ */
+static __inline unsigned int
+NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK);
+}
+
+
+/** Retrieve the status of a packet and any errors that may have occurred
+ * during ingress processing (length mismatches, CRC errors, etc.).
+ * @ingroup ingress
+ *
+ * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns zero are always reported as underlength, as there is no a priori
+ * means to determine their length.  Normally, applications should use
+ * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this
+ * function.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's status.
+ */
+static __inline netio_pkt_status_t
+NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status;
+}
+
+
+/** Report whether a packet is bad (i.e., was shorter than expected based on
+ *  its headers, or had a bad CRC).
+ * @ingroup ingress
+ *
+ * Note that this function does not verify L3 or L4 checksums.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet is bad and should be discarded.
+ */
+static __inline unsigned int
+NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) &&
+          (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) ||
+           NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD));
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return mmd->l2_length;
+}
+
+
+/** Return the length of the L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
+                              netio_pkt_t* pkt)
+{
+  return mmd->l3_offset - mmd->l2_offset;
+}
+
+
+/** Return the length of the packet, starting with the L3 (IP) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) -
+          NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt));
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup egress
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return _NETIO_PKT_BASE(pkt) + mmd->l3_offset;
+}
+
+
+/** Return a pointer to the packet's L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return _NETIO_PKT_BASE(pkt) + mmd->l2_offset;
+}
+
+
+/** Retrieve the status of a packet and any errors that may have occurred
+ * during ingress processing (length mismatches, CRC errors, etc.).
+ * @ingroup ingress
+ *
+ * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns zero are always reported as underlength, as there is no a priori
+ * means to determine their length.  Normally, applications should use
+ * @ref NETIO_PKT_BAD() instead of explicitly checking status with this
+ * function.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's status.
+ */
+static __inline netio_pkt_status_t
+NETIO_PKT_STATUS(netio_pkt_t* pkt)
+{
+  netio_assert(!pkt->__packet.bits.__minimal);
+
+  return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status;
+}
+
+
+/** Report whether a packet is bad (i.e., was shorter than expected based on
+ *  its headers, or had a bad CRC).
+ * @ingroup ingress
+ *
+ * Note that this function does not verify L3 or L4 checksums.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet is bad and should be discarded.
+ */
+static __inline unsigned int
+NETIO_PKT_BAD(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_BAD_M(mda, pkt);
+}
+
+
+/** Return the length of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's custom header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return the length of the packet, starting with the custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return  The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt);
+}
+
+
+/** Return a pointer to the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_CUSTOM_DATA_M(mda, pkt);
+}
+
+
+/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt)
+{
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+    return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt);
+  }
+  else
+  {
+    netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+    return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt);
+  }
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return  The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt)
+{
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+    return NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
+  }
+  else
+  {
+    netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+    return NETIO_PKT_L2_LENGTH_M(mda, pkt);
+  }
+}
+
+
+/** Return a pointer to the packet's L2 (Ethernet) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA(netio_pkt_t* pkt)
+{
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+    return NETIO_PKT_L2_DATA_MM(mmd, pkt);
+  }
+  else
+  {
+    netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+    return NETIO_PKT_L2_DATA_M(mda, pkt);
+  }
+}
+
+
+/** Retrieve the length of the packet, starting with the L3 (generally, the IP)
+ * header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt)
+{
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+    return NETIO_PKT_L3_LENGTH_MM(mmd, pkt);
+  }
+  else
+  {
+    netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+    return NETIO_PKT_L3_LENGTH_M(mda, pkt);
+  }
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup pktfuncs
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA(netio_pkt_t* pkt)
+{
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+    return NETIO_PKT_L3_DATA_MM(mmd, pkt);
+  }
+  else
+  {
+    netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+    return NETIO_PKT_L3_DATA_M(mda, pkt);
+  }
+}
+
+
+/** Return the ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given an ordinal number when it is delivered by the IPP.
+ * In the medium term, the ordinal is unique and monotonically increasing,
+ * being incremented by 1 for each packet; the ordinal of the first packet
+ * delivered after the IPP starts is zero.  (Since the ordinal is of finite
+ * size, given enough input packets, it will eventually wrap around to zero;
+ * in the long term, therefore, ordinals are not unique.)  The ordinals
+ * handed out by different IPPs are not disjoint, so two packets from
+ * different IPPs may have identical ordinals.  Packets dropped by the
+ * IPP or by the I/O shim are not assigned ordinals.
+ *
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP packet ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_ORDINAL(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_ORDINAL_M(mda, pkt);
+}
+
+
+/** Return the per-group ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given a per-group ordinal number when it is
+ * delivered by the IPP. By default, the group is the packet's VLAN,
+ * although IPP can be recompiled to use different values.  In
+ * the medium term, the ordinal is unique and monotonically
+ * increasing, being incremented by 1 for each packet; the ordinal of
+ * the first packet distributed to a particular group is zero.
+ * (Since the ordinal is of finite size, given enough input packets,
+ * it will eventually wrap around to zero; in the long term,
+ * therefore, ordinals are not unique.)  The ordinals handed out by
+ * different IPPs are not disjoint, so two packets from different IPPs
+ * may have identical ordinals; similarly, packets distributed to
+ * different groups may have identical ordinals.  Packets dropped by
+ * the IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP, per-group ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt);
+}
+
+
+/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress
+ *
+ * This is usually also contained within the packet header.  If the packet
+ * does not have a VLAN tag, the VLAN ID returned by this function is zero.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's VLAN ID.
+ */
+static __inline unsigned short
+NETIO_PKT_VLAN_ID(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_VLAN_ID_M(mda, pkt);
+}
+
+
+/** Return the ethertype of the packet.
+ * @ingroup ingress
+ *
+ * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns true, and otherwise, may not be well defined.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's ethertype.
+ */
+static __inline unsigned short
+NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_ETHERTYPE_M(mda, pkt);
+}
+
+
+/** Return the flow hash computed on the packet.
+ * @ingroup ingress
+ *
+ * For TCP and UDP packets, this hash is calculated by hashing together
+ * the "5-tuple" values, specifically the source IP address, destination
+ * IP address, protocol type, source port and destination port.
+ * The hash value is intended to be helpful for millions of distinct
+ * flows.
+ *
+ * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
+ * derived by hashing together the source and destination IP addresses.
+ *
+ * For MPLS-encapsulated packets, the flow hash is derived by hashing
+ * the first MPLS label.
+ *
+ * For all other packets the flow hash is computed from the source
+ * and destination Ethernet addresses.
+ *
+ * The hash is symmetric, meaning it produces the same value if the
+ * source and destination are swapped. The only exceptions are
+ * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
+ * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
+ * (Encap Security Payload), which use only the destination address
+ * since the source address is not meaningful.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's 32-bit flow hash.
+ */
+static __inline unsigned int
+NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_FLOW_HASH_M(mda, pkt);
+}
+
+
+/** Return the first word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
+ * word of user data contains the least significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_low()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's first word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_USER_DATA_0_M(mda, pkt);
+}
+
+
+/** Return the second word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
+ * word of user data contains the most significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_high()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's second word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_USER_DATA_1_M(mda, pkt);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L4 checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
+ *  be correct.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L3 (IP) checksum was calculated.
+*/
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated and found to be
+ *  correct.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt);
+}
+
+
+/** Determine whether the Ethertype was recognized and L3 packet data was
+ *  processed.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the Ethertype was recognized and L3 packet data was
+ *   processed.
+ */
+static __inline unsigned int
+NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+  return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt);
+}
+
+
+/** Set an egress packet's L2 length, using a metadata pointer to speed the
+ * computation.
+ * @ingroup egress
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] len Packet L2 length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt,
+                           int len)
+{
+  mmd->l2_length = len;
+}
+
+
+/** Set an egress packet's L2 length.
+ * @ingroup egress
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] len Packet L2 length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len)
+{
+  netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+  NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len);
+}
+
+
+/** Set an egress packet's L2 header length, using a metadata pointer to
+ *  speed the computation.
+ * @ingroup egress
+ *
+ * It is not normally necessary to call this routine; only the L2 length,
+ * not the header length, is needed to transmit a packet.  It may be useful if
+ * the egress packet will later be processed by code which expects to use
+ * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] len Packet L2 header length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
+                                  netio_pkt_t* pkt, int len)
+{
+  mmd->l3_offset = mmd->l2_offset + len;
+}
+
+
+/** Set an egress packet's L2 header length.
+ * @ingroup egress
+ *
+ * It is not normally necessary to call this routine; only the L2 length,
+ * not the header length, is needed to transmit a packet.  It may be useful if
+ * the egress packet will later be processed by code which expects to use
+ * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] len Packet L2 header length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len)
+{
+  netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+  NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len);
+}
+
+
+/** Set up an egress packet for hardware checksum computation, using a
+ *  metadata pointer to speed the operation.
+ * @ingroup egress
+ *
+ *  NetIO provides the ability to automatically calculate a standard
+ *  16-bit Internet checksum on transmitted packets.  The application
+ *  may specify the point in the packet where the checksum starts, the
+ *  number of bytes to be checksummed, and the two bytes in the packet
+ *  which will be replaced with the completed checksum.  (If the range
+ *  of bytes to be checksummed includes the bytes to be replaced, the
+ *  initial values of those bytes will be included in the checksum.)
+ *
+ *  For some protocols, the packet checksum covers data which is not present
+ *  in the packet, or is at least not contiguous to the main data payload.
+ *  For instance, the TCP checksum includes a "pseudo-header" which includes
+ *  the source and destination IP addresses of the packet.  To accommodate
+ *  this, the checksum engine may be "seeded" with an initial value, which
+ *  the application would need to compute based on the specific protocol's
+ *  requirements.  Note that the seed is given in host byte order (little-
+ *  endian), not network byte order (big-endian); code written to compute a
+ *  pseudo-header checksum in network byte order will need to byte-swap it
+ *  before use as the seed.
+ *
+ *  Note that the checksum is computed as part of the transmission process,
+ *  so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ *   the checksum.
+ * @param[in] length Number of bytes to include in the checksum.
+ *   the checksum.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ *   to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ *   packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd,
+                            netio_pkt_t* pkt, int start, int length,
+                            int location, uint16_t seed)
+{
+  mmd->csum_start = start;
+  mmd->csum_length = length;
+  mmd->csum_location = location;
+  mmd->csum_seed = seed;
+  mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK;
+}
+
+
+/** Set up an egress packet for hardware checksum computation.
+ * @ingroup egress
+ *
+ *  NetIO provides the ability to automatically calculate a standard
+ *  16-bit Internet checksum on transmitted packets.  The application
+ *  may specify the point in the packet where the checksum starts, the
+ *  number of bytes to be checksummed, and the two bytes in the packet
+ *  which will be replaced with the completed checksum.  (If the range
+ *  of bytes to be checksummed includes the bytes to be replaced, the
+ *  initial values of those bytes will be included in the checksum.)
+ *
+ *  For some protocols, the packet checksum covers data which is not present
+ *  in the packet, or is at least not contiguous to the main data payload.
+ *  For instance, the TCP checksum includes a "pseudo-header" which includes
+ *  the source and destination IP addresses of the packet.  To accommodate
+ *  this, the checksum engine may be "seeded" with an initial value, which
+ *  the application would need to compute based on the specific protocol's
+ *  requirements.  Note that the seed is given in host byte order (little-
+ *  endian), not network byte order (big-endian); code written to compute a
+ *  pseudo-header checksum in network byte order will need to byte-swap it
+ *  before use as the seed.
+ *
+ *  Note that the checksum is computed as part of the transmission process,
+ *  so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ *   the checksum.
+ * @param[in] length Number of bytes to include in the checksum.
+ *   the checksum.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ *   to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ *   packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length,
+                         int location, uint16_t seed)
+{
+  netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+  NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ *  metadata pointer to speed the operation.
+ *  See @ref netio_populate_prepend_buffer() to get a full description of
+ *  prepending.
+ *
+ * @param[in,out] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (pkt->__packet.bits.__offset << 6) +
+         NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ *  metadata pointer to speed the operation.
+ *  See @ref netio_populate_prepend_buffer() to get a full description of
+ *  prepending.
+ * @ingroup egress
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset;
+}
+
+
+/** Return the number of bytes which could be prepended to a packet.
+ *  See @ref netio_populate_prepend_buffer() to get a full description of
+ *  prepending.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt)
+{
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+    return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt);
+  }
+  else
+  {
+    netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+    return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt);
+  }
+}
+
+
+/** Flush a packet's minimal metadata from the cache, using a metadata pointer
+ *  to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                    netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache, using a metadata
+ *  pointer to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                  netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache,
+ *  using a metadata pointer to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                        netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's metadata from the cache, using a metadata pointer
+ *  to speed the operation.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's metadata from the cache, using a metadata
+ *  pointer to speed the operation.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's metadata from the cache,
+ *  using a metadata pointer to speed the operation.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt)
+{
+}
+
+/** Number of NUMA nodes we can distribute buffers to.
+ * @ingroup setup */
+#define NETIO_NUM_NODE_WEIGHTS  16
+
+/**
+ * @brief An object for specifying the characteristics of NetIO communication
+ * endpoint.
+ *
+ * @ingroup setup
+ *
+ * The @ref netio_input_register() function uses this structure to define
+ * how an application tile will communicate with an IPP.
+ *
+ *
+ * Future updates to NetIO may add new members to this structure,
+ * which can affect the success of the registration operation.  Thus,
+ * if dynamically initializing the structure, applications are urged to
+ * zero it out first, for example:
+ *
+ * @code
+ * netio_input_config_t config;
+ * memset(&config, 0, sizeof (config));
+ * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE;
+ * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS;
+ * config.queue_id = 0;
+ *     .
+ *     .
+ *     .
+ * @endcode
+ *
+ * since that guarantees that any unused structure members, including
+ * members which did not exist when the application was first developed,
+ * will not have unexpected values.
+ *
+ * If statically initializing the structure, we strongly recommend use of
+ * C99-style named initializers, for example:
+ *
+ * @code
+ * netio_input_config_t config = {
+ *    .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE,
+ *    .num_receive_packets = NETIO_MAX_RECEIVE_PKTS,
+ *    .queue_id = 0,
+ * },
+ * @endcode
+ *
+ * instead of the old-style structure initialization:
+ *
+ * @code
+ * // Bad example! Currently equivalent to the above, but don't do this.
+ * netio_input_config_t config = {
+ *    NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0
+ * },
+ * @endcode
+ *
+ * since the C99 style requires no changes to the code if elements of the
+ * config structure are rearranged.  (It also makes the initialization much
+ * easier to understand.)
+ *
+ * Except for items which address a particular tile's transmit or receive
+ * characteristics, such as the ::NETIO_RECV flag, applications are advised
+ * to specify the same set of configuration data on all registrations.
+ * This prevents differing results if multiple tiles happen to do their
+ * registration operations in a different order on different invocations of
+ * the application.  This is particularly important for things like link
+ * management flags, and buffer size and homing specifications.
+ *
+ * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO
+ * buffer pool is automatically created and mapped into the application's
+ * virtual address space at an address chosen by the operating system,
+ * using the common memory (cmem) facility in the Tilera Multicore
+ * Components library.  The cmem facility allows multiple processes to gain
+ * access to shared memory which is mapped into each process at an
+ * identical virtual address.  In order for this to work, the processes
+ * must have a common ancestor, which must create the common memory using
+ * tmc_cmem_init().
+ *
+ * In programs using the iLib process creation API, or in programs which use
+ * only one process (which include programs using the pthreads library),
+ * tmc_cmem_init() is called automatically.  All other applications
+ * must call it explicitly, before any child processes which might call
+ * netio_input_register() are created.
+ */
+typedef struct
+{
+  /** Registration characteristics.
+
+      This value determines several characteristics of the registration;
+      flags for different types of behavior are ORed together to make the
+      final flag value.  Generally applications should specify exactly
+      one flag from each of the following categories:
+
+      - Whether the application will be receiving packets on this queue
+        (::NETIO_RECV or ::NETIO_NO_RECV).
+
+      - Whether the application will be transmitting packets on this queue,
+        and if so, whether it will request egress checksum calculation
+        (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT).  It is
+        legal to call netio_get_buffer() without one of the XMIT flags,
+        as long as ::NETIO_RECV is specified; in this case, the retrieved
+        buffers must be passed to another tile for transmission.
+
+      - Whether the application expects any vendor-specific tags in
+        its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM,
+        or ::NETIO_TAG_MRVL).  This must match the configuration of the
+        target IPP.
+
+      To accommodate applications written to previous versions of the NetIO
+      interface, none of the flags above are currently required; if omitted,
+      NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM |
+      ::NETIO_TAG_NONE were used.  However, explicit specification of
+      the relevant flags allows NetIO to do a better job of resource
+      allocation, allows earlier detection of certain configuration errors,
+      and may enable advanced features or higher performance in the future,
+      so their use is strongly recommended.
+
+      Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT
+      is a special case, intended primarily for use by programs which
+      retrieve network statistics or do link management operations.
+      When these flags are both specified, the resulting queue may not
+      be used with NetIO routines other than netio_get(), netio_set(),
+      and netio_input_unregister().  See @ref link for more information
+      on link management.
+
+      Other flags are optional; their use is described below.
+  */
+  int flags;
+
+  /** Interface name.  This is a string which identifies the specific
+      Ethernet controller hardware to be used.  The format of the string
+      is a device type and a device index, separated by a slash; so,
+      the first 10 Gigabit Ethernet controller is named "xgbe/0", while
+      the second 10/100/1000 Megabit Ethernet controller is named "gbe/1".
+   */
+  const char* interface;
+
+  /** Receive packet queue size.  This specifies the maximum number
+      of ingress packets that can be received on this queue without
+      being retrieved by @ref netio_get_packet().  If the IPP's distribution
+      algorithm calls for a packet to be sent to this queue, and this
+      number of packets are already pending there, the new packet
+      will either be discarded, or sent to another tile registered
+      for the same queue_id (see @ref drops).  This value must
+      be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least
+      ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain
+      interfaces.
+   */
+  int num_receive_packets;
+
+  /** The queue ID being requested.  Legal values for this range from 0
+      to ::NETIO_MAX_QUEUE_ID, inclusive.  ::NETIO_MAX_QUEUE_ID is always
+      greater than or equal to the number of tiles; this allows one queue
+      for each tile, plus at least one additional queue.  Some applications
+      may wish to use the additional queue as a destination for unwanted
+      packets, since packets delivered to queues for which no tiles have
+      registered are discarded.
+   */
+  unsigned int queue_id;
+
+  /** Maximum number of small send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds
+      empty small egress buffers requested from the IPP but not yet
+      retrieved via @ref netio_get_buffer().  This value must be greater
+      than zero if the application will ever use @ref netio_get_buffer()
+      to allocate empty small egress buffers; it may be no larger than
+      ::NETIO_MAX_SEND_BUFFERS.  See @ref epp for more details on empty
+      buffer caching.
+   */
+  int num_send_buffers_small_total;
+
+  /** Number of small send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty small egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_small_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_small_prealloc;
+
+  /** Maximum number of large send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds empty
+      large egress buffers requested from the IPP but not yet retrieved via
+      @ref netio_get_buffer().  This value must be greater than zero if the
+      application will ever use @ref netio_get_buffer() to allocate empty
+      large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
+      See @ref epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_large_total;
+
+  /** Number of large send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty large egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_large_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_large_prealloc;
+
+  /** Maximum number of jumbo send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds empty
+      jumbo egress buffers requested from the IPP but not yet retrieved via
+      @ref netio_get_buffer().  This value must be greater than zero if the
+      application will ever use @ref netio_get_buffer() to allocate empty
+      jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
+      See @ref epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_jumbo_total;
+
+  /** Number of jumbo send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty jumbo egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_jumbo_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_jumbo_prealloc;
+
+  /** Total packet buffer size.  This determines the total size, in bytes,
+      of the NetIO buffer pool.  Note that the maximum number of available
+      buffers of each size is determined during hypervisor configuration
+      (see the <em>System Programmer's Guide</em> for details); this just
+      influences how much host memory is allocated for those buffers.
+
+      The buffer pool is allocated from common memory, which will be
+      automatically initialized if needed.  If your buffer pool is larger
+      than 240 MB, you might need to explicitly call @c tmc_cmem_init(),
+      as described in the Application Libraries Reference Manual (UG227).
+
+      Packet buffers are currently allocated in chunks of 16 MB; this
+      value will be rounded up to the next larger multiple of 16 MB.
+      If this value is zero, a default of 32 MB will be used; this was
+      the value used by previous versions of NetIO.  Note that taking this
+      default also affects the placement of buffers on Linux NUMA nodes.
+      See @ref buffer_node_weights for an explanation of buffer placement.
+
+      In order to successfully allocate packet buffers, Linux must have
+      available huge pages on the relevant Linux NUMA nodes.  See the
+      <em>System Programmer's Guide</em> for information on configuring
+      huge page support in Linux.
+   */
+  uint64_t total_buffer_size;
+
+  /** Buffer placement weighting factors.
+
+      This array specifies the relative amount of buffering to place
+      on each of the available Linux NUMA nodes.  This array is
+      indexed by the NUMA node, and the values in the array are
+      proportional to the amount of buffer space to allocate on that
+      node.
+
+      If memory striping is enabled in the Hypervisor, then there is
+      only one logical NUMA node (node 0). In that case, NetIO will by
+      default ignore the suggested buffer node weights, and buffers
+      will be striped across the physical memory controllers. See
+      UG209 System Programmer's Guide for a description of the
+      hypervisor option that controls memory striping.
+
+      If memory striping is disabled, then there are up to four NUMA
+      nodes, corresponding to the four DDRAM controllers in the TILE
+      processor architecture.  See UG100 Tile Processor Architecture
+      Overview for a diagram showing the location of each of the DDRAM
+      controllers relative to the tile array.
+
+      For instance, if memory striping is disabled, the following
+      configuration strucure:
+
+      @code
+      netio_input_config_t config = {
+            .
+            .
+            .
+        .total_buffer_size = 4 * 16 * 1024 * 1024;
+        .buffer_node_weights = { 1, 0, 1, 0 },
+      },
+      @endcode
+
+      would result in 32 MB of buffers being placed on controller 0, and
+      32 MB on controller 2.  (Since buffers are allocated in units of
+      16 MB, some sets of weights will not be able to be matched exactly.)
+
+      For the weights to be effective, @ref total_buffer_size must be
+      nonzero.  If @ref total_buffer_size is zero, causing the default
+      32 MB of buffer space to be used, then any specified weights will
+      be ignored, and buffers will positioned as they were in previous
+      versions of NetIO:
+
+      - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1,
+        and the other 16 MB will be placed on controller 2.
+
+      - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2,
+        and the other 16 MB will be placed on controller 3.
+
+      If @ref total_buffer_size is nonzero, but all weights are zero,
+      then all buffer space will be allocated on Linux NUMA node zero.
+
+      By default, the specified buffer placement is treated as a hint;
+      if sufficient free memory is not available on the specified
+      controllers, the buffers will be allocated elsewhere.  However,
+      if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a
+      failure to allocate buffer space exactly as requested will cause the
+      registration operation to fail with an error of ::NETIO_CANNOT_HOME.
+
+      Note that maximal network performance cannot be achieved with
+      only one memory controller.
+   */
+  uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS];
+
+  /** Fixed virtual address for packet buffers.  Only valid when
+      ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the
+      description of that flag for details.
+   */
+  void* fixed_buffer_va;
+
+  /**
+      Maximum number of outstanding send packet requests.  This value is
+      only relevant when an EPP is in use; it determines the number of
+      slots in the EPP's outgoing packet queue which this tile is allowed
+      to consume, and thus the number of packets which may be sent before
+      the sending tile must wait for an acknowledgment from the EPP.
+      Modifying this value is generally only helpful when using @ref
+      netio_send_packet_vector(), where it can help improve performance by
+      allowing a single vector send operation to process more packets.
+      Typically it is not specified, and the default, which divides the
+      outgoing packet slots evenly between all tiles on the chip, is used.
+
+      If a registration asks for more outgoing packet queue slots than are
+      available, ::NETIO_TOOMANY_XMIT will be returned.  The total number
+      of packet queue slots which are available for all tiles for each EPP
+      is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING.
+
+
+      This value is ignored if ::NETIO_XMIT is not specified in flags.
+      If you want to specify a large value here for a specific tile, you are
+      advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so
+      that they do not consume a default number of packet slots.  Any tile
+      transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING
+      slots allocated to it; values less than that will be silently
+      increased by the NetIO library.
+   */
+  int num_sends_outstanding;
+}
+netio_input_config_t;
+
+
+/** Registration flags; used in the @ref netio_input_config_t structure.
+ * @addtogroup setup
+ */
+/** @{ */
+
+/** Fail a registration request if we can't put packet buffers
+    on the specified memory controllers. */
+#define NETIO_STRICT_HOMING   0x00000002
+
+/** This application expects no tags on its L2 headers. */
+#define NETIO_TAG_NONE        0x00000004
+
+/** This application expects Marvell extended tags on its L2 headers. */
+#define NETIO_TAG_MRVL        0x00000008
+
+/** This application expects Broadcom tags on its L2 headers. */
+#define NETIO_TAG_BRCM        0x00000010
+
+/** This registration may call routines which receive packets. */
+#define NETIO_RECV            0x00000020
+
+/** This registration may not call routines which receive packets. */
+#define NETIO_NO_RECV         0x00000040
+
+/** This registration may call routines which transmit packets. */
+#define NETIO_XMIT            0x00000080
+
+/** This registration may call routines which transmit packets with
+    checksum acceleration. */
+#define NETIO_XMIT_CSUM       0x00000100
+
+/** This registration may not call routines which transmit packets. */
+#define NETIO_NO_XMIT         0x00000200
+
+/** This registration wants NetIO buffers mapped at an application-specified
+    virtual address.
+
+    NetIO buffers are by default created by the TMC common memory facility,
+    which must be configured by a common ancestor of all processes sharing
+    a network interface.  When this flag is specified, NetIO buffers are
+    instead mapped at an address chosen by the application (and specified
+    in @ref netio_input_config_t::fixed_buffer_va).  This allows multiple
+    unrelated but cooperating processes to share a NetIO interface.
+    All processes sharing the same interface must specify this flag,
+    and all must specify the same fixed virtual address.
+
+    @ref netio_input_config_t::fixed_buffer_va must be a
+    multiple of 16 MB, and the packet buffers will occupy @ref
+    netio_input_config_t::total_buffer_size bytes of virtual address
+    space, beginning at that address.  If any of those virtual addresses
+    are currently occupied by other memory objects, like application or
+    shared library code or data, @ref netio_input_register() will return
+    ::NETIO_FAULT.  While it is impossible to provide a fixed_buffer_va
+    which will work for all applications, a good first guess might be to
+    use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size.
+    If that fails, it might be helpful to consult the running application's
+    virtual address description file (/proc/<em>pid</em>/maps) to see
+    which regions of virtual address space are available.
+ */
+#define NETIO_FIXED_BUFFER_VA 0x00000400
+
+/** This registration call will not complete unless the network link
+    is up.  The process will wait several seconds for this to happen (the
+    precise interval is link-dependent), but if the link does not come up,
+    ::NETIO_LINK_DOWN will be returned.  This flag is the default if
+    ::NETIO_NOREQUIRE_LINK_UP is not specified.  Note that this flag by
+    itself does not request that the link be brought up; that can be done
+    with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the
+    latter is the default if no NETIO_AUTO_LINK_xxx flags are specified),
+    or by explicitly setting the link's desired state via netio_set().
+    If the link is not brought up by one of those methods, and this flag
+    is specified, the registration operation will return ::NETIO_LINK_DOWN.
+    This flag is ignored if it is specified along with ::NETIO_NO_XMIT and
+    ::NETIO_NO_RECV.  See @ref link for more information on link
+    management.
+ */
+#define NETIO_REQUIRE_LINK_UP    0x00000800
+
+/** This registration call will complete even if the network link is not up.
+    Whenever the link is not up, packets will not be sent or received:
+    netio_get_packet() will return ::NETIO_NOPKT once all queued packets
+    have been drained, and netio_send_packet() and similar routines will
+    return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP
+    or the I/O shim is full.  See @ref link for more information on link
+    management.
+ */
+#define NETIO_NOREQUIRE_LINK_UP  0x00001000
+
+#ifndef __DOXYGEN__
+/*
+ * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags,
+ * but should not be used directly by applications, and are thus not
+ * documented.
+ */
+#define _NETIO_AUTO_UP        0x00002000
+#define _NETIO_AUTO_DN        0x00004000
+#define _NETIO_AUTO_PRESENT   0x00008000
+#endif
+
+/** Set the desired state of the link to up, allowing any speeds which are
+    supported by the link hardware, as part of this registration operation.
+    Do not take down the link automatically.  This is the default if
+    no other NETIO_AUTO_LINK_xxx flags are specified.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_UP     (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP)
+
+/** Set the desired state of the link to up, allowing any speeds which are
+    supported by the link hardware, as part of this registration operation.
+    Set the desired state of the link to down the next time no tiles are
+    registered for packet reception or transmission.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_UPDN   (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \
+                                _NETIO_AUTO_DN)
+
+/** Set the desired state of the link to down the next time no tiles are
+    registered for packet reception or transmission.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_DN     (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN)
+
+/** Do not bring up the link automatically as part of this registration
+    operation.  Do not take down the link automatically.  This flag
+    is ignored if it is specified along with ::NETIO_NO_XMIT and
+    ::NETIO_NO_RECV.  See @ref link for more information on link management.
+  */
+#define NETIO_AUTO_LINK_NONE   _NETIO_AUTO_PRESENT
+
+
+/** Minimum number of receive packets. */
+#define NETIO_MIN_RECEIVE_PKTS            16
+
+/** Lower bound on the maximum number of receive packets; may be higher
+    than this on some interfaces. */
+#define NETIO_MAX_RECEIVE_PKTS           128
+
+/** Maximum number of send buffers, per packet size. */
+#define NETIO_MAX_SEND_BUFFERS            16
+
+/** Number of EPP queue slots, and thus outstanding sends, per EPP. */
+#define NETIO_TOTAL_SENDS_OUTSTANDING   2015
+
+/** Minimum number of EPP queue slots, and thus outstanding sends, per
+ *  transmitting tile. */
+#define NETIO_MIN_SENDS_OUTSTANDING       16
+
+
+/**@}*/
+
+#ifndef __DOXYGEN__
+
+/**
+ * An object for providing Ethernet packets to a process.
+ */
+struct __netio_queue_impl_t;
+
+/**
+ * An object for managing the user end of a NetIO queue.
+ */
+struct __netio_queue_user_impl_t;
+
+#endif /* !__DOXYGEN__ */
+
+
+/** A netio_queue_t describes a NetIO communications endpoint.
+ * @ingroup setup
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  uint8_t opaque[8];                 /**< This is an opaque structure. */
+#else
+  struct __netio_queue_impl_t* __system_part;    /**< The system part. */
+  struct __netio_queue_user_impl_t* __user_part; /**< The user part. */
+#ifdef _NETIO_PTHREAD
+  _netio_percpu_mutex_t lock;                    /**< Queue lock. */
+#endif
+#endif
+}
+netio_queue_t;
+
+
+/**
+ * @brief Packet send context.
+ *
+ * @ingroup egress
+ *
+ * Packet send context for use with netio_send_packet_prepare and _commit.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  uint8_t opaque[44];   /**< This is an opaque structure. */
+#else
+  uint8_t flags;        /**< Defined below */
+  uint8_t datalen;      /**< Number of valid words pointed to by data. */
+  uint32_t request[9];  /**< Request to be sent to the EPP or shim.  Note
+                             that this is smaller than the 11-word maximum
+                             request size, since some constant values are
+                             not saved in the context. */
+  uint32_t *data;       /**< Data to be sent to the EPP or shim via IDN. */
+#endif
+}
+netio_send_pkt_context_t;
+
+
+#ifndef __DOXYGEN__
+#define SEND_PKT_CTX_USE_EPP   1  /**< We're sending to an EPP. */
+#define SEND_PKT_CTX_SEND_CSUM 2  /**< Request includes a checksum. */
+#endif
+
+/**
+ * @brief Packet vector entry.
+ *
+ * @ingroup egress
+ *
+ * This data structure is used with netio_send_packet_vector() to send multiple
+ * packets with one NetIO call.  The structure should be initialized by
+ * calling netio_pkt_vector_set(), rather than by setting the fields
+ * directly.
+ *
+ * This structure is guaranteed to be a power of two in size, no
+ * bigger than one L2 cache line, and to be aligned modulo its size.
+ */
+typedef struct
+#ifndef __DOXYGEN__
+__attribute__((aligned(8)))
+#endif
+{
+  /** Reserved for use by the user application.  When initialized with
+   *  the netio_set_pkt_vector_entry() function, this field is guaranteed
+   *  to be visible to readers only after all other fields are already
+   *  visible.  This way it can be used as a valid flag or generation
+   *  counter. */
+  uint8_t user_data;
+
+  /* Structure members below this point should not be accessed directly by
+   * applications, as they may change in the future. */
+
+  /** Low 8 bits of the packet address to send.  The high bits are
+   *  acquired from the 'handle' field. */
+  uint8_t buffer_address_low;
+
+  /** Number of bytes to transmit. */
+  uint16_t size;
+
+  /** The raw handle from a netio_pkt_t.  If this is NETIO_PKT_HANDLE_NONE,
+   *  this vector entry will be skipped and no packet will be transmitted. */
+  netio_pkt_handle_t handle;
+}
+netio_pkt_vector_entry_t;
+
+
+/**
+ * @brief Initialize fields in a packet vector entry.
+ *
+ * @ingroup egress
+ *
+ * @param[out] v Pointer to the vector entry to be initialized.
+ * @param[in] pkt Packet to be transmitted when the vector entry is passed to
+ *        netio_send_packet_vector().  Note that the packet's attributes
+ *        (e.g., its L2 offset and length) are captured at the time this
+ *        routine is called; subsequent changes in those attributes will not
+ *        be reflected in the packet which is actually transmitted.
+ *        Changes in the packet's contents, however, will be so reflected.
+ *        If this is NULL, no packet will be transmitted.
+ * @param[in] user_data User data to be set in the vector entry.
+ *        This function guarantees that the "user_data" field will become
+ *        visible to a reader only after all other fields have become visible.
+ *        This allows a structure in a ring buffer to be written and read
+ *        by a polling reader without any locks or other synchronization.
+ */
+static __inline void
+netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt,
+                     uint8_t user_data)
+{
+  if (pkt)
+  {
+    if (NETIO_PKT_IS_MINIMAL(pkt))
+    {
+      netio_pkt_minimal_metadata_t* mmd =
+        (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
+      v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF;
+      v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
+    }
+    else
+    {
+      netio_pkt_metadata_t* mda = &pkt->__metadata;
+      v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF;
+      v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt);
+    }
+    v->handle.word = pkt->__packet.word;
+  }
+  else
+  {
+    v->handle.word = 0;   /* Set handle to NETIO_PKT_HANDLE_NONE. */
+  }
+
+  __asm__("" : : : "memory");
+
+  v->user_data = user_data;
+}
+
+
+/**
+ * Flags and structures for @ref netio_get() and @ref netio_set().
+ * @ingroup config
+ */
+
+/** @{ */
+/** Parameter class; addr is a NETIO_PARAM_xxx value. */
+#define NETIO_PARAM       0
+/** Interface MAC address. This address is only valid with @ref netio_get().
+ *  The value is a 6-byte MAC address.  Depending upon the overall system
+ *  design, a MAC address may or may not be available for each interface. */
+#define NETIO_PARAM_MAC        0
+
+/** Determine whether to suspend output on the receipt of pause frames.
+ *  If the value is nonzero, the I/O shim will suspend output when a pause
+ *  frame is received.  If the value is zero, pause frames will be ignored. */
+#define NETIO_PARAM_PAUSE_IN   1
+
+/** Determine whether to send pause frames if the I/O shim packet FIFOs are
+ *  nearly full.  If the value is zero, pause frames are not sent.  If
+ *  the value is nonzero, it is the delay value which will be sent in any
+ *  pause frames which are output, in units of 512 bit times. */
+#define NETIO_PARAM_PAUSE_OUT  2
+
+/** Jumbo frame support.  The value is a 4-byte integer.  If the value is
+ *  nonzero, the MAC will accept frames of up to 10240 bytes.  If the value
+ *  is zero, the MAC will only accept frames of up to 1544 bytes. */
+#define NETIO_PARAM_JUMBO      3
+
+/** I/O shim's overflow statistics register.  The value is two 16-bit integers.
+ *  The first 16-bit value (or the low 16 bits, if the value is treated as a
+ *  32-bit number) is the count of packets which were completely dropped and
+ *  not delivered by the shim.  The second 16-bit value (or the high 16 bits,
+ *  if the value is treated as a 32-bit number) is the count of packets
+ *  which were truncated and thus only partially delivered by the shim.  This
+ *  register is automatically reset to zero after it has been read.
+ */
+#define NETIO_PARAM_OVERFLOW   4
+
+/** IPP statistics.  This address is only valid with @ref netio_get().  The
+ *  value is a netio_stat_t structure.  Unlike the I/O shim statistics, the
+ *  IPP statistics are not all reset to zero on read; see the description
+ *  of the netio_stat_t for details. */
+#define NETIO_PARAM_STAT 5
+
+/** Possible link state.  The value is a combination of "NETIO_LINK_xxx"
+ *  flags.  With @ref netio_get(), this will indicate which flags are
+ *  actually supported by the hardware.
+ *
+ *  For historical reasons, specifying this value to netio_set() will have
+ *  the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is
+ *  discouraged.
+ */
+#define NETIO_PARAM_LINK_POSSIBLE_STATE 6
+
+/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags.
+ *  With @ref netio_set(), this will attempt to immediately bring up the
+ *  link using whichever of the requested flags are supported by the
+ *  hardware, or take down the link if the flags are zero; if this is
+ *  not possible, an error will be returned.  Many programs will want
+ *  to use ::NETIO_PARAM_LINK_DESIRED_STATE instead.
+ *
+ *  For historical reasons, specifying this value to netio_get() will
+ *  have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE,
+ *  but this usage is discouraged.
+ */
+#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE
+
+/** Current link state. This address is only valid with @ref netio_get().
+ *  The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together.
+ *  If the link is down, the value ANDed with NETIO_LINK_SPEED will be
+ *  zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will
+ *  result in exactly one of the NETIO_LINK_xxx values, indicating the
+ *  current speed. */
+#define NETIO_PARAM_LINK_CURRENT_STATE 7
+
+/** Variant symbol for current state, retained for compatibility with
+ *  pre-MDE-2.1 programs. */
+#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE
+
+/** Packet Coherence protocol. This address is only valid with @ref netio_get().
+ *  The value is nonzero if the interface is configured for cache-coherent DMA.
+ */
+#define NETIO_PARAM_COHERENT 8
+
+/** Desired link state. The value is a conbination of "NETIO_LINK_xxx"
+ *  flags, which specify the desired state for the link.  With @ref
+ *  netio_set(), this will, in the background, attempt to bring up the link
+ *  using whichever of the requested flags are reasonable, or take down the
+ *  link if the flags are zero.  The actual link up or down operation may
+ *  happen after this call completes.  If the link state changes in the
+ *  future, the system will continue to try to get back to the desired link
+ *  state; for instance, if the link is brought up successfully, and then
+ *  the network cable is disconnected, the link will go down.  However, the
+ *  desired state of the link is still up, so if the cable is reconnected,
+ *  the link will be brought up again.
+ *
+ *  With @ref netio_get(), this will indicate the desired state for the
+ *  link, as set with a previous netio_set() call, or implicitly by a
+ *  netio_input_register() or netio_input_unregister() operation.  This may
+ *  not reflect the current state of the link; to get that, use
+ *  ::NETIO_PARAM_LINK_CURRENT_STATE. */
+#define NETIO_PARAM_LINK_DESIRED_STATE 9
+
+/** NetIO statistics structure.  Retrieved using the ::NETIO_PARAM_STAT
+ *  address passed to @ref netio_get(). */
+typedef struct
+{
+  /** Number of packets which have been received by the IPP and forwarded
+   *  to a tile's receive queue for processing.  This value wraps at its
+   *  maximum, and is not cleared upon read. */
+  uint32_t packets_received;
+
+  /** Number of packets which have been dropped by the IPP, because they could
+   *  not be received, or could not be forwarded to a tile.  The former happens
+   *  when the IPP does not have a free packet buffer of suitable size for an
+   *  incoming frame.  The latter happens when all potential destination tiles
+   *  for a packet, as defined by the group, bucket, and queue configuration,
+   *  have full receive queues.   This value wraps at its maximum, and is not
+   *  cleared upon read. */
+  uint32_t packets_dropped;
+
+  /*
+   * Note: the #defines after each of the following four one-byte values
+   * denote their location within the third word of the netio_stat_t.  They
+   * are intended for use only by the IPP implementation and are thus omitted
+   * from the Doxygen output.
+   */
+
+  /** Number of packets dropped because no worker was able to accept a new
+   *  packet.  This value saturates at its maximum, and is cleared upon
+   *  read. */
+  uint8_t drops_no_worker;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_WORKER   0
+#endif
+
+  /** Number of packets dropped because no small buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_smallbuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_SMALLBUF 1
+#endif
+
+  /** Number of packets dropped because no large buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_largebuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_LARGEBUF 2
+#endif
+
+  /** Number of packets dropped because no jumbo buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_jumbobuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_JUMBOBUF 3
+#endif
+}
+netio_stat_t;
+
+
+/** Link can run, should run, or is running at 10 Mbps. */
+#define NETIO_LINK_10M         0x01
+
+/** Link can run, should run, or is running at 100 Mbps. */
+#define NETIO_LINK_100M        0x02
+
+/** Link can run, should run, or is running at 1 Gbps. */
+#define NETIO_LINK_1G          0x04
+
+/** Link can run, should run, or is running at 10 Gbps. */
+#define NETIO_LINK_10G         0x08
+
+/** Link should run at the highest speed supported by the link and by
+ *  the device connected to the link.  Only usable as a value for
+ *  the link's desired state; never returned as a value for the current
+ *  or possible states. */
+#define NETIO_LINK_ANYSPEED    0x10
+
+/** All legal link speeds. */
+#define NETIO_LINK_SPEED  (NETIO_LINK_10M  | \
+                           NETIO_LINK_100M | \
+                           NETIO_LINK_1G   | \
+                           NETIO_LINK_10G  | \
+                           NETIO_LINK_ANYSPEED)
+
+
+/** MAC register class.  Addr is a register offset within the MAC.
+ *  Registers within the XGbE and GbE MACs are documented in the Tile
+ *  Processor I/O Device Guide (UG104). MAC registers start at address
+ *  0x4000, and do not include the MAC_INTERFACE registers. */
+#define NETIO_MAC             1
+
+/** MDIO register class (IEEE 802.3 clause 22 format).  Addr is the "addr"
+ *  member of a netio_mdio_addr_t structure. */
+#define NETIO_MDIO            2
+
+/** MDIO register class (IEEE 802.3 clause 45 format).  Addr is the "addr"
+ *  member of a netio_mdio_addr_t structure. */
+#define NETIO_MDIO_CLAUSE45   3
+
+/** NetIO MDIO address type.  Retrieved or provided using the ::NETIO_MDIO
+ *  address passed to @ref netio_get() or @ref netio_set(). */
+typedef union
+{
+  struct
+  {
+    unsigned int reg:16;  /**< MDIO register offset.  For clause 22 access,
+                               must be less than 32. */
+    unsigned int phy:5;   /**< Which MDIO PHY to access. */
+    unsigned int dev:5;   /**< Which MDIO device to access within that PHY.
+                               Applicable for clause 45 access only; ignored
+                               for clause 22 access. */
+  }
+  bits;                   /**< Container for bitfields. */
+  uint64_t addr;          /**< Value to pass to @ref netio_get() or
+                           *   @ref netio_set(). */
+}
+netio_mdio_addr_t;
+
+/** @} */
+
+#endif /* __NETIO_INTF_H__ */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 112b1e2..b4c8e8e 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -15,3 +15,4 @@
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_PCI)		+= pci.o
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index 67617a0..dbc213a 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -21,7 +21,6 @@
 #include <linux/kdev_t.h>
 #include <linux/fs.h>
 #include <linux/fcntl.h>
-#include <linux/smp_lock.h>
 #include <linux/uaccess.h>
 #include <linux/signal.h>
 #include <asm/syscalls.h>
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index fb64b99..543d6a3 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -15,7 +15,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/errno.h>
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
new file mode 100644
index 0000000..a1ee25b
--- /dev/null
+++ b/arch/tile/kernel/pci.c
@@ -0,0 +1,621 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/capability.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/byteorder.h>
+#include <asm/hv_driver.h>
+#include <hv/drv_pcie_rc_intf.h>
+
+
+/*
+ * Initialization flow and process
+ * -------------------------------
+ *
+ * This files containes the routines to search for PCI buses,
+ * enumerate the buses, and configure any attached devices.
+ *
+ * There are two entry points here:
+ * 1) tile_pci_init
+ *    This sets up the pci_controller structs, and opens the
+ *    FDs to the hypervisor.  This is called from setup_arch() early
+ *    in the boot process.
+ * 2) pcibios_init
+ *    This probes the PCI bus(es) for any attached hardware.  It's
+ *    called by subsys_initcall.  All of the real work is done by the
+ *    generic Linux PCI layer.
+ *
+ */
+
+/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+int __write_once tile_plx_gen1;
+
+static struct pci_controller controllers[TILE_NUM_PCIE];
+static int num_controllers;
+
+static struct pci_ops tile_cfg_ops;
+
+
+/*
+ * We don't need to worry about the alignment of resources.
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+			    resource_size_t size, resource_size_t align)
+{
+	return res->start;
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+/*
+ * Open a FD to the hypervisor PCI device.
+ *
+ * controller_id is the controller number, config type is 0 or 1 for
+ * config0 or config1 operations.
+ */
+static int __init tile_pcie_open(int controller_id, int config_type)
+{
+	char filename[32];
+	int fd;
+
+	sprintf(filename, "pcie/%d/config%d", controller_id, config_type);
+
+	fd = hv_dev_open((HV_VirtAddr)filename, 0);
+
+	return fd;
+}
+
+
+/*
+ * Get the IRQ numbers from the HV and set up the handlers for them.
+ */
+static int __init tile_init_irqs(int controller_id,
+				 struct pci_controller *controller)
+{
+	char filename[32];
+	int fd;
+	int ret;
+	int x;
+	struct pcie_rc_config rc_config;
+
+	sprintf(filename, "pcie/%d/ctl", controller_id);
+	fd = hv_dev_open((HV_VirtAddr)filename, 0);
+	if (fd < 0) {
+		pr_err("PCI: hv_dev_open(%s) failed\n", filename);
+		return -1;
+	}
+	ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config),
+			   sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF);
+	hv_dev_close(fd);
+	if (ret != sizeof(rc_config)) {
+		pr_err("PCI: wanted %zd bytes, got %d\n",
+		       sizeof(rc_config), ret);
+		return -1;
+	}
+	/* Record irq_base so that we can map INTx to IRQ # later. */
+	controller->irq_base = rc_config.intr;
+
+	for (x = 0; x < 4; x++)
+		tile_irq_activate(rc_config.intr + x,
+				  TILE_IRQ_HW_CLEAR);
+
+	if (rc_config.plx_gen1)
+		controller->plx_gen1 = 1;
+
+	return 0;
+}
+
+/*
+ * First initialization entry point, called from setup_arch().
+ *
+ * Find valid controllers and fill in pci_controller structs for each
+ * of them.
+ *
+ * Returns the number of controllers discovered.
+ */
+int __init tile_pci_init(void)
+{
+	int i;
+
+	pr_info("PCI: Searching for controllers...\n");
+
+	/* Do any configuration we need before using the PCIe */
+
+	for (i = 0; i < TILE_NUM_PCIE; i++) {
+		int hv_cfg_fd0 = -1;
+		int hv_cfg_fd1 = -1;
+		int hv_mem_fd = -1;
+		char name[32];
+		struct pci_controller *controller;
+
+		/*
+		 * Open the fd to the HV.  If it fails then this
+		 * device doesn't exist.
+		 */
+		hv_cfg_fd0 = tile_pcie_open(i, 0);
+		if (hv_cfg_fd0 < 0)
+			continue;
+		hv_cfg_fd1 = tile_pcie_open(i, 1);
+		if (hv_cfg_fd1 < 0) {
+			pr_err("PCI: Couldn't open config fd to HV "
+			    "for controller %d\n", i);
+			goto err_cont;
+		}
+
+		sprintf(name, "pcie/%d/mem", i);
+		hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0);
+		if (hv_mem_fd < 0) {
+			pr_err("PCI: Could not open mem fd to HV!\n");
+			goto err_cont;
+		}
+
+		pr_info("PCI: Found PCI controller #%d\n", i);
+
+		controller = &controllers[num_controllers];
+
+		if (tile_init_irqs(i, controller)) {
+			pr_err("PCI: Could not initialize "
+			       "IRQs, aborting.\n");
+			goto err_cont;
+		}
+
+		controller->index = num_controllers;
+		controller->hv_cfg_fd[0] = hv_cfg_fd0;
+		controller->hv_cfg_fd[1] = hv_cfg_fd1;
+		controller->hv_mem_fd = hv_mem_fd;
+		controller->first_busno = 0;
+		controller->last_busno = 0xff;
+		controller->ops = &tile_cfg_ops;
+
+		num_controllers++;
+		continue;
+
+err_cont:
+		if (hv_cfg_fd0 >= 0)
+			hv_dev_close(hv_cfg_fd0);
+		if (hv_cfg_fd1 >= 0)
+			hv_dev_close(hv_cfg_fd1);
+		if (hv_mem_fd >= 0)
+			hv_dev_close(hv_mem_fd);
+		continue;
+	}
+
+	/*
+	 * Before using the PCIe, see if we need to do any platform-specific
+	 * configuration, such as the PLX switch Gen 1 issue on TILEmpower.
+	 */
+	for (i = 0; i < num_controllers; i++) {
+		struct pci_controller *controller = &controllers[i];
+
+		if (controller->plx_gen1)
+			tile_plx_gen1 = 1;
+	}
+
+	return num_controllers;
+}
+
+/*
+ * (pin - 1) converts from the PCI standard's [1:4] convention to
+ * a normal [0:3] range.
+ */
+static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct pci_controller *controller =
+		(struct pci_controller *)dev->sysdata;
+	return (pin - 1) + controller->irq_base;
+}
+
+
+static void __init fixup_read_and_payload_sizes(void)
+{
+	struct pci_dev *dev = NULL;
+	int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */
+	int max_read_size = 0x2; /* Limit to 512 byte reads. */
+	u16 new_values;
+
+	/* Scan for the smallest maximum payload size. */
+	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+		int pcie_caps_offset;
+		u32 devcap;
+		int max_payload;
+
+		pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
+		if (pcie_caps_offset == 0)
+			continue;
+
+		pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP,
+				      &devcap);
+		max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD;
+		if (max_payload < smallest_max_payload)
+			smallest_max_payload = max_payload;
+	}
+
+	/* Now, set the max_payload_size for all devices to that value. */
+	new_values = (max_read_size << 12) | (smallest_max_payload << 5);
+	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+		int pcie_caps_offset;
+		u16 devctl;
+
+		pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
+		if (pcie_caps_offset == 0)
+			continue;
+
+		pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
+				     &devctl);
+		devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ);
+		devctl |= new_values;
+		pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
+				      devctl);
+	}
+}
+
+
+/*
+ * Second PCI initialization entry point, called by subsys_initcall.
+ *
+ * The controllers have been set up by the time we get here, by a call to
+ * tile_pci_init.
+ */
+static int __init pcibios_init(void)
+{
+	int i;
+
+	pr_info("PCI: Probing PCI hardware\n");
+
+	/*
+	 * Delay a bit in case devices aren't ready.  Some devices are
+	 * known to require at least 20ms here, but we use a more
+	 * conservative value.
+	 */
+	mdelay(250);
+
+	/* Scan all of the recorded PCI controllers.  */
+	for (i = 0; i < num_controllers; i++) {
+		struct pci_controller *controller = &controllers[i];
+		struct pci_bus *bus;
+
+		pr_info("PCI: initializing controller #%d\n", i);
+
+		/*
+		 * This comes from the generic Linux PCI driver.
+		 *
+		 * It reads the PCI tree for this bus into the Linux
+		 * data structures.
+		 *
+		 * This is inlined in linux/pci.h and calls into
+		 * pci_scan_bus_parented() in probe.c.
+		 */
+		bus = pci_scan_bus(0, controller->ops, controller);
+		controller->root_bus = bus;
+		controller->last_busno = bus->subordinate;
+
+	}
+
+	/* Do machine dependent PCI interrupt routing */
+	pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
+
+	/*
+	 * This comes from the generic Linux PCI driver.
+	 *
+	 * It allocates all of the resources (I/O memory, etc)
+	 * associated with the devices read in above.
+	 */
+
+	pci_assign_unassigned_resources();
+
+	/* Configure the max_read_size and max_payload_size values. */
+	fixup_read_and_payload_sizes();
+
+	/* Record the I/O resources in the PCI controller structure. */
+	for (i = 0; i < num_controllers; i++) {
+		struct pci_bus *root_bus = controllers[i].root_bus;
+		struct pci_bus *next_bus;
+		struct pci_dev *dev;
+
+		list_for_each_entry(dev, &root_bus->devices, bus_list) {
+			/* Find the PCI host controller, ie. the 1st bridge. */
+			if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
+				(PCI_SLOT(dev->devfn) == 0)) {
+				next_bus = dev->subordinate;
+				controllers[i].mem_resources[0] =
+					*next_bus->resource[0];
+				controllers[i].mem_resources[1] =
+					 *next_bus->resource[1];
+				controllers[i].mem_resources[2] =
+					 *next_bus->resource[2];
+
+				break;
+			}
+		}
+
+	}
+
+	return 0;
+}
+subsys_initcall(pcibios_init);
+
+/*
+ * No bus fixups needed.
+ */
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+	/* Nothing needs to be done. */
+}
+
+/*
+ * This can be called from the generic PCI layer, but doesn't need to
+ * do anything.
+ */
+char __devinit *pcibios_setup(char *str)
+{
+	/* Nothing needs to be done. */
+	return str;
+}
+
+/*
+ * This is called from the generic Linux layer.
+ */
+void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
+
+/*
+ * Enable memory and/or address decoding, as appropriate, for the
+ * device described by the 'dev' struct.
+ *
+ * This is called from the generic PCI layer, and can be called
+ * for bridges or endpoints.
+ */
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	u8 header_type;
+	int i;
+	struct resource *r;
+
+	pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+	if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+		/*
+		 * For bridges, we enable both memory and I/O decoding
+		 * in call cases.
+		 */
+		cmd |= PCI_COMMAND_IO;
+		cmd |= PCI_COMMAND_MEMORY;
+	} else {
+		/*
+		 * For endpoints, we enable memory and/or I/O decoding
+		 * only if they have a memory resource of that type.
+		 */
+		for (i = 0; i < 6; i++) {
+			r = &dev->resource[i];
+			if (r->flags & IORESOURCE_UNSET) {
+				pr_err("PCI: Device %s not available "
+				       "because of resource collisions\n",
+				       pci_name(dev));
+				return -EINVAL;
+			}
+			if (r->flags & IORESOURCE_IO)
+				cmd |= PCI_COMMAND_IO;
+			if (r->flags & IORESOURCE_MEM)
+				cmd |= PCI_COMMAND_MEMORY;
+		}
+	}
+
+	/*
+	 * We only write the command if it changed.
+	 */
+	if (cmd != old_cmd)
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	return 0;
+}
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
+{
+	unsigned long start = pci_resource_start(dev, bar);
+	unsigned long len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (!len)
+		return NULL;
+	if (max && len > max)
+		len = max;
+
+	if (!(flags & IORESOURCE_MEM)) {
+		pr_info("PCI: Trying to map invalid resource %#lx\n", flags);
+		start = 0;
+	}
+
+	return (void __iomem *)start;
+}
+EXPORT_SYMBOL(pci_iomap);
+
+
+/****************************************************************
+ *
+ * Tile PCI config space read/write routines
+ *
+ ****************************************************************/
+
+/*
+ * These are the normal read and write ops
+ * These are expanded with macros from  pci_bus_read_config_byte() etc.
+ *
+ * devfn is the combined PCI slot & function.
+ *
+ * offset is in bytes, from the start of config space for the
+ * specified bus & slot.
+ */
+
+static int __devinit tile_cfg_read(struct pci_bus *bus,
+				   unsigned int devfn,
+				   int offset,
+				   int size,
+				   u32 *val)
+{
+	struct pci_controller *controller = bus->sysdata;
+	int busnum = bus->number & 0xff;
+	int slot = (devfn >> 3) & 0x1f;
+	int function = devfn & 0x7;
+	u32 addr;
+	int config_mode = 1;
+
+	/*
+	 * There is no bridge between the Tile and bus 0, so we
+	 * use config0 to talk to bus 0.
+	 *
+	 * If we're talking to a bus other than zero then we
+	 * must have found a bridge.
+	 */
+	if (busnum == 0) {
+		/*
+		 * We fake an empty slot for (busnum == 0) && (slot > 0),
+		 * since there is only one slot on bus 0.
+		 */
+		if (slot) {
+			*val = 0xFFFFFFFF;
+			return 0;
+		}
+		config_mode = 0;
+	}
+
+	addr = busnum << 20;		/* Bus in 27:20 */
+	addr |= slot << 15;		/* Slot (device) in 19:15 */
+	addr |= function << 12;		/* Function is in 14:12 */
+	addr |= (offset & 0xFFF);	/* byte address in 0:11 */
+
+	return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0,
+			    (HV_VirtAddr)(val), size, addr);
+}
+
+
+/*
+ * See tile_cfg_read() for relevent comments.
+ * Note that "val" is the value to write, not a pointer to that value.
+ */
+static int __devinit tile_cfg_write(struct pci_bus *bus,
+				    unsigned int devfn,
+				    int offset,
+				    int size,
+				    u32 val)
+{
+	struct pci_controller *controller = bus->sysdata;
+	int busnum = bus->number & 0xff;
+	int slot = (devfn >> 3) & 0x1f;
+	int function = devfn & 0x7;
+	u32 addr;
+	int config_mode = 1;
+	HV_VirtAddr valp = (HV_VirtAddr)&val;
+
+	/*
+	 * For bus 0 slot 0 we use config 0 accesses.
+	 */
+	if (busnum == 0) {
+		/*
+		 * We fake an empty slot for (busnum == 0) && (slot > 0),
+		 * since there is only one slot on bus 0.
+		 */
+		if (slot)
+			return 0;
+		config_mode = 0;
+	}
+
+	addr = busnum << 20;		/* Bus in 27:20 */
+	addr |= slot << 15;		/* Slot (device) in 19:15 */
+	addr |= function << 12;		/* Function is in 14:12 */
+	addr |= (offset & 0xFFF);	/* byte address in 0:11 */
+
+#ifdef __BIG_ENDIAN
+	/* Point to the correct part of the 32-bit "val". */
+	valp += 4 - size;
+#endif
+
+	return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0,
+			     valp, size, addr);
+}
+
+
+static struct pci_ops tile_cfg_ops = {
+	.read =         tile_cfg_read,
+	.write =        tile_cfg_write,
+};
+
+
+/*
+ * In the following, each PCI controller's mem_resources[1]
+ * represents its (non-prefetchable) PCI memory resource.
+ * mem_resources[0] and mem_resources[2] refer to its PCI I/O and
+ * prefetchable PCI memory resources, respectively.
+ * For more details, see pci_setup_bridge() in setup-bus.c.
+ * By comparing the target PCI memory address against the
+ * end address of controller 0, we can determine the controller
+ * that should accept the PCI memory access.
+ */
+#define TILE_READ(size, type)						\
+type _tile_read##size(unsigned long addr)				\
+{									\
+	type val;							\
+	int idx = 0;							\
+	if (addr > controllers[0].mem_resources[1].end &&		\
+	    addr > controllers[0].mem_resources[2].end)			\
+		idx = 1;                                                \
+	if (hv_dev_pread(controllers[idx].hv_mem_fd, 0,			\
+			 (HV_VirtAddr)(&val), sizeof(type), addr))	\
+		pr_err("PCI: read %zd bytes at 0x%lX failed\n",		\
+		       sizeof(type), addr);				\
+	return val;							\
+}									\
+EXPORT_SYMBOL(_tile_read##size)
+
+TILE_READ(b, u8);
+TILE_READ(w, u16);
+TILE_READ(l, u32);
+TILE_READ(q, u64);
+
+#define TILE_WRITE(size, type)						\
+void _tile_write##size(type val, unsigned long addr)			\
+{									\
+	int idx = 0;							\
+	if (addr > controllers[0].mem_resources[1].end &&		\
+	    addr > controllers[0].mem_resources[2].end)			\
+		idx = 1;                                                \
+	if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0,		\
+			  (HV_VirtAddr)(&val), sizeof(type), addr))	\
+		pr_err("PCI: write %zd bytes at 0x%lX failed\n",	\
+		       sizeof(type), addr);				\
+}									\
+EXPORT_SYMBOL(_tile_write##size)
+
+TILE_WRITE(b, u8);
+TILE_WRITE(w, u16);
+TILE_WRITE(l, u32);
+TILE_WRITE(q, u64);
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index fb0b3cb..f185736 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -840,7 +840,7 @@
 	for_each_online_node(i)
 		register_one_node(i);
 
-	for_each_present_cpu(i)
+	for (i = 0; i < smp_height * smp_width; ++i)
 		register_cpu(&cpu_devices[i], i);
 
 	return 0;
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 687719d..757407e 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -16,7 +16,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/errno.h>
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 74d62d0..b949edc 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -18,7 +18,6 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
-#include <linux/smp_lock.h>
 #include <linux/bootmem.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
index 7e76466..e2187d2 100644
--- a/arch/tile/kernel/sys.c
+++ b/arch/tile/kernel/sys.c
@@ -20,7 +20,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/mman.h>
 #include <linux/file.h>
diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c
index 6235283..cc3d9ba 100644
--- a/arch/tile/lib/memchr_32.c
+++ b/arch/tile/lib/memchr_32.c
@@ -18,12 +18,24 @@
 
 void *memchr(const void *s, int c, size_t n)
 {
+	const uint32_t *last_word_ptr;
+	const uint32_t *p;
+	const char *last_byte_ptr;
+	uintptr_t s_int;
+	uint32_t goal, before_mask, v, bits;
+	char *ret;
+
+	if (__builtin_expect(n == 0, 0)) {
+		/* Don't dereference any memory if the array is empty. */
+		return NULL;
+	}
+
 	/* Get an aligned pointer. */
-	const uintptr_t s_int = (uintptr_t) s;
-	const uint32_t *p = (const uint32_t *)(s_int & -4);
+	s_int = (uintptr_t) s;
+	p = (const uint32_t *)(s_int & -4);
 
 	/* Create four copies of the byte for which we are looking. */
-	const uint32_t goal = 0x01010101 * (uint8_t) c;
+	goal = 0x01010101 * (uint8_t) c;
 
 	/* Read the first word, but munge it so that bytes before the array
 	 * will not match goal.
@@ -31,23 +43,14 @@
 	 * Note that this shift count expression works because we know
 	 * shift counts are taken mod 32.
 	 */
-	const uint32_t before_mask = (1 << (s_int << 3)) - 1;
-	uint32_t v = (*p | before_mask) ^ (goal & before_mask);
+	before_mask = (1 << (s_int << 3)) - 1;
+	v = (*p | before_mask) ^ (goal & before_mask);
 
 	/* Compute the address of the last byte. */
-	const char *const last_byte_ptr = (const char *)s + n - 1;
+	last_byte_ptr = (const char *)s + n - 1;
 
 	/* Compute the address of the word containing the last byte. */
-	const uint32_t *const last_word_ptr =
-	    (const uint32_t *)((uintptr_t) last_byte_ptr & -4);
-
-	uint32_t bits;
-	char *ret;
-
-	if (__builtin_expect(n == 0, 0)) {
-		/* Don't dereference any memory if the array is empty. */
-		return NULL;
-	}
+	last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4);
 
 	while ((bits = __insn_seqb(v, goal)) == 0) {
 		if (__builtin_expect(p == last_word_ptr, 0)) {
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 485e24d..5cd1c40 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -167,23 +167,30 @@
 	 * when we compare them.
 	 */
 	u32 my_ticket_;
-
-	/* Take out the next ticket; this will also stop would-be readers. */
-	if (val & 1)
-		val = get_rwlock(rwlock);
-	rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT);
-
-	/* Extract my ticket value from the original word. */
-	my_ticket_ = val >> WR_NEXT_SHIFT;
+	u32 iterations = 0;
 
 	/*
-	 * Wait until the "current" field matches our ticket, and
-	 * there are no remaining readers.
+	 * Wait until there are no readers, then bump up the next
+	 * field and capture the ticket value.
 	 */
 	for (;;) {
+		if (!(val & 1)) {
+			if ((val >> RD_COUNT_SHIFT) == 0)
+				break;
+			rwlock->lock = val;
+		}
+		delay_backoff(iterations++);
+		val = __insn_tns((int *)&rwlock->lock);
+	}
+
+	/* Take out the next ticket and extract my ticket value. */
+	rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT);
+	my_ticket_ = val >> WR_NEXT_SHIFT;
+
+	/* Wait until the "current" field matches our ticket. */
+	for (;;) {
 		u32 curr_ = val >> WR_CURR_SHIFT;
-		u32 readers = val >> RD_COUNT_SHIFT;
-		u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers;
+		u32 delta = ((my_ticket_ - curr_) & WR_MASK);
 		if (likely(delta == 0))
 			break;
 
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index f295b4a..dcebfc8 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -24,7 +24,6 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/tty.h>
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 24688b6..201a582 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -21,7 +21,6 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/sysctl.h>
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 7f7338c..1664cce 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -727,6 +727,9 @@
 
 static void free_winch(struct winch *winch, int free_irq_ok)
 {
+	if (free_irq_ok)
+		free_irq(WINCH_IRQ, winch);
+
 	list_del(&winch->list);
 
 	if (winch->pid != -1)
@@ -735,8 +738,6 @@
 		os_close_file(winch->fd);
 	if (winch->stack != 0)
 		free_stack(winch->stack, 0);
-	if (free_irq_ok)
-		free_irq(WINCH_IRQ, winch);
 	kfree(winch);
 }
 
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 340268b..09bd7b5 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -5,7 +5,6 @@
 
 #include "linux/stddef.h"
 #include "linux/fs.h"
-#include "linux/smp_lock.h"
 #include "linux/ptrace.h"
 #include "linux/sched.h"
 #include "linux/slab.h"
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 849813f..5852519 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -28,7 +28,6 @@
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/utsname.h>
-#include <linux/smp_lock.h>
 #include <linux/mm.h>
 #include <linux/uio.h>
 #include <linux/poll.h>
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 4d293dc..9479a03 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -216,8 +216,8 @@
 }
 
 /* Return an pointer with offset calculated */
-static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx,
-				phys_addr_t phys, pgprot_t flags)
+static __always_inline unsigned long
+__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
 {
 	__set_fixmap(idx, phys, flags);
 	return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index e8506c1..1c10c88 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -61,9 +61,9 @@
 #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
 #endif
 
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT)
 
 /* Maximum number of virtual CPUs in multi-processor guests. */
 #define MAX_VIRT_CPUS 32
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index 42a7e00..8413688 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -32,6 +32,11 @@
 /* And the trap vector is... */
 #define TRAP_INSTR "int $0x82"
 
+#define __MACH2PHYS_VIRT_START 0xF5800000
+#define __MACH2PHYS_VIRT_END   0xF6800000
+
+#define __MACH2PHYS_SHIFT      2
+
 /*
  * Virtual addresses beyond this are not modifiable by guest OSes. The
  * machine->physical mapping table starts at this address, read-only.
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index 100d266..839a481 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -39,18 +39,7 @@
 #define __HYPERVISOR_VIRT_END   0xFFFF880000000000
 #define __MACH2PHYS_VIRT_START  0xFFFF800000000000
 #define __MACH2PHYS_VIRT_END    0xFFFF804000000000
-
-#ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
-#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
-#endif
-
-#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
-#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
-#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
+#define __MACH2PHYS_SHIFT       3
 
 /*
  * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index dd8c141..8760cc6 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/pfn.h>
+#include <linux/mm.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -35,6 +36,8 @@
 #define MAX_DOMAIN_PAGES						\
     ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
 
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int   machine_to_phys_order;
 
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -69,10 +72,8 @@
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return mfn;
 
-#if 0
 	if (unlikely((mfn >> machine_to_phys_order) != 0))
-		return max_mapnr;
-#endif
+		return ~0;
 
 	pfn = 0;
 	/*
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 1b7b31a..212a6a4 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -33,7 +33,6 @@
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/major.h>
 #include <linux/fs.h>
 #include <linux/device.h>
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index ec592ca..cd21b65 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -315,14 +315,18 @@
 		if (!breakinfo[i].enabled)
 			continue;
 		bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
-		if (bp->attr.disabled == 1)
+		if (!bp->attr.disabled) {
+			arch_uninstall_hw_breakpoint(bp);
+			bp->attr.disabled = 1;
 			continue;
+		}
 		if (dbg_is_early)
 			early_dr7 &= ~encode_dr7(i, breakinfo[i].len,
 						 breakinfo[i].type);
-		else
-			arch_uninstall_hw_breakpoint(bp);
-		bp->attr.disabled = 1;
+		else if (hw_break_release_slot(i))
+			printk(KERN_ERR "KGDB: hw bpt remove failed %lx\n",
+			       breakinfo[i].addr);
+		breakinfo[i].enabled = 0;
 	}
 }
 
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7bf2dc4..12fcbe2 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -30,7 +30,6 @@
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/major.h>
 #include <linux/fs.h>
 #include <linux/device.h>
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 82e144a..1ca1229 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3395,6 +3395,7 @@
 	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
 
 	load_host_msrs(vcpu);
+	kvm_load_ldt(ldt_selector);
 	loadsegment(fs, fs_selector);
 #ifdef CONFIG_X86_64
 	load_gs_index(gs_selector);
@@ -3402,7 +3403,6 @@
 #else
 	loadsegment(gs, gs_selector);
 #endif
-	kvm_load_ldt(ldt_selector);
 
 	reload_tss(vcpu);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8da0e45..ff21fdd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -821,10 +821,9 @@
 #endif
 
 #ifdef CONFIG_X86_64
-	if (is_long_mode(&vmx->vcpu)) {
-		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	if (is_long_mode(&vmx->vcpu))
 		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-	}
 #endif
 	for (i = 0; i < vmx->save_nmsrs; ++i)
 		kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -839,23 +838,23 @@
 
 	++vmx->vcpu.stat.host_state_reload;
 	vmx->host_state.loaded = 0;
-	if (vmx->host_state.fs_reload_needed)
-		loadsegment(fs, vmx->host_state.fs_sel);
+#ifdef CONFIG_X86_64
+	if (is_long_mode(&vmx->vcpu))
+		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#endif
 	if (vmx->host_state.gs_ldt_reload_needed) {
 		kvm_load_ldt(vmx->host_state.ldt_sel);
 #ifdef CONFIG_X86_64
 		load_gs_index(vmx->host_state.gs_sel);
-		wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
 #else
 		loadsegment(gs, vmx->host_state.gs_sel);
 #endif
 	}
+	if (vmx->host_state.fs_reload_needed)
+		loadsegment(fs, vmx->host_state.fs_sel);
 	reload_tss();
 #ifdef CONFIG_X86_64
-	if (is_long_mode(&vmx->vcpu)) {
-		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
-	}
+	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
 	if (current_thread_info()->status & TS_USEDFPU)
 		clts();
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 15466c0..0972315 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -138,7 +138,6 @@
 	struct acpi_resource_address64 addr;
 	acpi_status status;
 	unsigned long flags;
-	struct resource *root, *conflict;
 	u64 start, end;
 
 	status = resource_to_addr(acpi_res, &addr);
@@ -146,12 +145,10 @@
 		return AE_OK;
 
 	if (addr.resource_type == ACPI_MEMORY_RANGE) {
-		root = &iomem_resource;
 		flags = IORESOURCE_MEM;
 		if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY)
 			flags |= IORESOURCE_PREFETCH;
 	} else if (addr.resource_type == ACPI_IO_RANGE) {
-		root = &ioport_resource;
 		flags = IORESOURCE_IO;
 	} else
 		return AE_OK;
@@ -172,27 +169,92 @@
 		return AE_OK;
 	}
 
-	conflict = insert_resource_conflict(root, res);
-	if (conflict) {
-		dev_err(&info->bridge->dev,
-			"address space collision: host bridge window %pR "
-			"conflicts with %s %pR\n",
-			res, conflict->name, conflict);
-	} else {
-		pci_bus_add_resource(info->bus, res, 0);
-		info->res_num++;
-		if (addr.translation_offset)
-			dev_info(&info->bridge->dev, "host bridge window %pR "
-				 "(PCI address [%#llx-%#llx])\n",
-				 res, res->start - addr.translation_offset,
-				 res->end - addr.translation_offset);
-		else
-			dev_info(&info->bridge->dev,
-				 "host bridge window %pR\n", res);
-	}
+	info->res_num++;
+	if (addr.translation_offset)
+		dev_info(&info->bridge->dev, "host bridge window %pR "
+			 "(PCI address [%#llx-%#llx])\n",
+			 res, res->start - addr.translation_offset,
+			 res->end - addr.translation_offset);
+	else
+		dev_info(&info->bridge->dev, "host bridge window %pR\n", res);
+
 	return AE_OK;
 }
 
+static bool resource_contains(struct resource *res, resource_size_t point)
+{
+	if (res->start <= point && point <= res->end)
+		return true;
+	return false;
+}
+
+static void coalesce_windows(struct pci_root_info *info, int type)
+{
+	int i, j;
+	struct resource *res1, *res2;
+
+	for (i = 0; i < info->res_num; i++) {
+		res1 = &info->res[i];
+		if (!(res1->flags & type))
+			continue;
+
+		for (j = i + 1; j < info->res_num; j++) {
+			res2 = &info->res[j];
+			if (!(res2->flags & type))
+				continue;
+
+			/*
+			 * I don't like throwing away windows because then
+			 * our resources no longer match the ACPI _CRS, but
+			 * the kernel resource tree doesn't allow overlaps.
+			 */
+			if (resource_contains(res1, res2->start) ||
+			    resource_contains(res1, res2->end) ||
+			    resource_contains(res2, res1->start) ||
+			    resource_contains(res2, res1->end)) {
+				res1->start = min(res1->start, res2->start);
+				res1->end = max(res1->end, res2->end);
+				dev_info(&info->bridge->dev,
+					 "host bridge window expanded to %pR; %pR ignored\n",
+					 res1, res2);
+				res2->flags = 0;
+			}
+		}
+	}
+}
+
+static void add_resources(struct pci_root_info *info)
+{
+	int i;
+	struct resource *res, *root, *conflict;
+
+	if (!pci_use_crs)
+		return;
+
+	coalesce_windows(info, IORESOURCE_MEM);
+	coalesce_windows(info, IORESOURCE_IO);
+
+	for (i = 0; i < info->res_num; i++) {
+		res = &info->res[i];
+
+		if (res->flags & IORESOURCE_MEM)
+			root = &iomem_resource;
+		else if (res->flags & IORESOURCE_IO)
+			root = &ioport_resource;
+		else
+			continue;
+
+		conflict = insert_resource_conflict(root, res);
+		if (conflict)
+			dev_err(&info->bridge->dev,
+				"address space collision: host bridge window %pR "
+				"conflicts with %s %pR\n",
+				res, conflict->name, conflict);
+		else
+			pci_bus_add_resource(info->bus, res, 0);
+	}
+}
+
 static void
 get_current_resources(struct acpi_device *device, int busnum,
 			int domain, struct pci_bus *bus)
@@ -224,6 +286,7 @@
 	acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
 				&info);
 
+	add_resources(&info);
 	return;
 
 name_alloc_fail:
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 235c0f4..02c710b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,6 +75,11 @@
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
 EXPORT_SYMBOL_GPL(xen_domain_type);
 
+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int   machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
@@ -1090,6 +1095,8 @@
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
+	struct physdev_set_iopl set_iopl;
+	int rc;
 	pgd_t *pgd;
 
 	if (!xen_start_info)
@@ -1097,6 +1104,8 @@
 
 	xen_domain_type = XEN_PV_DOMAIN;
 
+	xen_setup_machphys_mapping();
+
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
@@ -1191,8 +1200,6 @@
 	/* Allocate and initialize top and mid mfn levels for p2m structure */
 	xen_build_mfn_list_list();
 
-	init_mm.pgd = pgd;
-
 	/* keep using Xen gdt for now; no urgent need to change it */
 
 #ifdef CONFIG_X86_32
@@ -1202,10 +1209,18 @@
 #else
 	pv_info.kernel_rpl = 0;
 #endif
-
 	/* set the limit of our address space */
 	xen_reserve_top();
 
+	/* We used to do this in xen_arch_setup, but that is too late on AMD
+	 * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
+	 * which pokes 0xcf8 port.
+	 */
+	set_iopl.iopl = 1;
+	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+	if (rc != 0)
+		xen_raw_printk("physdev_op failed %d\n", rc);
+
 #ifdef CONFIG_X86_32
 	/* set up basic CPUID stuff */
 	cpu_detect(&new_cpu_data);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 21ed8d7..a1feff9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2034,6 +2034,20 @@
 	set_page_prot(pmd, PAGE_KERNEL_RO);
 }
 
+void __init xen_setup_machphys_mapping(void)
+{
+	struct xen_machphys_mapping mapping;
+	unsigned long machine_to_phys_nr_ents;
+
+	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+		machine_to_phys_nr_ents = mapping.max_mfn + 1;
+	} else {
+		machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+	}
+	machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+}
+
 #ifdef CONFIG_X86_64
 static void convert_pfn_mfn(void *v)
 {
@@ -2119,44 +2133,83 @@
 	return pgd;
 }
 #else	/* !CONFIG_X86_64 */
-static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD);
+static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
+static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
+
+static __init void xen_write_cr3_init(unsigned long cr3)
+{
+	unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
+
+	BUG_ON(read_cr3() != __pa(initial_page_table));
+	BUG_ON(cr3 != __pa(swapper_pg_dir));
+
+	/*
+	 * We are switching to swapper_pg_dir for the first time (from
+	 * initial_page_table) and therefore need to mark that page
+	 * read-only and then pin it.
+	 *
+	 * Xen disallows sharing of kernel PMDs for PAE
+	 * guests. Therefore we must copy the kernel PMD from
+	 * initial_page_table into a new kernel PMD to be used in
+	 * swapper_pg_dir.
+	 */
+	swapper_kernel_pmd =
+		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
+	memcpy(swapper_kernel_pmd, initial_kernel_pmd,
+	       sizeof(pmd_t) * PTRS_PER_PMD);
+	swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
+		__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
+	set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
+
+	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+	xen_write_cr3(cr3);
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
+
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
+			  PFN_DOWN(__pa(initial_page_table)));
+	set_page_prot(initial_page_table, PAGE_KERNEL);
+	set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
+
+	pv_mmu_ops.write_cr3 = &xen_write_cr3;
+}
 
 __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
 					 unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
 
-	level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
+	initial_kernel_pmd =
+		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
 
 	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
 				  xen_start_info->nr_pt_frames * PAGE_SIZE +
 				  512*1024);
 
 	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+	memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
 
-	xen_map_identity_early(level2_kernel_pgt, max_pfn);
+	xen_map_identity_early(initial_kernel_pmd, max_pfn);
 
-	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
-	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
-			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+	memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	initial_page_table[KERNEL_PGD_BOUNDARY] =
+		__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
 
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+	set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
+	set_page_prot(initial_page_table, PAGE_KERNEL_RO);
 	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
 
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-	xen_write_cr3(__pa(swapper_pg_dir));
-
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
+			  PFN_DOWN(__pa(initial_page_table)));
+	xen_write_cr3(__pa(initial_page_table));
 
 	memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
 		      __pa(xen_start_info->pt_base +
 			   xen_start_info->nr_pt_frames * PAGE_SIZE),
 		      "XEN PAGETABLES");
 
-	return swapper_pg_dir;
+	return initial_page_table;
 }
 #endif	/* CONFIG_X86_64 */
 
@@ -2290,7 +2343,11 @@
 	.write_cr2 = xen_write_cr2,
 
 	.read_cr3 = xen_read_cr3,
+#ifdef CONFIG_X86_32
+	.write_cr3 = xen_write_cr3_init,
+#else
 	.write_cr3 = xen_write_cr3,
+#endif
 
 	.flush_tlb_user = xen_flush_tlb,
 	.flush_tlb_kernel = xen_flush_tlb,
@@ -2627,7 +2684,8 @@
 
 	prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
 
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
+				(VM_PFNMAP | VM_RESERVED | VM_IO)));
 
 	rmd.mfn = mfn;
 	rmd.prot = prot;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 769c4b0..01afd8a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -23,7 +23,6 @@
 #include <xen/interface/callback.h>
 #include <xen/interface/memory.h>
 #include <xen/interface/physdev.h>
-#include <xen/interface/memory.h>
 #include <xen/features.h>
 
 #include "xen-ops.h"
@@ -248,8 +247,7 @@
 	else
 		extra_pages = 0;
 
-	if (!xen_initial_domain())
-		xen_add_extra_mem(extra_pages);
+	xen_add_extra_mem(extra_pages);
 
 	return "Xen";
 }
@@ -337,9 +335,6 @@
 
 void __init xen_arch_setup(void)
 {
-	struct physdev_set_iopl set_iopl;
-	int rc;
-
 	xen_panic_handler_init();
 
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
@@ -356,11 +351,6 @@
 	xen_enable_sysenter();
 	xen_enable_syscall();
 
-	set_iopl.iopl = 1;
-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-	if (rc != 0)
-		printk(KERN_INFO "physdev_op failed %d\n", rc);
-
 #ifdef CONFIG_ACPI
 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
 		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 58c6ee5b..cc3eb78 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -8,7 +8,6 @@
 #include <linux/hdreg.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
-#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/uaccess.h>
 
diff --git a/block/ioctl.c b/block/ioctl.c
index 3d866d0..a9a302e 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -5,7 +5,6 @@
 #include <linux/hdreg.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
-#include <linux/smp_lock.h>
 #include <linux/blktrace_api.h>
 #include <asm/uaccess.h>
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 3f91c01..66aa4be 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3166,8 +3166,8 @@
 
 /**
  *	ata_scsi_queuecmd - Issue SCSI cdb to libata-managed device
+ *	@shost: SCSI host of command to be sent
  *	@cmd: SCSI command to be sent
- *	@done: Completion function, called when command is complete
  *
  *	In some cases, this function translates SCSI commands into
  *	ATA taskfiles, and queues the taskfiles to be sent to
@@ -3177,37 +3177,36 @@
  *	ATA and ATAPI devices appearing as SCSI devices.
  *
  *	LOCKING:
- *	Releases scsi-layer-held lock, and obtains host lock.
+ *	ATA host lock
  *
  *	RETURNS:
  *	Return value from __ata_scsi_queuecmd() if @cmd can be queued,
  *	0 otherwise.
  */
-int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 {
 	struct ata_port *ap;
 	struct ata_device *dev;
 	struct scsi_device *scsidev = cmd->device;
-	struct Scsi_Host *shost = scsidev->host;
 	int rc = 0;
+	unsigned long irq_flags;
 
 	ap = ata_shost_to_port(shost);
 
-	spin_unlock(shost->host_lock);
-	spin_lock(ap->lock);
+	spin_lock_irqsave(ap->lock, irq_flags);
 
 	ata_scsi_dump_cdb(ap, cmd);
 
 	dev = ata_scsi_find_dev(ap, scsidev);
 	if (likely(dev))
-		rc = __ata_scsi_queuecmd(cmd, done, dev);
+		rc = __ata_scsi_queuecmd(cmd, cmd->scsi_done, dev);
 	else {
 		cmd->result = (DID_BAD_TARGET << 16);
-		done(cmd);
+		cmd->scsi_done(cmd);
 	}
 
-	spin_unlock(ap->lock);
-	spin_lock(shost->host_lock);
+	spin_unlock_irqrestore(ap->lock, irq_flags);
+
 	return rc;
 }
 
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index c215899..8b677bb 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -538,7 +538,7 @@
 	return 0;
 }
 
-static void svia_configure(struct pci_dev *pdev)
+static void svia_configure(struct pci_dev *pdev, int board_id)
 {
 	u8 tmp8;
 
@@ -577,7 +577,7 @@
 	}
 
 	/*
-	 * vt6421 has problems talking to some drives.  The following
+	 * vt6420/1 has problems talking to some drives.  The following
 	 * is the fix from Joseph Chan <JosephChan@via.com.tw>.
 	 *
 	 * When host issues HOLD, device may send up to 20DW of data
@@ -596,8 +596,9 @@
 	 *
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=15173
 	 * http://article.gmane.org/gmane.linux.ide/46352
+	 * http://thread.gmane.org/gmane.linux.kernel/1062139
 	 */
-	if (pdev->device == 0x3249) {
+	if (board_id == vt6420 || board_id == vt6421) {
 		pci_read_config_byte(pdev, 0x52, &tmp8);
 		tmp8 |= 1 << 2;
 		pci_write_config_byte(pdev, 0x52, tmp8);
@@ -652,7 +653,7 @@
 	if (rc)
 		return rc;
 
-	svia_configure(pdev);
+	svia_configure(pdev, board_id);
 
 	pci_set_master(pdev);
 	return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt,
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 31b5266..ead3e79 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -475,20 +475,33 @@
  */
 void dpm_resume_noirq(pm_message_t state)
 {
-	struct device *dev;
+	struct list_head list;
 	ktime_t starttime = ktime_get();
 
+	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
 	transition_started = false;
-	list_for_each_entry(dev, &dpm_list, power.entry)
+	while (!list_empty(&dpm_list)) {
+		struct device *dev = to_device(dpm_list.next);
+
+		get_device(dev);
 		if (dev->power.status > DPM_OFF) {
 			int error;
 
 			dev->power.status = DPM_OFF;
+			mutex_unlock(&dpm_list_mtx);
+
 			error = device_resume_noirq(dev, state);
+
+			mutex_lock(&dpm_list_mtx);
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
+		if (!list_empty(&dev->power.entry))
+			list_move_tail(&dev->power.entry, &list);
+		put_device(dev);
+	}
+	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
 	dpm_show_time(starttime, state, "early");
 	resume_device_irqs();
@@ -789,20 +802,33 @@
  */
 int dpm_suspend_noirq(pm_message_t state)
 {
-	struct device *dev;
+	struct list_head list;
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	INIT_LIST_HEAD(&list);
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
-	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
+	while (!list_empty(&dpm_list)) {
+		struct device *dev = to_device(dpm_list.prev);
+
+		get_device(dev);
+		mutex_unlock(&dpm_list_mtx);
+
 		error = device_suspend_noirq(dev, state);
+
+		mutex_lock(&dpm_list_mtx);
 		if (error) {
 			pm_dev_err(dev, state, " late", error);
+			put_device(dev);
 			break;
 		}
 		dev->power.status = DPM_OFF_IRQ;
+		if (!list_empty(&dev->power.entry))
+			list_move(&dev->power.entry, &list);
+		put_device(dev);
 	}
+	list_splice_tail(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
 	if (error)
 		dpm_resume_noirq(resume_event(state));
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 575495f..727d022 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -62,8 +62,8 @@
 		int length, 	   /* length of data in buffer */
 		int func);	   /* 0 == read, 1 == write */
 
-static int cciss_scsi_queue_command (struct scsi_cmnd *cmd,
-		void (* done)(struct scsi_cmnd *));
+static int cciss_scsi_queue_command (struct Scsi_Host *h,
+				     struct scsi_cmnd *cmd);
 static int cciss_eh_device_reset_handler(struct scsi_cmnd *);
 static int cciss_eh_abort_handler(struct scsi_cmnd *);
 
@@ -1406,7 +1406,7 @@
 
 
 static int
-cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
+cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	ctlr_info_t *h;
 	int rc;
@@ -1504,6 +1504,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(cciss_scsi_queue_command)
+
 static void cciss_unregister_scsi(ctlr_info_t *h)
 {
 	struct cciss_scsi_adapter_data_t *sa;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index d299fe9..89d8a7c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -36,7 +36,6 @@
 #include <linux/memcontrol.h>
 #include <linux/mm_inline.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/pkt_sched.h>
 #define __KERNEL_SYSCALLS__
 #include <linux/unistd.h>
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b0551ba..47d223c 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -26,7 +26,6 @@
 #include <linux/module.h>
 #include <linux/drbd.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/wait.h>
 #include <linux/mm.h>
 #include <linux/memcontrol.h>
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index 43412c0..3cb4539 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -39,7 +39,6 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include "agp.h"
diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c
index c0bd6f4..6ee3348 100644
--- a/drivers/char/amiserial.c
+++ b/drivers/char/amiserial.c
@@ -81,7 +81,6 @@
 #include <linux/mm.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/bitops.h>
 #include <linux/platform_device.h>
diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c
index f6718f0..095ab90 100644
--- a/drivers/char/briq_panel.c
+++ b/drivers/char/briq_panel.c
@@ -6,7 +6,6 @@
 
 #include <linux/module.h>
 
-#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/tty.h>
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 55b8667..7066e80 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/miscdevice.h>
 #include <linux/major.h>
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 788da05..2016aad 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -37,7 +37,6 @@
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index f0863be..d72433f 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -120,7 +120,7 @@
 	int eax = regs->eax;
 
 #if defined(CONFIG_X86_64)
-	asm("pushq %%rax\n\t"
+	asm volatile("pushq %%rax\n\t"
 		"movl 0(%%rax),%%edx\n\t"
 		"pushq %%rdx\n\t"
 		"movl 4(%%rax),%%ebx\n\t"
@@ -142,11 +142,11 @@
 		"lahf\n\t"
 		"shrl $8,%%eax\n\t"
 		"andl $1,%%eax\n"
-		:"=a"(rc), "+m" (*regs)
+		:"=a"(rc)
 		:    "a"(regs)
 		:    "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory");
 #else
-	asm("pushl %%eax\n\t"
+	asm volatile("pushl %%eax\n\t"
 	    "movl 0(%%eax),%%edx\n\t"
 	    "push %%edx\n\t"
 	    "movl 4(%%eax),%%ebx\n\t"
@@ -168,7 +168,7 @@
 	    "lahf\n\t"
 	    "shrl $8,%%eax\n\t"
 	    "andl $1,%%eax\n"
-	    :"=a"(rc), "+m" (*regs)
+	    :"=a"(rc)
 	    :    "a"(regs)
 	    :    "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory");
 #endif
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 667abd23..7c6de4c 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c
index f646725..748c3b0 100644
--- a/drivers/char/serial167.c
+++ b/drivers/char/serial167.c
@@ -52,7 +52,6 @@
 #include <linux/interrupt.h>
 #include <linux/serial.h>
 #include <linux/serialP.h>
-#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/fcntl.h>
 #include <linux/ptrace.h>
diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c
index 9f8495b..a7616d2 100644
--- a/drivers/char/specialix.c
+++ b/drivers/char/specialix.c
@@ -87,7 +87,6 @@
 #include <linux/tty_flip.h>
 #include <linux/mm.h>
 #include <linux/serial.h>
-#include <linux/smp_lock.h>
 #include <linux/fcntl.h>
 #include <linux/major.h>
 #include <linux/delay.h>
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 4bef6ab..461a5a0 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -40,7 +40,6 @@
 #include <linux/stallion.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/ctype.h>
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index e53f168..a786326 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -216,7 +216,6 @@
 #include <linux/eisa.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/miscdevice.h>
 #include <linux/bitops.h>
diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c
index 493b47a..956ebe2 100644
--- a/drivers/char/uv_mmtimer.c
+++ b/drivers/char/uv_mmtimer.c
@@ -23,7 +23,6 @@
 #include <linux/interrupt.h>
 #include <linux/time.h>
 #include <linux/math64.h>
-#include <linux/smp_lock.h>
 
 #include <asm/genapic.h>
 #include <asm/uv/uv_hub.h>
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 6c1b676..896a2ce 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1547,31 +1547,16 @@
 	nr_queues = use_multiport(portdev) ? (nr_ports + 1) * 2 : 2;
 
 	vqs = kmalloc(nr_queues * sizeof(struct virtqueue *), GFP_KERNEL);
-	if (!vqs) {
-		err = -ENOMEM;
-		goto fail;
-	}
 	io_callbacks = kmalloc(nr_queues * sizeof(vq_callback_t *), GFP_KERNEL);
-	if (!io_callbacks) {
-		err = -ENOMEM;
-		goto free_vqs;
-	}
 	io_names = kmalloc(nr_queues * sizeof(char *), GFP_KERNEL);
-	if (!io_names) {
-		err = -ENOMEM;
-		goto free_callbacks;
-	}
 	portdev->in_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *),
 				  GFP_KERNEL);
-	if (!portdev->in_vqs) {
-		err = -ENOMEM;
-		goto free_names;
-	}
 	portdev->out_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *),
 				   GFP_KERNEL);
-	if (!portdev->out_vqs) {
+	if (!vqs || !io_callbacks || !io_names || !portdev->in_vqs ||
+			!portdev->out_vqs) {
 		err = -ENOMEM;
-		goto free_invqs;
+		goto free;
 	}
 
 	/*
@@ -1605,7 +1590,7 @@
 					      io_callbacks,
 					      (const char **)io_names);
 	if (err)
-		goto free_outvqs;
+		goto free;
 
 	j = 0;
 	portdev->in_vqs[0] = vqs[0];
@@ -1621,23 +1606,19 @@
 			portdev->out_vqs[i] = vqs[j + 1];
 		}
 	}
-	kfree(io_callbacks);
 	kfree(io_names);
+	kfree(io_callbacks);
 	kfree(vqs);
 
 	return 0;
 
-free_names:
-	kfree(io_names);
-free_callbacks:
-	kfree(io_callbacks);
-free_outvqs:
+free:
 	kfree(portdev->out_vqs);
-free_invqs:
 	kfree(portdev->in_vqs);
-free_vqs:
+	kfree(io_names);
+	kfree(io_callbacks);
 	kfree(vqs);
-fail:
+
 	return err;
 }
 
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index bfae4b3..afa576a 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1468,7 +1468,7 @@
 
 /* SCSI stack integration */
 
-static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done)
+static int sbp2_scsi_queuecommand_lck(struct scsi_cmnd *cmd, scsi_done_fn_t done)
 {
 	struct sbp2_logical_unit *lu = cmd->device->hostdata;
 	struct fw_device *device = target_device(lu->tgt);
@@ -1534,6 +1534,8 @@
 	return retval;
 }
 
+static DEF_SCSI_QCMD(sbp2_scsi_queuecommand)
+
 static int sbp2_scsi_slave_alloc(struct scsi_device *sdev)
 {
 	struct sbp2_logical_unit *lu = sdev->hostdata;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index b744dad..a39794b 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -37,7 +37,6 @@
 #include "drmP.h"
 #include <linux/poll.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 
 /* from BKL pushdown: note that nothing else serializes idr_find() */
 DEFINE_MUTEX(drm_global_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 80745f8..f737960 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -150,7 +150,8 @@
 
 static const struct intel_device_info intel_ironlake_m_info = {
 	.gen = 5, .is_mobile = 1,
-	.need_gfx_hws = 1, .has_fbc = 1, .has_rc6 = 1, .has_hotplug = 1,
+	.need_gfx_hws = 1, .has_rc6 = 1, .has_hotplug = 1,
+	.has_fbc = 0, /* disabled due to buggy hardware */
 	.has_bsd_ring = 1,
 };
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 90414ae..409826d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1045,6 +1045,8 @@
 int i915_gem_object_set_domain(struct drm_gem_object *obj,
 			       uint32_t read_domains,
 			       uint32_t write_domain);
+int i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj,
+			      bool interruptible);
 int i915_gem_init_ringbuffer(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ef188e3..17b1cba 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -547,6 +547,19 @@
 	struct drm_i915_gem_object *obj_priv;
 	int ret = 0;
 
+	if (args->size == 0)
+		return 0;
+
+	if (!access_ok(VERIFY_WRITE,
+		       (char __user *)(uintptr_t)args->data_ptr,
+		       args->size))
+		return -EFAULT;
+
+	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
+				       args->size);
+	if (ret)
+		return -EFAULT;
+
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
@@ -564,23 +577,6 @@
 		goto out;
 	}
 
-	if (args->size == 0)
-		goto out;
-
-	if (!access_ok(VERIFY_WRITE,
-		       (char __user *)(uintptr_t)args->data_ptr,
-		       args->size)) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
-				       args->size);
-	if (ret) {
-		ret = -EFAULT;
-		goto out;
-	}
-
 	ret = i915_gem_object_get_pages_or_evict(obj);
 	if (ret)
 		goto out;
@@ -981,7 +977,20 @@
 	struct drm_i915_gem_pwrite *args = data;
 	struct drm_gem_object *obj;
 	struct drm_i915_gem_object *obj_priv;
-	int ret = 0;
+	int ret;
+
+	if (args->size == 0)
+		return 0;
+
+	if (!access_ok(VERIFY_READ,
+		       (char __user *)(uintptr_t)args->data_ptr,
+		       args->size))
+		return -EFAULT;
+
+	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
+				      args->size);
+	if (ret)
+		return -EFAULT;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -994,30 +1003,12 @@
 	}
 	obj_priv = to_intel_bo(obj);
 
-
 	/* Bounds check destination. */
 	if (args->offset > obj->size || args->size > obj->size - args->offset) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if (args->size == 0)
-		goto out;
-
-	if (!access_ok(VERIFY_READ,
-		       (char __user *)(uintptr_t)args->data_ptr,
-		       args->size)) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
-				      args->size);
-	if (ret) {
-		ret = -EFAULT;
-		goto out;
-	}
-
 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
 	 * it would end up going through the fenced access, and we'll get
 	 * different detiling behavior between reading and writing.
@@ -2907,6 +2898,20 @@
 	return 0;
 }
 
+int
+i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj,
+			  bool interruptible)
+{
+	if (!obj->active)
+		return 0;
+
+	if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
+		i915_gem_flush_ring(obj->base.dev, NULL, obj->ring,
+				    0, obj->base.write_domain);
+
+	return i915_gem_object_wait_rendering(&obj->base, interruptible);
+}
+
 /**
  * Moves a single object to the CPU read, and possibly write domain.
  *
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index c55c770..8df5743 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -34,6 +34,25 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 
+/* Here's the desired hotplug mode */
+#define ADPA_HOTPLUG_BITS (ADPA_CRT_HOTPLUG_PERIOD_128 |		\
+			   ADPA_CRT_HOTPLUG_WARMUP_10MS |		\
+			   ADPA_CRT_HOTPLUG_SAMPLE_4S |			\
+			   ADPA_CRT_HOTPLUG_VOLTAGE_50 |		\
+			   ADPA_CRT_HOTPLUG_VOLREF_325MV |		\
+			   ADPA_CRT_HOTPLUG_ENABLE)
+
+struct intel_crt {
+	struct intel_encoder base;
+	bool force_hotplug_required;
+};
+
+static struct intel_crt *intel_attached_crt(struct drm_connector *connector)
+{
+	return container_of(intel_attached_encoder(connector),
+			    struct intel_crt, base);
+}
+
 static void intel_crt_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
@@ -129,7 +148,7 @@
 			   dpll_md & ~DPLL_MD_UDI_MULTIPLIER_MASK);
 	}
 
-	adpa = 0;
+	adpa = ADPA_HOTPLUG_BITS;
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
 		adpa |= ADPA_HSYNC_ACTIVE_HIGH;
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
@@ -157,53 +176,44 @@
 static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
+	struct intel_crt *crt = intel_attached_crt(connector);
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 adpa, temp;
+	u32 adpa;
 	bool ret;
-	bool turn_off_dac = false;
 
-	temp = adpa = I915_READ(PCH_ADPA);
+	/* The first time through, trigger an explicit detection cycle */
+	if (crt->force_hotplug_required) {
+		bool turn_off_dac = HAS_PCH_SPLIT(dev);
+		u32 save_adpa;
 
-	if (HAS_PCH_SPLIT(dev))
-		turn_off_dac = true;
+		crt->force_hotplug_required = 0;
 
-	adpa &= ~ADPA_CRT_HOTPLUG_MASK;
-	if (turn_off_dac)
-		adpa &= ~ADPA_DAC_ENABLE;
+		save_adpa = adpa = I915_READ(PCH_ADPA);
+		DRM_DEBUG_KMS("trigger hotplug detect cycle: adpa=0x%x\n", adpa);
 
-	/* disable HPD first */
-	I915_WRITE(PCH_ADPA, adpa);
-	(void)I915_READ(PCH_ADPA);
+		adpa |= ADPA_CRT_HOTPLUG_FORCE_TRIGGER;
+		if (turn_off_dac)
+			adpa &= ~ADPA_DAC_ENABLE;
 
-	adpa |= (ADPA_CRT_HOTPLUG_PERIOD_128 |
-			ADPA_CRT_HOTPLUG_WARMUP_10MS |
-			ADPA_CRT_HOTPLUG_SAMPLE_4S |
-			ADPA_CRT_HOTPLUG_VOLTAGE_50 | /* default */
-			ADPA_CRT_HOTPLUG_VOLREF_325MV |
-			ADPA_CRT_HOTPLUG_ENABLE |
-			ADPA_CRT_HOTPLUG_FORCE_TRIGGER);
+		I915_WRITE(PCH_ADPA, adpa);
 
-	DRM_DEBUG_KMS("pch crt adpa 0x%x", adpa);
-	I915_WRITE(PCH_ADPA, adpa);
+		if (wait_for((I915_READ(PCH_ADPA) & ADPA_CRT_HOTPLUG_FORCE_TRIGGER) == 0,
+			     1000))
+			DRM_DEBUG_KMS("timed out waiting for FORCE_TRIGGER");
 
-	if (wait_for((I915_READ(PCH_ADPA) & ADPA_CRT_HOTPLUG_FORCE_TRIGGER) == 0,
-		     1000))
-		DRM_DEBUG_KMS("timed out waiting for FORCE_TRIGGER");
-
-	if (turn_off_dac) {
-		/* Make sure hotplug is enabled */
-		I915_WRITE(PCH_ADPA, temp | ADPA_CRT_HOTPLUG_ENABLE);
-		(void)I915_READ(PCH_ADPA);
+		if (turn_off_dac) {
+			I915_WRITE(PCH_ADPA, save_adpa);
+			POSTING_READ(PCH_ADPA);
+		}
 	}
 
 	/* Check the status to see if both blue and green are on now */
 	adpa = I915_READ(PCH_ADPA);
-	adpa &= ADPA_CRT_HOTPLUG_MONITOR_MASK;
-	if ((adpa == ADPA_CRT_HOTPLUG_MONITOR_COLOR) ||
-		(adpa == ADPA_CRT_HOTPLUG_MONITOR_MONO))
+	if ((adpa & ADPA_CRT_HOTPLUG_MONITOR_MASK) != 0)
 		ret = true;
 	else
 		ret = false;
+	DRM_DEBUG_KMS("ironlake hotplug adpa=0x%x, result %d\n", adpa, ret);
 
 	return ret;
 }
@@ -277,13 +287,12 @@
 	return i2c_transfer(&dev_priv->gmbus[ddc_bus].adapter, msgs, 1) == 1;
 }
 
-static bool intel_crt_detect_ddc(struct drm_encoder *encoder)
+static bool intel_crt_detect_ddc(struct intel_crt *crt)
 {
-	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
-	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
+	struct drm_i915_private *dev_priv = crt->base.base.dev->dev_private;
 
 	/* CRT should always be at 0, but check anyway */
-	if (intel_encoder->type != INTEL_OUTPUT_ANALOG)
+	if (crt->base.type != INTEL_OUTPUT_ANALOG)
 		return false;
 
 	if (intel_crt_ddc_probe(dev_priv, dev_priv->crt_ddc_pin)) {
@@ -291,7 +300,7 @@
 		return true;
 	}
 
-	if (intel_ddc_probe(intel_encoder, dev_priv->crt_ddc_pin)) {
+	if (intel_ddc_probe(&crt->base, dev_priv->crt_ddc_pin)) {
 		DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n");
 		return true;
 	}
@@ -300,9 +309,9 @@
 }
 
 static enum drm_connector_status
-intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder)
+intel_crt_load_detect(struct drm_crtc *crtc, struct intel_crt *crt)
 {
-	struct drm_encoder *encoder = &intel_encoder->base;
+	struct drm_encoder *encoder = &crt->base.base;
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -434,7 +443,7 @@
 intel_crt_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_device *dev = connector->dev;
-	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	struct intel_crt *crt = intel_attached_crt(connector);
 	struct drm_crtc *crtc;
 	int dpms_mode;
 	enum drm_connector_status status;
@@ -443,28 +452,31 @@
 		if (intel_crt_detect_hotplug(connector)) {
 			DRM_DEBUG_KMS("CRT detected via hotplug\n");
 			return connector_status_connected;
-		} else
+		} else {
+			DRM_DEBUG_KMS("CRT not detected via hotplug\n");
 			return connector_status_disconnected;
+		}
 	}
 
-	if (intel_crt_detect_ddc(&encoder->base))
+	if (intel_crt_detect_ddc(crt))
 		return connector_status_connected;
 
 	if (!force)
 		return connector->status;
 
 	/* for pre-945g platforms use load detect */
-	if (encoder->base.crtc && encoder->base.crtc->enabled) {
-		status = intel_crt_load_detect(encoder->base.crtc, encoder);
+	crtc = crt->base.base.crtc;
+	if (crtc && crtc->enabled) {
+		status = intel_crt_load_detect(crtc, crt);
 	} else {
-		crtc = intel_get_load_detect_pipe(encoder, connector,
+		crtc = intel_get_load_detect_pipe(&crt->base, connector,
 						  NULL, &dpms_mode);
 		if (crtc) {
-			if (intel_crt_detect_ddc(&encoder->base))
+			if (intel_crt_detect_ddc(crt))
 				status = connector_status_connected;
 			else
-				status = intel_crt_load_detect(crtc, encoder);
-			intel_release_load_detect_pipe(encoder,
+				status = intel_crt_load_detect(crtc, crt);
+			intel_release_load_detect_pipe(&crt->base,
 						       connector, dpms_mode);
 		} else
 			status = connector_status_unknown;
@@ -536,17 +548,17 @@
 void intel_crt_init(struct drm_device *dev)
 {
 	struct drm_connector *connector;
-	struct intel_encoder *intel_encoder;
+	struct intel_crt *crt;
 	struct intel_connector *intel_connector;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	intel_encoder = kzalloc(sizeof(struct intel_encoder), GFP_KERNEL);
-	if (!intel_encoder)
+	crt = kzalloc(sizeof(struct intel_crt), GFP_KERNEL);
+	if (!crt)
 		return;
 
 	intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
 	if (!intel_connector) {
-		kfree(intel_encoder);
+		kfree(crt);
 		return;
 	}
 
@@ -554,20 +566,20 @@
 	drm_connector_init(dev, &intel_connector->base,
 			   &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
 
-	drm_encoder_init(dev, &intel_encoder->base, &intel_crt_enc_funcs,
+	drm_encoder_init(dev, &crt->base.base, &intel_crt_enc_funcs,
 			 DRM_MODE_ENCODER_DAC);
 
-	intel_connector_attach_encoder(intel_connector, intel_encoder);
+	intel_connector_attach_encoder(intel_connector, &crt->base);
 
-	intel_encoder->type = INTEL_OUTPUT_ANALOG;
-	intel_encoder->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
-				   (1 << INTEL_ANALOG_CLONE_BIT) |
-				   (1 << INTEL_SDVO_LVDS_CLONE_BIT);
-	intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+	crt->base.type = INTEL_OUTPUT_ANALOG;
+	crt->base.clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT |
+				1 << INTEL_ANALOG_CLONE_BIT |
+				1 << INTEL_SDVO_LVDS_CLONE_BIT);
+	crt->base.crtc_mask = (1 << 0) | (1 << 1);
 	connector->interlace_allowed = 1;
 	connector->doublescan_allowed = 0;
 
-	drm_encoder_helper_add(&intel_encoder->base, &intel_crt_helper_funcs);
+	drm_encoder_helper_add(&crt->base.base, &intel_crt_helper_funcs);
 	drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs);
 
 	drm_sysfs_connector_add(connector);
@@ -577,5 +589,22 @@
 	else
 		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 
+	/*
+	 * Configure the automatic hotplug detection stuff
+	 */
+	crt->force_hotplug_required = 0;
+	if (HAS_PCH_SPLIT(dev)) {
+		u32 adpa;
+
+		adpa = I915_READ(PCH_ADPA);
+		adpa &= ~ADPA_CRT_HOTPLUG_MASK;
+		adpa |= ADPA_HOTPLUG_BITS;
+		I915_WRITE(PCH_ADPA, adpa);
+		POSTING_READ(PCH_ADPA);
+
+		DRM_DEBUG_KMS("pch crt adpa set to 0x%x\n", adpa);
+		crt->force_hotplug_required = 1;
+	}
+
 	dev_priv->hotplug_supported_mask |= CRT_HOTPLUG_INT_STATUS;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 48d8fd6..bee24b1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1611,6 +1611,18 @@
 
 		wait_event(dev_priv->pending_flip_queue,
 			   atomic_read(&obj_priv->pending_flip) == 0);
+
+		/* Big Hammer, we also need to ensure that any pending
+		 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
+		 * current scanout is retired before unpinning the old
+		 * framebuffer.
+		 */
+		ret = i915_gem_object_flush_gpu(obj_priv, false);
+		if (ret) {
+			i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
 	}
 
 	ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 2be4f72..3dba086 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -160,7 +160,7 @@
 	};
 	struct intel_gpio *gpio;
 
-	if (pin < 1 || pin > 7)
+	if (pin >= ARRAY_SIZE(map_pin_to_reg) || !map_pin_to_reg[pin])
 		return NULL;
 
 	gpio = kzalloc(sizeof(struct intel_gpio), GFP_KERNEL);
@@ -172,7 +172,8 @@
 		gpio->reg += PCH_GPIOA - GPIOA;
 	gpio->dev_priv = dev_priv;
 
-	snprintf(gpio->adapter.name, I2C_NAME_SIZE, "GPIO%c", "?BACDEF?"[pin]);
+	snprintf(gpio->adapter.name, sizeof(gpio->adapter.name),
+		 "i915 GPIO%c", "?BACDE?F"[pin]);
 	gpio->adapter.owner = THIS_MODULE;
 	gpio->adapter.algo_data	= &gpio->algo;
 	gpio->adapter.dev.parent = &dev_priv->dev->pdev->dev;
@@ -349,7 +350,7 @@
 		"panel",
 		"dpc",
 		"dpb",
-		"reserved"
+		"reserved",
 		"dpd",
 	};
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -366,8 +367,8 @@
 		bus->adapter.owner = THIS_MODULE;
 		bus->adapter.class = I2C_CLASS_DDC;
 		snprintf(bus->adapter.name,
-			 I2C_NAME_SIZE,
-			 "gmbus %s",
+			 sizeof(bus->adapter.name),
+			 "i915 gmbus %s",
 			 names[i]);
 
 		bus->adapter.dev.parent = &dev->pdev->dev;
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index 406228f..b14c811 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/acpi.h>
 
 #include "drmP.h"
 #include "nouveau_drv.h"
@@ -136,6 +137,14 @@
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
+#ifdef CONFIG_ACPI
+	if (acpi_video_backlight_support()) {
+		NV_INFO(dev, "ACPI backlight interface available, "
+			     "not registering our own\n");
+		return 0;
+	}
+#endif
+
 	switch (dev_priv->card_type) {
 	case NV_40:
 		return nouveau_nv40_backlight_init(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 5f21030..b229357 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -6829,7 +6829,7 @@
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	unsigned htotal;
 
-	if (dev_priv->chipset >= NV_50) {
+	if (dev_priv->card_type >= NV_50) {
 		if (NVReadVgaCrtc(dev, 0, 0x00) == 0 &&
 		    NVReadVgaCrtc(dev, 0, 0x1a) == 0)
 			return false;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 80353e2..c41e1c2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -143,8 +143,10 @@
 	nvbo->no_vm = no_vm;
 	nvbo->tile_mode = tile_mode;
 	nvbo->tile_flags = tile_flags;
+	nvbo->bo.bdev = &dev_priv->ttm.bdev;
 
-	nouveau_bo_fixup_align(dev, tile_mode, tile_flags, &align, &size);
+	nouveau_bo_fixup_align(dev, tile_mode, nouveau_bo_tile_layout(nvbo),
+			       &align, &size);
 	align >>= PAGE_SHIFT;
 
 	nouveau_bo_placement_set(nvbo, flags, 0);
@@ -176,6 +178,31 @@
 		pl[(*n)++] = TTM_PL_FLAG_SYSTEM | flags;
 }
 
+static void
+set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
+{
+	struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev);
+
+	if (dev_priv->card_type == NV_10 &&
+	    nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM)) {
+		/*
+		 * Make sure that the color and depth buffers are handled
+		 * by independent memory controller units. Up to a 9x
+		 * speed up when alpha-blending and depth-test are enabled
+		 * at the same time.
+		 */
+		int vram_pages = dev_priv->vram_size >> PAGE_SHIFT;
+
+		if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) {
+			nvbo->placement.fpfn = vram_pages / 2;
+			nvbo->placement.lpfn = ~0;
+		} else {
+			nvbo->placement.fpfn = 0;
+			nvbo->placement.lpfn = vram_pages / 2;
+		}
+	}
+}
+
 void
 nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
 {
@@ -190,6 +217,8 @@
 	pl->busy_placement = nvbo->busy_placements;
 	set_placement_list(nvbo->busy_placements, &pl->num_busy_placement,
 			   type | busy, flags);
+
+	set_placement_range(nvbo, type);
 }
 
 int
@@ -525,7 +554,8 @@
 		stride  = 16 * 4;
 		height  = amount / stride;
 
-		if (new_mem->mem_type == TTM_PL_VRAM && nvbo->tile_flags) {
+		if (new_mem->mem_type == TTM_PL_VRAM &&
+		    nouveau_bo_tile_layout(nvbo)) {
 			ret = RING_SPACE(chan, 8);
 			if (ret)
 				return ret;
@@ -546,7 +576,8 @@
 			BEGIN_RING(chan, NvSubM2MF, 0x0200, 1);
 			OUT_RING  (chan, 1);
 		}
-		if (old_mem->mem_type == TTM_PL_VRAM && nvbo->tile_flags) {
+		if (old_mem->mem_type == TTM_PL_VRAM &&
+		    nouveau_bo_tile_layout(nvbo)) {
 			ret = RING_SPACE(chan, 8);
 			if (ret)
 				return ret;
@@ -753,7 +784,8 @@
 	if (dev_priv->card_type == NV_50) {
 		ret = nv50_mem_vm_bind_linear(dev,
 					      offset + dev_priv->vm_vram_base,
-					      new_mem->size, nvbo->tile_flags,
+					      new_mem->size,
+					      nouveau_bo_tile_layout(nvbo),
 					      offset);
 		if (ret)
 			return ret;
@@ -894,7 +926,8 @@
 	 * nothing to do here.
 	 */
 	if (bo->mem.mem_type != TTM_PL_VRAM) {
-		if (dev_priv->card_type < NV_50 || !nvbo->tile_flags)
+		if (dev_priv->card_type < NV_50 ||
+		    !nouveau_bo_tile_layout(nvbo))
 			return 0;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 0871495..52c356e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -281,7 +281,7 @@
 	nv_encoder = find_encoder_by_type(connector, OUTPUT_ANALOG);
 	if (!nv_encoder && !nouveau_tv_disable)
 		nv_encoder = find_encoder_by_type(connector, OUTPUT_TV);
-	if (nv_encoder) {
+	if (nv_encoder && force) {
 		struct drm_encoder *encoder = to_drm_encoder(nv_encoder);
 		struct drm_encoder_helper_funcs *helper =
 						encoder->helper_private;
@@ -641,11 +641,28 @@
 	return ret;
 }
 
+static unsigned
+get_tmds_link_bandwidth(struct drm_connector *connector)
+{
+	struct nouveau_connector *nv_connector = nouveau_connector(connector);
+	struct drm_nouveau_private *dev_priv = connector->dev->dev_private;
+	struct dcb_entry *dcb = nv_connector->detected_encoder->dcb;
+
+	if (dcb->location != DCB_LOC_ON_CHIP ||
+	    dev_priv->chipset >= 0x46)
+		return 165000;
+	else if (dev_priv->chipset >= 0x40)
+		return 155000;
+	else if (dev_priv->chipset >= 0x18)
+		return 135000;
+	else
+		return 112000;
+}
+
 static int
 nouveau_connector_mode_valid(struct drm_connector *connector,
 			     struct drm_display_mode *mode)
 {
-	struct drm_nouveau_private *dev_priv = connector->dev->dev_private;
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_encoder *nv_encoder = nv_connector->detected_encoder;
 	struct drm_encoder *encoder = to_drm_encoder(nv_encoder);
@@ -663,11 +680,9 @@
 		max_clock = 400000;
 		break;
 	case OUTPUT_TMDS:
-		if ((dev_priv->card_type >= NV_50 && !nouveau_duallink) ||
-		    !nv_encoder->dcb->duallink_possible)
-			max_clock = 165000;
-		else
-			max_clock = 330000;
+		max_clock = get_tmds_link_bandwidth(connector);
+		if (nouveau_duallink && nv_encoder->dcb->duallink_possible)
+			max_clock *= 2;
 		break;
 	case OUTPUT_ANALOG:
 		max_clock = nv_encoder->dcb->crtconf.maxfreq;
@@ -709,44 +724,6 @@
 	return NULL;
 }
 
-void
-nouveau_connector_set_polling(struct drm_connector *connector)
-{
-	struct drm_device *dev = connector->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct drm_crtc *crtc;
-	bool spare_crtc = false;
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		spare_crtc |= !crtc->enabled;
-
-	connector->polled = 0;
-
-	switch (connector->connector_type) {
-	case DRM_MODE_CONNECTOR_VGA:
-	case DRM_MODE_CONNECTOR_TV:
-		if (dev_priv->card_type >= NV_50 ||
-		    (nv_gf4_disp_arch(dev) && spare_crtc))
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
-		break;
-
-	case DRM_MODE_CONNECTOR_DVII:
-	case DRM_MODE_CONNECTOR_DVID:
-	case DRM_MODE_CONNECTOR_HDMIA:
-	case DRM_MODE_CONNECTOR_DisplayPort:
-	case DRM_MODE_CONNECTOR_eDP:
-		if (dev_priv->card_type >= NV_50)
-			connector->polled = DRM_CONNECTOR_POLL_HPD;
-		else if (connector->connector_type == DRM_MODE_CONNECTOR_DVID ||
-			 spare_crtc)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
-		break;
-
-	default:
-		break;
-	}
-}
-
 static const struct drm_connector_helper_funcs
 nouveau_connector_helper_funcs = {
 	.get_modes = nouveau_connector_get_modes,
@@ -872,6 +849,7 @@
 					dev->mode_config.scaling_mode_property,
 					nv_connector->scaling_mode);
 		}
+		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 		/* fall-through */
 	case DCB_CONNECTOR_TV_0:
 	case DCB_CONNECTOR_TV_1:
@@ -888,11 +866,16 @@
 				dev->mode_config.dithering_mode_property,
 				nv_connector->use_dithering ?
 				DRM_MODE_DITHERING_ON : DRM_MODE_DITHERING_OFF);
+
+		if (dcb->type != DCB_CONNECTOR_LVDS) {
+			if (dev_priv->card_type >= NV_50)
+				connector->polled = DRM_CONNECTOR_POLL_HPD;
+			else
+				connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		}
 		break;
 	}
 
-	nouveau_connector_set_polling(connector);
-
 	drm_sysfs_connector_add(connector);
 	dcb->drm = connector;
 	return dcb->drm;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index c21ed6b..711b1e9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -52,9 +52,6 @@
 struct drm_connector *
 nouveau_connector_create(struct drm_device *, int index);
 
-void
-nouveau_connector_set_polling(struct drm_connector *);
-
 int
 nouveau_connector_bpp(struct drm_connector *);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 3a07e58..1c7db64 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -100,6 +100,9 @@
 	int pin_refcnt;
 };
 
+#define nouveau_bo_tile_layout(nvbo)				\
+	((nvbo)->tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK)
+
 static inline struct nouveau_bo *
 nouveau_bo(struct ttm_buffer_object *bo)
 {
@@ -304,6 +307,7 @@
 	void (*destroy_context)(struct nouveau_channel *);
 	int  (*load_context)(struct nouveau_channel *);
 	int  (*unload_context)(struct drm_device *);
+	void (*tlb_flush)(struct drm_device *dev);
 };
 
 struct nouveau_pgraph_object_method {
@@ -336,6 +340,7 @@
 	void (*destroy_context)(struct nouveau_channel *);
 	int  (*load_context)(struct nouveau_channel *);
 	int  (*unload_context)(struct drm_device *);
+	void (*tlb_flush)(struct drm_device *dev);
 
 	void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
 				  uint32_t size, uint32_t pitch);
@@ -485,13 +490,13 @@
 };
 
 struct nv04_crtc_reg {
-	unsigned char MiscOutReg;     /* */
+	unsigned char MiscOutReg;
 	uint8_t CRTC[0xa0];
 	uint8_t CR58[0x10];
 	uint8_t Sequencer[5];
 	uint8_t Graphics[9];
 	uint8_t Attribute[21];
-	unsigned char DAC[768];       /* Internal Colorlookuptable */
+	unsigned char DAC[768];
 
 	/* PCRTC regs */
 	uint32_t fb_start;
@@ -539,43 +544,9 @@
 };
 
 struct nv04_mode_state {
-	uint32_t bpp;
-	uint32_t width;
-	uint32_t height;
-	uint32_t interlace;
-	uint32_t repaint0;
-	uint32_t repaint1;
-	uint32_t screen;
-	uint32_t scale;
-	uint32_t dither;
-	uint32_t extra;
-	uint32_t fifo;
-	uint32_t pixel;
-	uint32_t horiz;
-	int arbitration0;
-	int arbitration1;
-	uint32_t pll;
-	uint32_t pllB;
-	uint32_t vpll;
-	uint32_t vpll2;
-	uint32_t vpllB;
-	uint32_t vpll2B;
+	struct nv04_crtc_reg crtc_reg[2];
 	uint32_t pllsel;
 	uint32_t sel_clk;
-	uint32_t general;
-	uint32_t crtcOwner;
-	uint32_t head;
-	uint32_t head2;
-	uint32_t cursorConfig;
-	uint32_t cursor0;
-	uint32_t cursor1;
-	uint32_t cursor2;
-	uint32_t timingH;
-	uint32_t timingV;
-	uint32_t displayV;
-	uint32_t crtcSync;
-
-	struct nv04_crtc_reg crtc_reg[2];
 };
 
 enum nouveau_card_type {
@@ -613,6 +584,12 @@
 	struct work_struct irq_work;
 	struct work_struct hpd_work;
 
+	struct {
+		spinlock_t lock;
+		uint32_t hpd0_bits;
+		uint32_t hpd1_bits;
+	} hpd_state;
+
 	struct list_head vbl_waiting;
 
 	struct {
@@ -1045,6 +1022,7 @@
 extern void nv50_fifo_destroy_context(struct nouveau_channel *);
 extern int  nv50_fifo_load_context(struct nouveau_channel *);
 extern int  nv50_fifo_unload_context(struct drm_device *);
+extern void nv50_fifo_tlb_flush(struct drm_device *dev);
 
 /* nvc0_fifo.c */
 extern int  nvc0_fifo_init(struct drm_device *);
@@ -1122,6 +1100,8 @@
 extern int  nv50_graph_unload_context(struct drm_device *);
 extern void nv50_graph_context_switch(struct drm_device *);
 extern int  nv50_grctx_init(struct nouveau_grctx *);
+extern void nv50_graph_tlb_flush(struct drm_device *dev);
+extern void nv86_graph_tlb_flush(struct drm_device *dev);
 
 /* nvc0_graph.c */
 extern int  nvc0_graph_init(struct drm_device *);
@@ -1239,7 +1219,6 @@
 extern void nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val);
 extern u32 nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index);
 extern void nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val);
-extern int nouveau_bo_sync_gpu(struct nouveau_bo *, struct nouveau_channel *);
 
 /* nouveau_fence.c */
 struct nouveau_fence;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 441b124..ab1bbfb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -249,6 +249,7 @@
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_semaphore *sema;
+	int ret;
 
 	if (!USE_SEMA(dev))
 		return NULL;
@@ -257,10 +258,14 @@
 	if (!sema)
 		goto fail;
 
+	ret = drm_mm_pre_get(&dev_priv->fence.heap);
+	if (ret)
+		goto fail;
+
 	spin_lock(&dev_priv->fence.lock);
 	sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
 	if (sema->mem)
-		sema->mem = drm_mm_get_block(sema->mem, 4, 0);
+		sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0);
 	spin_unlock(&dev_priv->fence.lock);
 
 	if (!sema->mem)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 5c4c929..9a1fdcf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -107,23 +107,29 @@
 }
 
 static bool
-nouveau_gem_tile_flags_valid(struct drm_device *dev, uint32_t tile_flags) {
-	switch (tile_flags) {
-	case 0x0000:
-	case 0x1800:
-	case 0x2800:
-	case 0x4800:
-	case 0x7000:
-	case 0x7400:
-	case 0x7a00:
-	case 0xe000:
-		break;
-	default:
-		NV_ERROR(dev, "bad page flags: 0x%08x\n", tile_flags);
-		return false;
+nouveau_gem_tile_flags_valid(struct drm_device *dev, uint32_t tile_flags)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	if (dev_priv->card_type >= NV_50) {
+		switch (tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK) {
+		case 0x0000:
+		case 0x1800:
+		case 0x2800:
+		case 0x4800:
+		case 0x7000:
+		case 0x7400:
+		case 0x7a00:
+		case 0xe000:
+			return true;
+		}
+	} else {
+		if (!(tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK))
+			return true;
 	}
 
-	return true;
+	NV_ERROR(dev, "bad page flags: 0x%08x\n", tile_flags);
+	return false;
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nouveau_hw.c b/drivers/gpu/drm/nouveau/nouveau_hw.c
index bed669a..b9672a0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hw.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hw.c
@@ -519,11 +519,11 @@
 
 	struct pll_lims pll_lim;
 	struct nouveau_pll_vals pv;
-	uint32_t pllreg = head ? NV_RAMDAC_VPLL2 : NV_PRAMDAC_VPLL_COEFF;
+	enum pll_types pll = head ? PLL_VPLL1 : PLL_VPLL0;
 
-	if (get_pll_limits(dev, pllreg, &pll_lim))
+	if (get_pll_limits(dev, pll, &pll_lim))
 		return;
-	nouveau_hw_get_pllvals(dev, pllreg, &pv);
+	nouveau_hw_get_pllvals(dev, pll, &pv);
 
 	if (pv.M1 >= pll_lim.vco1.min_m && pv.M1 <= pll_lim.vco1.max_m &&
 	    pv.N1 >= pll_lim.vco1.min_n && pv.N1 <= pll_lim.vco1.max_n &&
@@ -536,7 +536,7 @@
 	pv.M1 = pll_lim.vco1.max_m;
 	pv.N1 = pll_lim.vco1.min_n;
 	pv.log2P = pll_lim.max_usable_log2p;
-	nouveau_hw_setpll(dev, pllreg, &pv);
+	nouveau_hw_setpll(dev, pll_lim.reg, &pv);
 }
 
 /*
diff --git a/drivers/gpu/drm/nouveau/nouveau_hw.h b/drivers/gpu/drm/nouveau/nouveau_hw.h
index 869130f..2989090 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hw.h
+++ b/drivers/gpu/drm/nouveau/nouveau_hw.h
@@ -416,6 +416,25 @@
 }
 
 static inline void
+nv_set_crtc_base(struct drm_device *dev, int head, uint32_t offset)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	NVWriteCRTC(dev, head, NV_PCRTC_START, offset);
+
+	if (dev_priv->card_type == NV_04) {
+		/*
+		 * Hilarious, the 24th bit doesn't want to stick to
+		 * PCRTC_START...
+		 */
+		int cre_heb = NVReadVgaCrtc(dev, head, NV_CIO_CRE_HEB__INDEX);
+
+		NVWriteVgaCrtc(dev, head, NV_CIO_CRE_HEB__INDEX,
+			       (cre_heb & ~0x40) | ((offset >> 18) & 0x40));
+	}
+}
+
+static inline void
 nv_show_cursor(struct drm_device *dev, int head, bool show)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/nouveau/nouveau_i2c.c b/drivers/gpu/drm/nouveau/nouveau_i2c.c
index fdd7e3d..cb389d0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_i2c.c
+++ b/drivers/gpu/drm/nouveau/nouveau_i2c.c
@@ -256,7 +256,7 @@
 	if (index >= DCB_MAX_NUM_I2C_ENTRIES)
 		return NULL;
 
-	if (dev_priv->chipset >= NV_50 && (i2c->entry & 0x00000100)) {
+	if (dev_priv->card_type >= NV_50 && (i2c->entry & 0x00000100)) {
 		uint32_t reg = 0xe500, val;
 
 		if (i2c->port_type == 6) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
index 6fd51a5..7bfd9e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
+++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
@@ -42,6 +42,13 @@
 #include "nouveau_connector.h"
 #include "nv50_display.h"
 
+static DEFINE_RATELIMIT_STATE(nouveau_ratelimit_state, 3 * HZ, 20);
+
+static int nouveau_ratelimit(void)
+{
+	return __ratelimit(&nouveau_ratelimit_state);
+}
+
 void
 nouveau_irq_preinstall(struct drm_device *dev)
 {
@@ -53,6 +60,7 @@
 	if (dev_priv->card_type >= NV_50) {
 		INIT_WORK(&dev_priv->irq_work, nv50_display_irq_handler_bh);
 		INIT_WORK(&dev_priv->hpd_work, nv50_display_irq_hotplug_bh);
+		spin_lock_init(&dev_priv->hpd_state.lock);
 		INIT_LIST_HEAD(&dev_priv->vbl_waiting);
 	}
 }
@@ -202,8 +210,8 @@
 		}
 
 		if (status & NV_PFIFO_INTR_DMA_PUSHER) {
-			u32 get = nv_rd32(dev, 0x003244);
-			u32 put = nv_rd32(dev, 0x003240);
+			u32 dma_get = nv_rd32(dev, 0x003244);
+			u32 dma_put = nv_rd32(dev, 0x003240);
 			u32 push = nv_rd32(dev, 0x003220);
 			u32 state = nv_rd32(dev, 0x003228);
 
@@ -213,16 +221,18 @@
 				u32 ib_get = nv_rd32(dev, 0x003334);
 				u32 ib_put = nv_rd32(dev, 0x003330);
 
-				NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%02x%08x "
+				if (nouveau_ratelimit())
+					NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%02x%08x "
 					     "Put 0x%02x%08x IbGet 0x%08x IbPut 0x%08x "
 					     "State 0x%08x Push 0x%08x\n",
-					chid, ho_get, get, ho_put, put, ib_get, ib_put,
-					state, push);
+						chid, ho_get, dma_get, ho_put,
+						dma_put, ib_get, ib_put, state,
+						push);
 
 				/* METHOD_COUNT, in DMA_STATE on earlier chipsets */
 				nv_wr32(dev, 0x003364, 0x00000000);
-				if (get != put || ho_get != ho_put) {
-					nv_wr32(dev, 0x003244, put);
+				if (dma_get != dma_put || ho_get != ho_put) {
+					nv_wr32(dev, 0x003244, dma_put);
 					nv_wr32(dev, 0x003328, ho_put);
 				} else
 				if (ib_get != ib_put) {
@@ -231,10 +241,10 @@
 			} else {
 				NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%08x "
 					     "Put 0x%08x State 0x%08x Push 0x%08x\n",
-					chid, get, put, state, push);
+					chid, dma_get, dma_put, state, push);
 
-				if (get != put)
-					nv_wr32(dev, 0x003244, put);
+				if (dma_get != dma_put)
+					nv_wr32(dev, 0x003244, dma_put);
 			}
 
 			nv_wr32(dev, 0x003228, 0x00000000);
@@ -266,8 +276,9 @@
 		}
 
 		if (status) {
-			NV_INFO(dev, "PFIFO_INTR 0x%08x - Ch %d\n",
-				status, chid);
+			if (nouveau_ratelimit())
+				NV_INFO(dev, "PFIFO_INTR 0x%08x - Ch %d\n",
+					status, chid);
 			nv_wr32(dev, NV03_PFIFO_INTR_0, status);
 			status = 0;
 		}
@@ -544,13 +555,6 @@
 		nouveau_graph_dump_trap_info(dev, "PGRAPH_NOTIFY", &trap);
 }
 
-static DEFINE_RATELIMIT_STATE(nouveau_ratelimit_state, 3 * HZ, 20);
-
-static int nouveau_ratelimit(void)
-{
-	return __ratelimit(&nouveau_ratelimit_state);
-}
-
 
 static inline void
 nouveau_pgraph_intr_error(struct drm_device *dev, uint32_t nsource)
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index a163c7c..fe4a30d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -33,9 +33,9 @@
 #include "drmP.h"
 #include "drm.h"
 #include "drm_sarea.h"
-#include "nouveau_drv.h"
 
-#define MIN(a,b) a < b ? a : b
+#include "nouveau_drv.h"
+#include "nouveau_pm.h"
 
 /*
  * NV10-NV40 tiling helpers
@@ -175,11 +175,10 @@
 			}
 		}
 	}
-	dev_priv->engine.instmem.flush(dev);
 
-	nv50_vm_flush(dev, 5);
-	nv50_vm_flush(dev, 0);
-	nv50_vm_flush(dev, 4);
+	dev_priv->engine.instmem.flush(dev);
+	dev_priv->engine.fifo.tlb_flush(dev);
+	dev_priv->engine.graph.tlb_flush(dev);
 	nv50_vm_flush(dev, 6);
 	return 0;
 }
@@ -209,11 +208,10 @@
 			pte++;
 		}
 	}
-	dev_priv->engine.instmem.flush(dev);
 
-	nv50_vm_flush(dev, 5);
-	nv50_vm_flush(dev, 0);
-	nv50_vm_flush(dev, 4);
+	dev_priv->engine.instmem.flush(dev);
+	dev_priv->engine.fifo.tlb_flush(dev);
+	dev_priv->engine.graph.tlb_flush(dev);
 	nv50_vm_flush(dev, 6);
 }
 
@@ -653,6 +651,7 @@
 void
 nouveau_mem_timing_init(struct drm_device *dev)
 {
+	/* cards < NVC0 only */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	struct nouveau_pm_memtimings *memtimings = &pm->memtimings;
@@ -719,14 +718,14 @@
 		tUNK_19 = 1;
 		tUNK_20 = 0;
 		tUNK_21 = 0;
-		switch (MIN(recordlen,21)) {
-		case 21:
+		switch (min(recordlen, 22)) {
+		case 22:
 			tUNK_21 = entry[21];
-		case 20:
+		case 21:
 			tUNK_20 = entry[20];
-		case 19:
+		case 20:
 			tUNK_19 = entry[19];
-		case 18:
+		case 19:
 			tUNK_18 = entry[18];
 		default:
 			tUNK_0  = entry[0];
@@ -756,24 +755,30 @@
 		timing->reg_100228 = (tUNK_12 << 16 | tUNK_11 << 8 | tUNK_10);
 		if(recordlen > 19) {
 			timing->reg_100228 += (tUNK_19 - 1) << 24;
-		} else {
+		}/* I cannot back-up this else-statement right now
+			 else {
 			timing->reg_100228 += tUNK_12 << 24;
-		}
+		}*/
 
 		/* XXX: reg_10022c */
+		timing->reg_10022c = tUNK_2 - 1;
 
 		timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 |
 				      tUNK_13 << 8  | tUNK_13);
 
 		/* XXX: +6? */
 		timing->reg_100234 = (tRAS << 24 | (tUNK_19 + 6) << 8 | tRC);
-		if(tUNK_10 > tUNK_11) {
-			timing->reg_100234 += tUNK_10 << 16;
-		} else {
-			timing->reg_100234 += tUNK_11 << 16;
+		timing->reg_100234 += max(tUNK_10,tUNK_11) << 16;
+
+		/* XXX; reg_100238, reg_10023c
+		 * reg: 0x00??????
+		 * reg_10023c:
+		 *      0 for pre-NV50 cards
+		 *      0x????0202 for NV50+ cards (empirical evidence) */
+		if(dev_priv->card_type >= NV_50) {
+			timing->reg_10023c = 0x202;
 		}
 
-		/* XXX; reg_100238, reg_10023c */
 		NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", i,
 			 timing->reg_100220, timing->reg_100224,
 			 timing->reg_100228, timing->reg_10022c);
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 896cf86..dd572ad 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -129,7 +129,7 @@
 			if (ramin == NULL) {
 				spin_unlock(&dev_priv->ramin_lock);
 				nouveau_gpuobj_ref(NULL, &gpuobj);
-				return ret;
+				return -ENOMEM;
 			}
 
 			ramin = drm_mm_get_block_atomic(ramin, size, align);
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index 1c99c55..9f7b158 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -284,6 +284,7 @@
 	}
 }
 
+#ifdef CONFIG_HWMON
 static ssize_t
 nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf)
 {
@@ -395,10 +396,12 @@
 static const struct attribute_group hwmon_attrgroup = {
 	.attrs = hwmon_attributes,
 };
+#endif
 
 static int
 nouveau_hwmon_init(struct drm_device *dev)
 {
+#ifdef CONFIG_HWMON
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	struct device *hwmon_dev;
@@ -425,13 +428,14 @@
 	}
 
 	pm->hwmon = hwmon_dev;
-
+#endif
 	return 0;
 }
 
 static void
 nouveau_hwmon_fini(struct drm_device *dev)
 {
+#ifdef CONFIG_HWMON
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 
@@ -439,6 +443,7 @@
 		sysfs_remove_group(&pm->hwmon->kobj, &hwmon_attrgroup);
 		hwmon_device_unregister(pm->hwmon);
 	}
+#endif
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nouveau_ramht.c b/drivers/gpu/drm/nouveau/nouveau_ramht.c
index 7f16697..2d85809 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ramht.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ramht.c
@@ -153,26 +153,42 @@
 	return -ENOMEM;
 }
 
+static struct nouveau_ramht_entry *
+nouveau_ramht_remove_entry(struct nouveau_channel *chan, u32 handle)
+{
+	struct nouveau_ramht *ramht = chan ? chan->ramht : NULL;
+	struct nouveau_ramht_entry *entry;
+	unsigned long flags;
+
+	if (!ramht)
+		return NULL;
+
+	spin_lock_irqsave(&ramht->lock, flags);
+	list_for_each_entry(entry, &ramht->entries, head) {
+		if (entry->channel == chan &&
+		    (!handle || entry->handle == handle)) {
+			list_del(&entry->head);
+			spin_unlock_irqrestore(&ramht->lock, flags);
+
+			return entry;
+		}
+	}
+	spin_unlock_irqrestore(&ramht->lock, flags);
+
+	return NULL;
+}
+
 static void
-nouveau_ramht_remove_locked(struct nouveau_channel *chan, u32 handle)
+nouveau_ramht_remove_hash(struct nouveau_channel *chan, u32 handle)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
 	struct nouveau_gpuobj *ramht = chan->ramht->gpuobj;
-	struct nouveau_ramht_entry *entry, *tmp;
+	unsigned long flags;
 	u32 co, ho;
 
-	list_for_each_entry_safe(entry, tmp, &chan->ramht->entries, head) {
-		if (entry->channel != chan || entry->handle != handle)
-			continue;
-
-		nouveau_gpuobj_ref(NULL, &entry->gpuobj);
-		list_del(&entry->head);
-		kfree(entry);
-		break;
-	}
-
+	spin_lock_irqsave(&chan->ramht->lock, flags);
 	co = ho = nouveau_ramht_hash_handle(chan, handle);
 	do {
 		if (nouveau_ramht_entry_valid(dev, ramht, co) &&
@@ -184,7 +200,7 @@
 			nv_wo32(ramht, co + 0, 0x00000000);
 			nv_wo32(ramht, co + 4, 0x00000000);
 			instmem->flush(dev);
-			return;
+			goto out;
 		}
 
 		co += 8;
@@ -194,17 +210,22 @@
 
 	NV_ERROR(dev, "RAMHT entry not found. ch=%d, handle=0x%08x\n",
 		 chan->id, handle);
+out:
+	spin_unlock_irqrestore(&chan->ramht->lock, flags);
 }
 
 void
 nouveau_ramht_remove(struct nouveau_channel *chan, u32 handle)
 {
-	struct nouveau_ramht *ramht = chan->ramht;
-	unsigned long flags;
+	struct nouveau_ramht_entry *entry;
 
-	spin_lock_irqsave(&ramht->lock, flags);
-	nouveau_ramht_remove_locked(chan, handle);
-	spin_unlock_irqrestore(&ramht->lock, flags);
+	entry = nouveau_ramht_remove_entry(chan, handle);
+	if (!entry)
+		return;
+
+	nouveau_ramht_remove_hash(chan, entry->handle);
+	nouveau_gpuobj_ref(NULL, &entry->gpuobj);
+	kfree(entry);
 }
 
 struct nouveau_gpuobj *
@@ -265,23 +286,19 @@
 nouveau_ramht_ref(struct nouveau_ramht *ref, struct nouveau_ramht **ptr,
 		  struct nouveau_channel *chan)
 {
-	struct nouveau_ramht_entry *entry, *tmp;
+	struct nouveau_ramht_entry *entry;
 	struct nouveau_ramht *ramht;
-	unsigned long flags;
 
 	if (ref)
 		kref_get(&ref->refcount);
 
 	ramht = *ptr;
 	if (ramht) {
-		spin_lock_irqsave(&ramht->lock, flags);
-		list_for_each_entry_safe(entry, tmp, &ramht->entries, head) {
-			if (entry->channel != chan)
-				continue;
-
-			nouveau_ramht_remove_locked(chan, entry->handle);
+		while ((entry = nouveau_ramht_remove_entry(chan, 0))) {
+			nouveau_ramht_remove_hash(chan, entry->handle);
+			nouveau_gpuobj_ref(NULL, &entry->gpuobj);
+			kfree(entry);
 		}
-		spin_unlock_irqrestore(&ramht->lock, flags);
 
 		kref_put(&ramht->refcount, nouveau_ramht_del);
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 288baca..d4ac970 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -120,8 +120,8 @@
 	dev_priv->engine.instmem.flush(nvbe->dev);
 
 	if (dev_priv->card_type == NV_50) {
-		nv50_vm_flush(dev, 5); /* PGRAPH */
-		nv50_vm_flush(dev, 0); /* PFIFO */
+		dev_priv->engine.fifo.tlb_flush(dev);
+		dev_priv->engine.graph.tlb_flush(dev);
 	}
 
 	nvbe->bound = true;
@@ -162,8 +162,8 @@
 	dev_priv->engine.instmem.flush(nvbe->dev);
 
 	if (dev_priv->card_type == NV_50) {
-		nv50_vm_flush(dev, 5);
-		nv50_vm_flush(dev, 0);
+		dev_priv->engine.fifo.tlb_flush(dev);
+		dev_priv->engine.graph.tlb_flush(dev);
 	}
 
 	nvbe->bound = false;
@@ -224,7 +224,11 @@
 	int i, ret;
 
 	if (dev_priv->card_type < NV_50) {
-		aper_size = (64 * 1024 * 1024);
+		if(dev_priv->ramin_rsvd_vram < 2 * 1024 * 1024)
+			aper_size = 64 * 1024 * 1024;
+		else
+			aper_size = 512 * 1024 * 1024;
+
 		obj_size  = (aper_size >> NV_CTXDMA_PAGE_SHIFT) * 4;
 		obj_size += 8; /* ctxdma header */
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index ed7757f..049f755 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -354,6 +354,15 @@
 		engine->graph.destroy_context	= nv50_graph_destroy_context;
 		engine->graph.load_context	= nv50_graph_load_context;
 		engine->graph.unload_context	= nv50_graph_unload_context;
+		if (dev_priv->chipset != 0x86)
+			engine->graph.tlb_flush	= nv50_graph_tlb_flush;
+		else {
+			/* from what i can see nvidia do this on every
+			 * pre-NVA3 board except NVAC, but, we've only
+			 * ever seen problems on NV86
+			 */
+			engine->graph.tlb_flush	= nv86_graph_tlb_flush;
+		}
 		engine->fifo.channels		= 128;
 		engine->fifo.init		= nv50_fifo_init;
 		engine->fifo.takedown		= nv50_fifo_takedown;
@@ -365,6 +374,7 @@
 		engine->fifo.destroy_context	= nv50_fifo_destroy_context;
 		engine->fifo.load_context	= nv50_fifo_load_context;
 		engine->fifo.unload_context	= nv50_fifo_unload_context;
+		engine->fifo.tlb_flush		= nv50_fifo_tlb_flush;
 		engine->display.early_init	= nv50_display_early_init;
 		engine->display.late_takedown	= nv50_display_late_takedown;
 		engine->display.create		= nv50_display_create;
@@ -1041,6 +1051,9 @@
 	case NOUVEAU_GETPARAM_PTIMER_TIME:
 		getparam->value = dev_priv->engine.timer.read(dev);
 		break;
+	case NOUVEAU_GETPARAM_HAS_BO_USAGE:
+		getparam->value = 1;
+		break;
 	case NOUVEAU_GETPARAM_GRAPH_UNITS:
 		/* NV40 and NV50 versions are quite different, but register
 		 * address is the same. User is supposed to know the card
@@ -1051,7 +1064,7 @@
 		}
 		/* FALLTHRU */
 	default:
-		NV_ERROR(dev, "unknown parameter %lld\n", getparam->param);
+		NV_DEBUG(dev, "unknown parameter %lld\n", getparam->param);
 		return -EINVAL;
 	}
 
@@ -1066,7 +1079,7 @@
 
 	switch (setparam->param) {
 	default:
-		NV_ERROR(dev, "unknown parameter %lld\n", setparam->param);
+		NV_DEBUG(dev, "unknown parameter %lld\n", setparam->param);
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_temp.c b/drivers/gpu/drm/nouveau/nouveau_temp.c
index 16bbbf1..7ecc4ad 100644
--- a/drivers/gpu/drm/nouveau/nouveau_temp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_temp.c
@@ -191,7 +191,7 @@
 	int offset = sensor->offset_mult / sensor->offset_div;
 	int core_temp;
 
-	if (dev_priv->chipset >= 0x50) {
+	if (dev_priv->card_type >= NV_50) {
 		core_temp = nv_rd32(dev, 0x20008);
 	} else {
 		core_temp = nv_rd32(dev, 0x0015b4) & 0x1fff;
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index c71abc2..40e1807 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -158,7 +158,6 @@
 {
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
-	struct drm_connector *connector;
 	unsigned char seq1 = 0, crtc17 = 0;
 	unsigned char crtc1A;
 
@@ -213,10 +212,6 @@
 	NVVgaSeqReset(dev, nv_crtc->index, false);
 
 	NVWriteVgaCrtc(dev, nv_crtc->index, NV_CIO_CRE_RPC1_INDEX, crtc1A);
-
-	/* Update connector polling modes */
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
-		nouveau_connector_set_polling(connector);
 }
 
 static bool
@@ -831,7 +826,7 @@
 	/* Update the framebuffer location. */
 	regp->fb_start = nv_crtc->fb.offset & ~3;
 	regp->fb_start += (y * drm_fb->pitch) + (x * drm_fb->bits_per_pixel / 8);
-	NVWriteCRTC(dev, nv_crtc->index, NV_PCRTC_START, regp->fb_start);
+	nv_set_crtc_base(dev, nv_crtc->index, regp->fb_start);
 
 	/* Update the arbitration parameters. */
 	nouveau_calc_arb(dev, crtc->mode.clock, drm_fb->bits_per_pixel,
diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
index c936403..ef23550 100644
--- a/drivers/gpu/drm/nouveau/nv04_dfp.c
+++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
@@ -185,14 +185,15 @@
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_connector *nv_connector = nouveau_encoder_connector_get(nv_encoder);
 
-	/* For internal panels and gpu scaling on DVI we need the native mode */
-	if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
-		if (!nv_connector->native_mode)
-			return false;
+	if (!nv_connector->native_mode ||
+	    nv_connector->scaling_mode == DRM_MODE_SCALE_NONE ||
+	    mode->hdisplay > nv_connector->native_mode->hdisplay ||
+	    mode->vdisplay > nv_connector->native_mode->vdisplay) {
+		nv_encoder->mode = *adjusted_mode;
+
+	} else {
 		nv_encoder->mode = *nv_connector->native_mode;
 		adjusted_mode->clock = nv_connector->native_mode->clock;
-	} else {
-		nv_encoder->mode = *adjusted_mode;
 	}
 
 	return true;
diff --git a/drivers/gpu/drm/nouveau/nv04_pm.c b/drivers/gpu/drm/nouveau/nv04_pm.c
index 6a6eb69..eb1c70d 100644
--- a/drivers/gpu/drm/nouveau/nv04_pm.c
+++ b/drivers/gpu/drm/nouveau/nv04_pm.c
@@ -76,6 +76,15 @@
 		reg += 4;
 
 	nouveau_hw_setpll(dev, reg, &state->calc);
+
+	if (dev_priv->card_type < NV_30 && reg == NV_PRAMDAC_MPLL_COEFF) {
+		if (dev_priv->card_type == NV_20)
+			nv_mask(dev, 0x1002c4, 0, 1 << 20);
+
+		/* Reset the DLLs */
+		nv_mask(dev, 0x1002c0, 0, 1 << 8);
+	}
+
 	kfree(state);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_calc.c b/drivers/gpu/drm/nouveau/nv50_calc.c
index 2cdc2bf..de81151 100644
--- a/drivers/gpu/drm/nouveau/nv50_calc.c
+++ b/drivers/gpu/drm/nouveau/nv50_calc.c
@@ -51,24 +51,28 @@
 	       int *N, int *fN, int *M, int *P)
 {
 	fixed20_12 fb_div, a, b;
+	u32 refclk = pll->refclk / 10;
+	u32 max_vco_freq = pll->vco1.maxfreq / 10;
+	u32 max_vco_inputfreq = pll->vco1.max_inputfreq / 10;
+	clk /= 10;
 
-	*P = pll->vco1.maxfreq / clk;
+	*P = max_vco_freq / clk;
 	if (*P > pll->max_p)
 		*P = pll->max_p;
 	if (*P < pll->min_p)
 		*P = pll->min_p;
 
-	/* *M = ceil(refclk / pll->vco.max_inputfreq); */
-	a.full = dfixed_const(pll->refclk);
-	b.full = dfixed_const(pll->vco1.max_inputfreq);
+	/* *M = floor((refclk + max_vco_inputfreq) / max_vco_inputfreq); */
+	a.full = dfixed_const(refclk + max_vco_inputfreq);
+	b.full = dfixed_const(max_vco_inputfreq);
 	a.full = dfixed_div(a, b);
-	a.full = dfixed_ceil(a);
+	a.full = dfixed_floor(a);
 	*M = dfixed_trunc(a);
 
 	/* fb_div = (vco * *M) / refclk; */
 	fb_div.full = dfixed_const(clk * *P);
 	fb_div.full = dfixed_mul(fb_div, a);
-	a.full = dfixed_const(pll->refclk);
+	a.full = dfixed_const(refclk);
 	fb_div.full = dfixed_div(fb_div, a);
 
 	/* *N = floor(fb_div); */
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index 16380d5..56476d0 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -546,7 +546,7 @@
 	}
 
 	nv_crtc->fb.offset = fb->nvbo->bo.offset - dev_priv->vm_vram_base;
-	nv_crtc->fb.tile_flags = fb->nvbo->tile_flags;
+	nv_crtc->fb.tile_flags = nouveau_bo_tile_layout(fb->nvbo);
 	nv_crtc->fb.cpp = drm_fb->bits_per_pixel / 8;
 	if (!nv_crtc->fb.blanked && dev_priv->chipset != 0x50) {
 		ret = RING_SPACE(evo, 2);
@@ -578,7 +578,7 @@
 				  fb->nvbo->tile_mode);
 	}
 	if (dev_priv->chipset == 0x50)
-		OUT_RING(evo, (fb->nvbo->tile_flags << 8) | format);
+		OUT_RING(evo, (nv_crtc->fb.tile_flags << 8) | format);
 	else
 		OUT_RING(evo, format);
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 55c9663..f624c61 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -1032,11 +1032,18 @@
 	struct drm_connector *connector;
 	const uint32_t gpio_reg[4] = { 0xe104, 0xe108, 0xe280, 0xe284 };
 	uint32_t unplug_mask, plug_mask, change_mask;
-	uint32_t hpd0, hpd1 = 0;
+	uint32_t hpd0, hpd1;
 
-	hpd0 = nv_rd32(dev, 0xe054) & nv_rd32(dev, 0xe050);
+	spin_lock_irq(&dev_priv->hpd_state.lock);
+	hpd0 = dev_priv->hpd_state.hpd0_bits;
+	dev_priv->hpd_state.hpd0_bits = 0;
+	hpd1 = dev_priv->hpd_state.hpd1_bits;
+	dev_priv->hpd_state.hpd1_bits = 0;
+	spin_unlock_irq(&dev_priv->hpd_state.lock);
+
+	hpd0 &= nv_rd32(dev, 0xe050);
 	if (dev_priv->chipset >= 0x90)
-		hpd1 = nv_rd32(dev, 0xe074) & nv_rd32(dev, 0xe070);
+		hpd1 &= nv_rd32(dev, 0xe070);
 
 	plug_mask   = (hpd0 & 0x0000ffff) | (hpd1 << 16);
 	unplug_mask = (hpd0 >> 16) | (hpd1 & 0xffff0000);
@@ -1078,10 +1085,6 @@
 			helper->dpms(connector->encoder, DRM_MODE_DPMS_OFF);
 	}
 
-	nv_wr32(dev, 0xe054, nv_rd32(dev, 0xe054));
-	if (dev_priv->chipset >= 0x90)
-		nv_wr32(dev, 0xe074, nv_rd32(dev, 0xe074));
-
 	drm_helper_hpd_irq_event(dev);
 }
 
@@ -1092,8 +1095,22 @@
 	uint32_t delayed = 0;
 
 	if (nv_rd32(dev, NV50_PMC_INTR_0) & NV50_PMC_INTR_0_HOTPLUG) {
-		if (!work_pending(&dev_priv->hpd_work))
-			queue_work(dev_priv->wq, &dev_priv->hpd_work);
+		uint32_t hpd0_bits, hpd1_bits = 0;
+
+		hpd0_bits = nv_rd32(dev, 0xe054);
+		nv_wr32(dev, 0xe054, hpd0_bits);
+
+		if (dev_priv->chipset >= 0x90) {
+			hpd1_bits = nv_rd32(dev, 0xe074);
+			nv_wr32(dev, 0xe074, hpd1_bits);
+		}
+
+		spin_lock(&dev_priv->hpd_state.lock);
+		dev_priv->hpd_state.hpd0_bits |= hpd0_bits;
+		dev_priv->hpd_state.hpd1_bits |= hpd1_bits;
+		spin_unlock(&dev_priv->hpd_state.lock);
+
+		queue_work(dev_priv->wq, &dev_priv->hpd_work);
 	}
 
 	while (nv_rd32(dev, NV50_PMC_INTR_0) & NV50_PMC_INTR_0_DISPLAY) {
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index a46a961..1da65bd 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -464,3 +464,8 @@
 	return 0;
 }
 
+void
+nv50_fifo_tlb_flush(struct drm_device *dev)
+{
+	nv50_vm_flush(dev, 5);
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index cbf5ae2..8b669d0 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -402,3 +402,55 @@
 	{ 0x8597, false, NULL }, /* tesla (nva3, nva5, nva8) */
 	{}
 };
+
+void
+nv50_graph_tlb_flush(struct drm_device *dev)
+{
+	nv50_vm_flush(dev, 0);
+}
+
+void
+nv86_graph_tlb_flush(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
+	bool idle, timeout = false;
+	unsigned long flags;
+	u64 start;
+	u32 tmp;
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x400500, 0x00000001, 0x00000000);
+
+	start = ptimer->read(dev);
+	do {
+		idle = true;
+
+		for (tmp = nv_rd32(dev, 0x400380); tmp && idle; tmp >>= 3) {
+			if ((tmp & 7) == 1)
+				idle = false;
+		}
+
+		for (tmp = nv_rd32(dev, 0x400384); tmp && idle; tmp >>= 3) {
+			if ((tmp & 7) == 1)
+				idle = false;
+		}
+
+		for (tmp = nv_rd32(dev, 0x400388); tmp && idle; tmp >>= 3) {
+			if ((tmp & 7) == 1)
+				idle = false;
+		}
+	} while (!idle && !(timeout = ptimer->read(dev) - start > 2000000000));
+
+	if (timeout) {
+		NV_ERROR(dev, "PGRAPH TLB flush idle timeout fail: "
+			      "0x%08x 0x%08x 0x%08x 0x%08x\n",
+			 nv_rd32(dev, 0x400700), nv_rd32(dev, 0x400380),
+			 nv_rd32(dev, 0x400384), nv_rd32(dev, 0x400388));
+	}
+
+	nv50_vm_flush(dev, 0);
+
+	nv_mask(dev, 0x400500, 0x00000001, 0x00000001);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index a53fc97..b773229 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -402,7 +402,6 @@
 	}
 	dev_priv->engine.instmem.flush(dev);
 
-	nv50_vm_flush(dev, 4);
 	nv50_vm_flush(dev, 6);
 
 	gpuobj->im_bound = 1;
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 488c36c..4dc5b47 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1650,7 +1650,36 @@
 		}
 	}
 
-	rdev->config.evergreen.tile_config = gb_addr_config;
+	/* setup tiling info dword.  gb_addr_config is not adequate since it does
+	 * not have bank info, so create a custom tiling dword.
+	 * bits 3:0   num_pipes
+	 * bits 7:4   num_banks
+	 * bits 11:8  group_size
+	 * bits 15:12 row_size
+	 */
+	rdev->config.evergreen.tile_config = 0;
+	switch (rdev->config.evergreen.max_tile_pipes) {
+	case 1:
+	default:
+		rdev->config.evergreen.tile_config |= (0 << 0);
+		break;
+	case 2:
+		rdev->config.evergreen.tile_config |= (1 << 0);
+		break;
+	case 4:
+		rdev->config.evergreen.tile_config |= (2 << 0);
+		break;
+	case 8:
+		rdev->config.evergreen.tile_config |= (3 << 0);
+		break;
+	}
+	rdev->config.evergreen.tile_config |=
+		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
+	rdev->config.evergreen.tile_config |=
+		((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) << 8;
+	rdev->config.evergreen.tile_config |=
+		((gb_addr_config & 0x30000000) >> 28) << 12;
+
 	WREG32(GB_BACKEND_MAP, gb_backend_map);
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index ac3b6dd..e0e5901 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -459,7 +459,7 @@
 	obj_size += evergreen_ps_size * 4;
 	obj_size = ALIGN(obj_size, 256);
 
-	r = radeon_bo_create(rdev, NULL, obj_size, true, RADEON_GEM_DOMAIN_VRAM,
+	r = radeon_bo_create(rdev, NULL, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
 				&rdev->r600_blit.shader_obj);
 	if (r) {
 		DRM_ERROR("evergreen failed to allocate shader\n");
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 0f806cc..a355259 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2718,7 +2718,7 @@
 	/* Allocate ring buffer */
 	if (rdev->ih.ring_obj == NULL) {
 		r = radeon_bo_create(rdev, NULL, rdev->ih.ring_size,
-				     true,
+				     PAGE_SIZE, true,
 				     RADEON_GEM_DOMAIN_GTT,
 				     &rdev->ih.ring_obj);
 		if (r) {
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 8362974..86e5aa0 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -501,7 +501,7 @@
 	obj_size += r6xx_ps_size * 4;
 	obj_size = ALIGN(obj_size, 256);
 
-	r = radeon_bo_create(rdev, NULL, obj_size, true, RADEON_GEM_DOMAIN_VRAM,
+	r = radeon_bo_create(rdev, NULL, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
 				&rdev->r600_blit.shader_obj);
 	if (r) {
 		DRM_ERROR("r600 failed to allocate shader\n");
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 37cc2aa..9bebac1 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -50,6 +50,7 @@
 	u32			nsamples;
 	u32			cb_color_base_last[8];
 	struct radeon_bo	*cb_color_bo[8];
+	u64			cb_color_bo_mc[8];
 	u32			cb_color_bo_offset[8];
 	struct radeon_bo	*cb_color_frag_bo[8];
 	struct radeon_bo	*cb_color_tile_bo[8];
@@ -67,6 +68,7 @@
 	u32			db_depth_size;
 	u32			db_offset;
 	struct radeon_bo	*db_bo;
+	u64			db_bo_mc;
 };
 
 static inline int r600_bpe_from_format(u32 *bpe, u32 format)
@@ -140,6 +142,68 @@
 	return 0;
 }
 
+struct array_mode_checker {
+	int array_mode;
+	u32 group_size;
+	u32 nbanks;
+	u32 npipes;
+	u32 nsamples;
+	u32 bpe;
+};
+
+/* returns alignment in pixels for pitch/height/depth and bytes for base */
+static inline int r600_get_array_mode_alignment(struct array_mode_checker *values,
+						u32 *pitch_align,
+						u32 *height_align,
+						u32 *depth_align,
+						u64 *base_align)
+{
+	u32 tile_width = 8;
+	u32 tile_height = 8;
+	u32 macro_tile_width = values->nbanks;
+	u32 macro_tile_height = values->npipes;
+	u32 tile_bytes = tile_width * tile_height * values->bpe * values->nsamples;
+	u32 macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;
+
+	switch (values->array_mode) {
+	case ARRAY_LINEAR_GENERAL:
+		/* technically tile_width/_height for pitch/height */
+		*pitch_align = 1; /* tile_width */
+		*height_align = 1; /* tile_height */
+		*depth_align = 1;
+		*base_align = 1;
+		break;
+	case ARRAY_LINEAR_ALIGNED:
+		*pitch_align = max((u32)64, (u32)(values->group_size / values->bpe));
+		*height_align = tile_height;
+		*depth_align = 1;
+		*base_align = values->group_size;
+		break;
+	case ARRAY_1D_TILED_THIN1:
+		*pitch_align = max((u32)tile_width,
+				   (u32)(values->group_size /
+					 (tile_height * values->bpe * values->nsamples)));
+		*height_align = tile_height;
+		*depth_align = 1;
+		*base_align = values->group_size;
+		break;
+	case ARRAY_2D_TILED_THIN1:
+		*pitch_align = max((u32)macro_tile_width,
+				  (u32)(((values->group_size / tile_height) /
+					 (values->bpe * values->nsamples)) *
+					values->nbanks)) * tile_width;
+		*height_align = macro_tile_height * tile_height;
+		*depth_align = 1;
+		*base_align = max(macro_tile_bytes,
+				  (*pitch_align) * values->bpe * (*height_align) * values->nsamples);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void r600_cs_track_init(struct r600_cs_track *track)
 {
 	int i;
@@ -153,10 +217,12 @@
 		track->cb_color_info[i] = 0;
 		track->cb_color_bo[i] = NULL;
 		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
+		track->cb_color_bo_mc[i] = 0xFFFFFFFF;
 	}
 	track->cb_target_mask = 0xFFFFFFFF;
 	track->cb_shader_mask = 0xFFFFFFFF;
 	track->db_bo = NULL;
+	track->db_bo_mc = 0xFFFFFFFF;
 	/* assume the biggest format and that htile is enabled */
 	track->db_depth_info = 7 | (1 << 25);
 	track->db_depth_view = 0xFFFFC000;
@@ -168,7 +234,10 @@
 static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
 {
 	struct r600_cs_track *track = p->track;
-	u32 bpe = 0, pitch, slice_tile_max, size, tmp, height, pitch_align;
+	u32 bpe = 0, slice_tile_max, size, tmp;
+	u32 height, height_align, pitch, pitch_align, depth_align;
+	u64 base_offset, base_align;
+	struct array_mode_checker array_check;
 	volatile u32 *ib = p->ib->ptr;
 	unsigned array_mode;
 
@@ -183,60 +252,40 @@
 			i, track->cb_color_info[i]);
 		return -EINVAL;
 	}
-	/* pitch is the number of 8x8 tiles per row */
-	pitch = G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1;
+	/* pitch in pixels */
+	pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) * 8;
 	slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1;
 	slice_tile_max *= 64;
-	height = slice_tile_max / (pitch * 8);
+	height = slice_tile_max / pitch;
 	if (height > 8192)
 		height = 8192;
 	array_mode = G_0280A0_ARRAY_MODE(track->cb_color_info[i]);
+
+	base_offset = track->cb_color_bo_mc[i] + track->cb_color_bo_offset[i];
+	array_check.array_mode = array_mode;
+	array_check.group_size = track->group_size;
+	array_check.nbanks = track->nbanks;
+	array_check.npipes = track->npipes;
+	array_check.nsamples = track->nsamples;
+	array_check.bpe = bpe;
+	if (r600_get_array_mode_alignment(&array_check,
+					  &pitch_align, &height_align, &depth_align, &base_align)) {
+		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
+			 G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i,
+			 track->cb_color_info[i]);
+		return -EINVAL;
+	}
 	switch (array_mode) {
 	case V_0280A0_ARRAY_LINEAR_GENERAL:
-		/* technically height & 0x7 */
 		break;
 	case V_0280A0_ARRAY_LINEAR_ALIGNED:
-		pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8;
-		if (!IS_ALIGNED(pitch, pitch_align)) {
-			dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
-				 __func__, __LINE__, pitch);
-			return -EINVAL;
-		}
-		if (!IS_ALIGNED(height, 8)) {
-			dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
-				 __func__, __LINE__, height);
-			return -EINVAL;
-		}
 		break;
 	case V_0280A0_ARRAY_1D_TILED_THIN1:
-		pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe * track->nsamples))) / 8;
-		if (!IS_ALIGNED(pitch, pitch_align)) {
-			dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
-				 __func__, __LINE__, pitch);
-			return -EINVAL;
-		}
 		/* avoid breaking userspace */
 		if (height > 7)
 			height &= ~0x7;
-		if (!IS_ALIGNED(height, 8)) {
-			dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
-				 __func__, __LINE__, height);
-			return -EINVAL;
-		}
 		break;
 	case V_0280A0_ARRAY_2D_TILED_THIN1:
-		pitch_align = max((u32)track->nbanks,
-				  (u32)(((track->group_size / 8) / (bpe * track->nsamples)) * track->nbanks)) / 8;
-		if (!IS_ALIGNED(pitch, pitch_align)) {
-			dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
-				__func__, __LINE__, pitch);
-			return -EINVAL;
-		}
-		if (!IS_ALIGNED((height / 8), track->npipes)) {
-			dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
-				 __func__, __LINE__, height);
-			return -EINVAL;
-		}
 		break;
 	default:
 		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
@@ -244,13 +293,29 @@
 			track->cb_color_info[i]);
 		return -EINVAL;
 	}
+
+	if (!IS_ALIGNED(pitch, pitch_align)) {
+		dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
+			 __func__, __LINE__, pitch);
+		return -EINVAL;
+	}
+	if (!IS_ALIGNED(height, height_align)) {
+		dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
+			 __func__, __LINE__, height);
+		return -EINVAL;
+	}
+	if (!IS_ALIGNED(base_offset, base_align)) {
+		dev_warn(p->dev, "%s offset[%d] 0x%llx not aligned\n", __func__, i, base_offset);
+		return -EINVAL;
+	}
+
 	/* check offset */
-	tmp = height * pitch * 8 * bpe;
+	tmp = height * pitch * bpe;
 	if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
 		if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) {
 			/* the initial DDX does bad things with the CB size occasionally */
 			/* it rounds up height too far for slice tile max but the BO is smaller */
-			tmp = (height - 7) * 8 * bpe;
+			tmp = (height - 7) * pitch * bpe;
 			if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
 				dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]));
 				return -EINVAL;
@@ -260,15 +325,11 @@
 			return -EINVAL;
 		}
 	}
-	if (!IS_ALIGNED(track->cb_color_bo_offset[i], track->group_size)) {
-		dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->cb_color_bo_offset[i]);
-		return -EINVAL;
-	}
 	/* limit max tile */
-	tmp = (height * pitch * 8) >> 6;
+	tmp = (height * pitch) >> 6;
 	if (tmp < slice_tile_max)
 		slice_tile_max = tmp;
-	tmp = S_028060_PITCH_TILE_MAX(pitch - 1) |
+	tmp = S_028060_PITCH_TILE_MAX((pitch / 8) - 1) |
 		S_028060_SLICE_TILE_MAX(slice_tile_max - 1);
 	ib[track->cb_color_size_idx[i]] = tmp;
 	return 0;
@@ -310,7 +371,12 @@
 	/* Check depth buffer */
 	if (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
 		G_028800_Z_ENABLE(track->db_depth_control)) {
-		u32 nviews, bpe, ntiles, pitch, pitch_align, height, size, slice_tile_max;
+		u32 nviews, bpe, ntiles, size, slice_tile_max;
+		u32 height, height_align, pitch, pitch_align, depth_align;
+		u64 base_offset, base_align;
+		struct array_mode_checker array_check;
+		int array_mode;
+
 		if (track->db_bo == NULL) {
 			dev_warn(p->dev, "z/stencil with no depth buffer\n");
 			return -EINVAL;
@@ -353,41 +419,34 @@
 			ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
 		} else {
 			size = radeon_bo_size(track->db_bo);
-			pitch = G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1;
+			/* pitch in pixels */
+			pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8;
 			slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
 			slice_tile_max *= 64;
-			height = slice_tile_max / (pitch * 8);
+			height = slice_tile_max / pitch;
 			if (height > 8192)
 				height = 8192;
-			switch (G_028010_ARRAY_MODE(track->db_depth_info)) {
+			base_offset = track->db_bo_mc + track->db_offset;
+			array_mode = G_028010_ARRAY_MODE(track->db_depth_info);
+			array_check.array_mode = array_mode;
+			array_check.group_size = track->group_size;
+			array_check.nbanks = track->nbanks;
+			array_check.npipes = track->npipes;
+			array_check.nsamples = track->nsamples;
+			array_check.bpe = bpe;
+			if (r600_get_array_mode_alignment(&array_check,
+							  &pitch_align, &height_align, &depth_align, &base_align)) {
+				dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
+					 G_028010_ARRAY_MODE(track->db_depth_info),
+					 track->db_depth_info);
+				return -EINVAL;
+			}
+			switch (array_mode) {
 			case V_028010_ARRAY_1D_TILED_THIN1:
-				pitch_align = (max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8);
-				if (!IS_ALIGNED(pitch, pitch_align)) {
-					dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n",
-						 __func__, __LINE__, pitch);
-					return -EINVAL;
-				}
 				/* don't break userspace */
 				height &= ~0x7;
-				if (!IS_ALIGNED(height, 8)) {
-					dev_warn(p->dev, "%s:%d db height (%d) invalid\n",
-						 __func__, __LINE__, height);
-					return -EINVAL;
-				}
 				break;
 			case V_028010_ARRAY_2D_TILED_THIN1:
-				pitch_align = max((u32)track->nbanks,
-						  (u32)(((track->group_size / 8) / bpe) * track->nbanks)) / 8;
-				if (!IS_ALIGNED(pitch, pitch_align)) {
-					dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n",
-						 __func__, __LINE__, pitch);
-					return -EINVAL;
-				}
-				if (!IS_ALIGNED((height / 8), track->npipes)) {
-					dev_warn(p->dev, "%s:%d db height (%d) invalid\n",
-						 __func__, __LINE__, height);
-					return -EINVAL;
-				}
 				break;
 			default:
 				dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
@@ -395,15 +454,27 @@
 					 track->db_depth_info);
 				return -EINVAL;
 			}
-			if (!IS_ALIGNED(track->db_offset, track->group_size)) {
-				dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->db_offset);
+
+			if (!IS_ALIGNED(pitch, pitch_align)) {
+				dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n",
+					 __func__, __LINE__, pitch);
 				return -EINVAL;
 			}
+			if (!IS_ALIGNED(height, height_align)) {
+				dev_warn(p->dev, "%s:%d db height (%d) invalid\n",
+					 __func__, __LINE__, height);
+				return -EINVAL;
+			}
+			if (!IS_ALIGNED(base_offset, base_align)) {
+				dev_warn(p->dev, "%s offset[%d] 0x%llx not aligned\n", __func__, i, base_offset);
+				return -EINVAL;
+			}
+
 			ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
 			nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
 			tmp = ntiles * bpe * 64 * nviews;
 			if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) {
-				dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %d -> %d have %ld)\n",
+				dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %d -> %u have %lu)\n",
 						track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset,
 						radeon_bo_size(track->db_bo));
 				return -EINVAL;
@@ -954,6 +1025,7 @@
 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_base_last[tmp] = ib[idx];
 		track->cb_color_bo[tmp] = reloc->robj;
+		track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset;
 		break;
 	case DB_DEPTH_BASE:
 		r = r600_cs_packet_next_reloc(p, &reloc);
@@ -965,6 +1037,7 @@
 		track->db_offset = radeon_get_ib_value(p, idx) << 8;
 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
 		track->db_bo = reloc->robj;
+		track->db_bo_mc = reloc->lobj.gpu_offset;
 		break;
 	case DB_HTILE_DATA_BASE:
 	case SQ_PGM_START_FS:
@@ -1086,16 +1159,25 @@
 static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 					      struct radeon_bo *texture,
 					      struct radeon_bo *mipmap,
+					      u64 base_offset,
+					      u64 mip_offset,
 					      u32 tiling_flags)
 {
 	struct r600_cs_track *track = p->track;
 	u32 nfaces, nlevels, blevel, w0, h0, d0, bpe = 0;
-	u32 word0, word1, l0_size, mipmap_size, pitch, pitch_align;
+	u32 word0, word1, l0_size, mipmap_size;
+	u32 height_align, pitch, pitch_align, depth_align;
+	u64 base_align;
+	struct array_mode_checker array_check;
 
 	/* on legacy kernel we don't perform advanced check */
 	if (p->rdev == NULL)
 		return 0;
 
+	/* convert to bytes */
+	base_offset <<= 8;
+	mip_offset <<= 8;
+
 	word0 = radeon_get_ib_value(p, idx + 0);
 	if (tiling_flags & RADEON_TILING_MACRO)
 		word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
@@ -1128,46 +1210,38 @@
 		return -EINVAL;
 	}
 
-	pitch = G_038000_PITCH(word0) + 1;
-	switch (G_038000_TILE_MODE(word0)) {
-	case V_038000_ARRAY_LINEAR_GENERAL:
-		pitch_align = 1;
-		/* XXX check height align */
-		break;
-	case V_038000_ARRAY_LINEAR_ALIGNED:
-		pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8;
-		if (!IS_ALIGNED(pitch, pitch_align)) {
-			dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
-				 __func__, __LINE__, pitch);
-			return -EINVAL;
-		}
-		/* XXX check height align */
-		break;
-	case V_038000_ARRAY_1D_TILED_THIN1:
-		pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8;
-		if (!IS_ALIGNED(pitch, pitch_align)) {
-			dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
-				 __func__, __LINE__, pitch);
-			return -EINVAL;
-		}
-		/* XXX check height align */
-		break;
-	case V_038000_ARRAY_2D_TILED_THIN1:
-		pitch_align = max((u32)track->nbanks,
-				  (u32)(((track->group_size / 8) / bpe) * track->nbanks)) / 8;
-		if (!IS_ALIGNED(pitch, pitch_align)) {
-			dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
-				__func__, __LINE__, pitch);
-			return -EINVAL;
-		}
-		/* XXX check height align */
-		break;
-	default:
-		dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
-			 G_038000_TILE_MODE(word0), word0);
+	/* pitch in texels */
+	pitch = (G_038000_PITCH(word0) + 1) * 8;
+	array_check.array_mode = G_038000_TILE_MODE(word0);
+	array_check.group_size = track->group_size;
+	array_check.nbanks = track->nbanks;
+	array_check.npipes = track->npipes;
+	array_check.nsamples = 1;
+	array_check.bpe = bpe;
+	if (r600_get_array_mode_alignment(&array_check,
+					  &pitch_align, &height_align, &depth_align, &base_align)) {
+		dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n",
+			 __func__, __LINE__, G_038000_TILE_MODE(word0));
 		return -EINVAL;
 	}
-	/* XXX check offset align */
+
+	/* XXX check height as well... */
+
+	if (!IS_ALIGNED(pitch, pitch_align)) {
+		dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
+			 __func__, __LINE__, pitch);
+		return -EINVAL;
+	}
+	if (!IS_ALIGNED(base_offset, base_align)) {
+		dev_warn(p->dev, "%s:%d tex base offset (0x%llx) invalid\n",
+			 __func__, __LINE__, base_offset);
+		return -EINVAL;
+	}
+	if (!IS_ALIGNED(mip_offset, base_align)) {
+		dev_warn(p->dev, "%s:%d tex mip offset (0x%llx) invalid\n",
+			 __func__, __LINE__, mip_offset);
+		return -EINVAL;
+	}
 
 	word0 = radeon_get_ib_value(p, idx + 4);
 	word1 = radeon_get_ib_value(p, idx + 5);
@@ -1402,7 +1476,10 @@
 				mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
 				mipmap = reloc->robj;
 				r = r600_check_texture_resource(p,  idx+(i*7)+1,
-								texture, mipmap, reloc->lobj.tiling_flags);
+								texture, mipmap,
+								base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2),
+								mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3),
+								reloc->lobj.tiling_flags);
 				if (r)
 					return r;
 				ib[idx+1+(i*7)+2] += base_offset;
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 966a793..bff4dc4 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -51,6 +51,12 @@
 #define PTE_READABLE				(1 << 5)
 #define PTE_WRITEABLE				(1 << 6)
 
+/* tiling bits */
+#define     ARRAY_LINEAR_GENERAL              0x00000000
+#define     ARRAY_LINEAR_ALIGNED              0x00000001
+#define     ARRAY_1D_TILED_THIN1              0x00000002
+#define     ARRAY_2D_TILED_THIN1              0x00000004
+
 /* Registers */
 #define	ARB_POP						0x2418
 #define 	ENABLE_TC128					(1 << 30)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 73f600d3..3a70957 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1262,6 +1262,10 @@
 		(rdev->family == CHIP_RS400) ||			\
 		(rdev->family == CHIP_RS480))
 #define ASIC_IS_AVIVO(rdev) ((rdev->family >= CHIP_RS600))
+#define ASIC_IS_DCE2(rdev) ((rdev->family == CHIP_RS600)  ||	\
+			    (rdev->family == CHIP_RS690)  ||	\
+			    (rdev->family == CHIP_RS740)  ||	\
+			    (rdev->family >= CHIP_R600))
 #define ASIC_IS_DCE3(rdev) ((rdev->family >= CHIP_RV620))
 #define ASIC_IS_DCE32(rdev) ((rdev->family >= CHIP_RV730))
 #define ASIC_IS_DCE4(rdev) ((rdev->family >= CHIP_CEDAR))
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 7932dc4..c558685 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -41,7 +41,7 @@
 
 	size = bsize;
 	n = 1024;
-	r = radeon_bo_create(rdev, NULL, size, true, sdomain, &sobj);
+	r = radeon_bo_create(rdev, NULL, size, PAGE_SIZE, true, sdomain, &sobj);
 	if (r) {
 		goto out_cleanup;
 	}
@@ -53,7 +53,7 @@
 	if (r) {
 		goto out_cleanup;
 	}
-	r = radeon_bo_create(rdev, NULL, size, true, ddomain, &dobj);
+	r = radeon_bo_create(rdev, NULL, size, PAGE_SIZE, true, ddomain, &dobj);
 	if (r) {
 		goto out_cleanup;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 7b7ea26..3bddea5 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -571,6 +571,7 @@
 	}
 
 	if (clk_mask && data_mask) {
+		/* system specific masks */
 		i2c.mask_clk_mask = clk_mask;
 		i2c.mask_data_mask = data_mask;
 		i2c.a_clk_mask = clk_mask;
@@ -579,7 +580,19 @@
 		i2c.en_data_mask = data_mask;
 		i2c.y_clk_mask = clk_mask;
 		i2c.y_data_mask = data_mask;
+	} else if ((ddc_line == RADEON_GPIOPAD_MASK) ||
+		   (ddc_line == RADEON_MDGPIO_MASK)) {
+		/* default gpiopad masks */
+		i2c.mask_clk_mask = (0x20 << 8);
+		i2c.mask_data_mask = 0x80;
+		i2c.a_clk_mask = (0x20 << 8);
+		i2c.a_data_mask = 0x80;
+		i2c.en_clk_mask = (0x20 << 8);
+		i2c.en_data_mask = 0x80;
+		i2c.y_clk_mask = (0x20 << 8);
+		i2c.y_data_mask = 0x80;
 	} else {
+		/* default masks for ddc pads */
 		i2c.mask_clk_mask = RADEON_GPIO_EN_1;
 		i2c.mask_data_mask = RADEON_GPIO_EN_0;
 		i2c.a_clk_mask = RADEON_GPIO_A_1;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index fe6c747..3bef9f6 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1008,9 +1008,21 @@
 static int radeon_dp_get_modes(struct drm_connector *connector)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	struct radeon_connector_atom_dig *radeon_dig_connector = radeon_connector->con_priv;
 	int ret;
 
+	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+		if (!radeon_dig_connector->edp_on)
+			atombios_set_edp_panel_power(connector,
+						     ATOM_TRANSMITTER_ACTION_POWER_ON);
+	}
 	ret = radeon_ddc_get_modes(radeon_connector);
+	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+		if (!radeon_dig_connector->edp_on)
+			atombios_set_edp_panel_power(connector,
+						     ATOM_TRANSMITTER_ACTION_POWER_OFF);
+	}
+
 	return ret;
 }
 
@@ -1029,8 +1041,14 @@
 	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
 		/* eDP is always DP */
 		radeon_dig_connector->dp_sink_type = CONNECTOR_OBJECT_ID_DISPLAYPORT;
+		if (!radeon_dig_connector->edp_on)
+			atombios_set_edp_panel_power(connector,
+						     ATOM_TRANSMITTER_ACTION_POWER_ON);
 		if (radeon_dp_getdpcd(radeon_connector))
 			ret = connector_status_connected;
+		if (!radeon_dig_connector->edp_on)
+			atombios_set_edp_panel_power(connector,
+						     ATOM_TRANSMITTER_ACTION_POWER_OFF);
 	} else {
 		radeon_dig_connector->dp_sink_type = radeon_dp_getsinktype(radeon_connector);
 		if (radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 8adfedf..d8ac184 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -180,7 +180,7 @@
 	int r;
 
 	if (rdev->wb.wb_obj == NULL) {
-		r = radeon_bo_create(rdev, NULL, RADEON_GPU_PAGE_SIZE, true,
+		r = radeon_bo_create(rdev, NULL, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
 				RADEON_GEM_DOMAIN_GTT, &rdev->wb.wb_obj);
 		if (r) {
 			dev_warn(rdev->dev, "(%d) create WB bo failed\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index f678257..041943d 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -176,6 +176,7 @@
 		return false;
 	}
 }
+
 void
 radeon_link_encoder_connector(struct drm_device *dev)
 {
@@ -228,6 +229,27 @@
 	return NULL;
 }
 
+struct drm_encoder *radeon_atom_get_external_encoder(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_encoder *other_encoder;
+	struct radeon_encoder *other_radeon_encoder;
+
+	if (radeon_encoder->is_ext_encoder)
+		return NULL;
+
+	list_for_each_entry(other_encoder, &dev->mode_config.encoder_list, head) {
+		if (other_encoder == encoder)
+			continue;
+		other_radeon_encoder = to_radeon_encoder(other_encoder);
+		if (other_radeon_encoder->is_ext_encoder &&
+		    (radeon_encoder->devices & other_radeon_encoder->devices))
+			return other_encoder;
+	}
+	return NULL;
+}
+
 void radeon_panel_mode_fixup(struct drm_encoder *encoder,
 			     struct drm_display_mode *adjusted_mode)
 {
@@ -426,52 +448,49 @@
 
 }
 
+union dvo_encoder_control {
+	ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION ext_tmds;
+	DVO_ENCODER_CONTROL_PS_ALLOCATION dvo;
+	DVO_ENCODER_CONTROL_PS_ALLOCATION_V3 dvo_v3;
+};
+
 void
-atombios_external_tmds_setup(struct drm_encoder *encoder, int action)
+atombios_dvo_setup(struct drm_encoder *encoder, int action)
 {
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
-	ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION args;
-	int index = 0;
+	union dvo_encoder_control args;
+	int index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
 
 	memset(&args, 0, sizeof(args));
 
-	index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
+	if (ASIC_IS_DCE3(rdev)) {
+		/* DCE3+ */
+		args.dvo_v3.ucAction = action;
+		args.dvo_v3.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+		args.dvo_v3.ucDVOConfig = 0; /* XXX */
+	} else if (ASIC_IS_DCE2(rdev)) {
+		/* DCE2 (pre-DCE3 R6xx, RS600/690/740 */
+		args.dvo.sDVOEncoder.ucAction = action;
+		args.dvo.sDVOEncoder.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+		/* DFP1, CRT1, TV1 depending on the type of port */
+		args.dvo.sDVOEncoder.ucDeviceType = ATOM_DEVICE_DFP1_INDEX;
 
-	args.sXTmdsEncoder.ucEnable = action;
+		if (radeon_encoder->pixel_clock > 165000)
+			args.dvo.sDVOEncoder.usDevAttr.sDigAttrib.ucAttribute |= PANEL_ENCODER_MISC_DUAL;
+	} else {
+		/* R4xx, R5xx */
+		args.ext_tmds.sXTmdsEncoder.ucEnable = action;
 
-	if (radeon_encoder->pixel_clock > 165000)
-		args.sXTmdsEncoder.ucMisc = PANEL_ENCODER_MISC_DUAL;
+		if (radeon_encoder->pixel_clock > 165000)
+			args.ext_tmds.sXTmdsEncoder.ucMisc |= PANEL_ENCODER_MISC_DUAL;
 
-	/*if (pScrn->rgbBits == 8)*/
-	args.sXTmdsEncoder.ucMisc |= (1 << 1);
+		/*if (pScrn->rgbBits == 8)*/
+		args.ext_tmds.sXTmdsEncoder.ucMisc |= ATOM_PANEL_MISC_888RGB;
+	}
 
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
-
-}
-
-static void
-atombios_ddia_setup(struct drm_encoder *encoder, int action)
-{
-	struct drm_device *dev = encoder->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
-	DVO_ENCODER_CONTROL_PS_ALLOCATION args;
-	int index = 0;
-
-	memset(&args, 0, sizeof(args));
-
-	index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
-
-	args.sDVOEncoder.ucAction = action;
-	args.sDVOEncoder.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
-
-	if (radeon_encoder->pixel_clock > 165000)
-		args.sDVOEncoder.usDevAttr.sDigAttrib.ucAttribute = PANEL_ENCODER_MISC_DUAL;
-
-	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
-
 }
 
 union lvds_encoder_control {
@@ -532,14 +551,14 @@
 				if (dig->lcd_misc & ATOM_PANEL_MISC_DUAL)
 					args.v1.ucMisc |= PANEL_ENCODER_MISC_DUAL;
 				if (dig->lcd_misc & ATOM_PANEL_MISC_888RGB)
-					args.v1.ucMisc |= (1 << 1);
+					args.v1.ucMisc |= ATOM_PANEL_MISC_888RGB;
 			} else {
 				if (dig->linkb)
 					args.v1.ucMisc |= PANEL_ENCODER_MISC_TMDS_LINKB;
 				if (radeon_encoder->pixel_clock > 165000)
 					args.v1.ucMisc |= PANEL_ENCODER_MISC_DUAL;
 				/*if (pScrn->rgbBits == 8) */
-				args.v1.ucMisc |= (1 << 1);
+				args.v1.ucMisc |= ATOM_PANEL_MISC_888RGB;
 			}
 			break;
 		case 2:
@@ -595,6 +614,7 @@
 int
 atombios_get_encoder_mode(struct drm_encoder *encoder)
 {
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct drm_connector *connector;
@@ -602,9 +622,20 @@
 	struct radeon_connector_atom_dig *dig_connector;
 
 	connector = radeon_get_connector_for_encoder(encoder);
-	if (!connector)
-		return 0;
-
+	if (!connector) {
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+			return ATOM_ENCODER_MODE_DVI;
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+		default:
+			return ATOM_ENCODER_MODE_CRT;
+		}
+	}
 	radeon_connector = to_radeon_connector(connector);
 
 	switch (connector->connector_type) {
@@ -834,6 +865,9 @@
 	memset(&args, 0, sizeof(args));
 
 	switch (radeon_encoder->encoder_id) {
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+		index = GetIndexIntoMasterTable(COMMAND, DVOOutputControl);
+		break;
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
@@ -978,6 +1012,105 @@
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 }
 
+void
+atombios_set_edp_panel_power(struct drm_connector *connector, int action)
+{
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	struct drm_device *dev = radeon_connector->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	union dig_transmitter_control args;
+	int index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl);
+	uint8_t frev, crev;
+
+	if (connector->connector_type != DRM_MODE_CONNECTOR_eDP)
+		return;
+
+	if (!ASIC_IS_DCE4(rdev))
+		return;
+
+	if ((action != ATOM_TRANSMITTER_ACTION_POWER_ON) ||
+	    (action != ATOM_TRANSMITTER_ACTION_POWER_OFF))
+		return;
+
+	if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev))
+		return;
+
+	memset(&args, 0, sizeof(args));
+
+	args.v1.ucAction = action;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+union external_encoder_control {
+	EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION v1;
+};
+
+static void
+atombios_external_encoder_setup(struct drm_encoder *encoder,
+				struct drm_encoder *ext_encoder,
+				int action)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	union external_encoder_control args;
+	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
+	int index = GetIndexIntoMasterTable(COMMAND, ExternalEncoderControl);
+	u8 frev, crev;
+	int dp_clock = 0;
+	int dp_lane_count = 0;
+	int connector_object_id = 0;
+
+	if (connector) {
+		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+		struct radeon_connector_atom_dig *dig_connector =
+			radeon_connector->con_priv;
+
+		dp_clock = dig_connector->dp_clock;
+		dp_lane_count = dig_connector->dp_lane_count;
+		connector_object_id =
+			(radeon_connector->connector_object_id & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+	}
+
+	memset(&args, 0, sizeof(args));
+
+	if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev))
+		return;
+
+	switch (frev) {
+	case 1:
+		/* no params on frev 1 */
+		break;
+	case 2:
+		switch (crev) {
+		case 1:
+		case 2:
+			args.v1.sDigEncoder.ucAction = action;
+			args.v1.sDigEncoder.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+			args.v1.sDigEncoder.ucEncoderMode = atombios_get_encoder_mode(encoder);
+
+			if (args.v1.sDigEncoder.ucEncoderMode == ATOM_ENCODER_MODE_DP) {
+				if (dp_clock == 270000)
+					args.v1.sDigEncoder.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+				args.v1.sDigEncoder.ucLaneNum = dp_lane_count;
+			} else if (radeon_encoder->pixel_clock > 165000)
+				args.v1.sDigEncoder.ucLaneNum = 8;
+			else
+				args.v1.sDigEncoder.ucLaneNum = 4;
+			break;
+		default:
+			DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+			return;
+		}
+		break;
+	default:
+		DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+		return;
+	}
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
 static void
 atombios_yuv_setup(struct drm_encoder *encoder, bool enable)
 {
@@ -1021,6 +1154,7 @@
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_encoder *ext_encoder = radeon_atom_get_external_encoder(encoder);
 	DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION args;
 	int index = 0;
 	bool is_dig = false;
@@ -1043,9 +1177,14 @@
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
 	case ENCODER_OBJECT_ID_INTERNAL_DDI:
-	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
 		index = GetIndexIntoMasterTable(COMMAND, DVOOutputControl);
 		break;
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+		if (ASIC_IS_DCE3(rdev))
+			is_dig = true;
+		else
+			index = GetIndexIntoMasterTable(COMMAND, DVOOutputControl);
+		break;
 	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
 		index = GetIndexIntoMasterTable(COMMAND, LCD1OutputControl);
 		break;
@@ -1082,34 +1221,85 @@
 			if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_DP) {
 				struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
 
+				if (connector &&
+				    (connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
+					struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+					struct radeon_connector_atom_dig *radeon_dig_connector =
+						radeon_connector->con_priv;
+					atombios_set_edp_panel_power(connector,
+								     ATOM_TRANSMITTER_ACTION_POWER_ON);
+					radeon_dig_connector->edp_on = true;
+				}
 				dp_link_train(encoder, connector);
 				if (ASIC_IS_DCE4(rdev))
 					atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON);
 			}
+			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+				atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0);
 			break;
 		case DRM_MODE_DPMS_STANDBY:
 		case DRM_MODE_DPMS_SUSPEND:
 		case DRM_MODE_DPMS_OFF:
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE_OUTPUT, 0, 0);
 			if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_DP) {
+				struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
+
 				if (ASIC_IS_DCE4(rdev))
 					atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_OFF);
+				if (connector &&
+				    (connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
+					struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+					struct radeon_connector_atom_dig *radeon_dig_connector =
+						radeon_connector->con_priv;
+					atombios_set_edp_panel_power(connector,
+								     ATOM_TRANSMITTER_ACTION_POWER_OFF);
+					radeon_dig_connector->edp_on = false;
+				}
 			}
+			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+				atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
 			break;
 		}
 	} else {
 		switch (mode) {
 		case DRM_MODE_DPMS_ON:
 			args.ucAction = ATOM_ENABLE;
+			atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+				args.ucAction = ATOM_LCD_BLON;
+				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+			}
 			break;
 		case DRM_MODE_DPMS_STANDBY:
 		case DRM_MODE_DPMS_SUSPEND:
 		case DRM_MODE_DPMS_OFF:
 			args.ucAction = ATOM_DISABLE;
+			atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+				args.ucAction = ATOM_LCD_BLOFF;
+				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+			}
 			break;
 		}
-		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 	}
+
+	if (ext_encoder) {
+		int action;
+
+		switch (mode) {
+		case DRM_MODE_DPMS_ON:
+		default:
+			action = ATOM_ENABLE;
+			break;
+		case DRM_MODE_DPMS_STANDBY:
+		case DRM_MODE_DPMS_SUSPEND:
+		case DRM_MODE_DPMS_OFF:
+			action = ATOM_DISABLE;
+			break;
+		}
+		atombios_external_encoder_setup(encoder, ext_encoder, action);
+	}
+
 	radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 
 }
@@ -1242,7 +1432,7 @@
 		break;
 	default:
 		DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
-		break;
+		return;
 	}
 
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
@@ -1357,6 +1547,7 @@
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_encoder *ext_encoder = radeon_atom_get_external_encoder(encoder);
 
 	radeon_encoder->pixel_clock = adjusted_mode->clock;
 
@@ -1400,11 +1591,9 @@
 		}
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DDI:
-		atombios_ddia_setup(encoder, ATOM_ENABLE);
-		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
-		atombios_external_tmds_setup(encoder, ATOM_ENABLE);
+		atombios_dvo_setup(encoder, ATOM_ENABLE);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
@@ -1419,6 +1608,11 @@
 		}
 		break;
 	}
+
+	if (ext_encoder) {
+		atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE);
+	}
+
 	atombios_apply_encoder_quirks(encoder, adjusted_mode);
 
 	if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
@@ -1595,11 +1789,9 @@
 		}
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DDI:
-		atombios_ddia_setup(encoder, ATOM_DISABLE);
-		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
-		atombios_external_tmds_setup(encoder, ATOM_DISABLE);
+		atombios_dvo_setup(encoder, ATOM_DISABLE);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
@@ -1621,6 +1813,53 @@
 	radeon_encoder->active_device = 0;
 }
 
+/* these are handled by the primary encoders */
+static void radeon_atom_ext_prepare(struct drm_encoder *encoder)
+{
+
+}
+
+static void radeon_atom_ext_commit(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+radeon_atom_ext_mode_set(struct drm_encoder *encoder,
+			 struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode)
+{
+
+}
+
+static void radeon_atom_ext_disable(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+radeon_atom_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+
+}
+
+static bool radeon_atom_ext_mode_fixup(struct drm_encoder *encoder,
+				       struct drm_display_mode *mode,
+				       struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static const struct drm_encoder_helper_funcs radeon_atom_ext_helper_funcs = {
+	.dpms = radeon_atom_ext_dpms,
+	.mode_fixup = radeon_atom_ext_mode_fixup,
+	.prepare = radeon_atom_ext_prepare,
+	.mode_set = radeon_atom_ext_mode_set,
+	.commit = radeon_atom_ext_commit,
+	.disable = radeon_atom_ext_disable,
+	/* no detect for TMDS/LVDS yet */
+};
+
 static const struct drm_encoder_helper_funcs radeon_atom_dig_helper_funcs = {
 	.dpms = radeon_atom_encoder_dpms,
 	.mode_fixup = radeon_atom_mode_fixup,
@@ -1730,6 +1969,7 @@
 	radeon_encoder->devices = supported_device;
 	radeon_encoder->rmx_type = RMX_OFF;
 	radeon_encoder->underscan_type = UNDERSCAN_OFF;
+	radeon_encoder->is_ext_encoder = false;
 
 	switch (radeon_encoder->encoder_id) {
 	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
@@ -1771,6 +2011,9 @@
 			radeon_encoder->rmx_type = RMX_FULL;
 			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS);
 			radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder);
+		} else if (radeon_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC);
+			radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
 		} else {
 			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
 			radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
@@ -1779,5 +2022,22 @@
 		}
 		drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs);
 		break;
+	case ENCODER_OBJECT_ID_SI170B:
+	case ENCODER_OBJECT_ID_CH7303:
+	case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
+	case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
+	case ENCODER_OBJECT_ID_TITFP513:
+	case ENCODER_OBJECT_ID_VT1623:
+	case ENCODER_OBJECT_ID_HDMI_SI1930:
+		/* these are handled by the primary encoders */
+		radeon_encoder->is_ext_encoder = true;
+		if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS);
+		else if (radeon_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC);
+		else
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
+		drm_encoder_helper_add(encoder, &radeon_atom_ext_helper_funcs);
+		break;
 	}
 }
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index e65b9031..6501611 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -79,8 +79,8 @@
 
 	if (rdev->gart.table.vram.robj == NULL) {
 		r = radeon_bo_create(rdev, NULL, rdev->gart.table_size,
-					true, RADEON_GEM_DOMAIN_VRAM,
-					&rdev->gart.table.vram.robj);
+				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
+				     &rdev->gart.table.vram.robj);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index d1e595d..df95eb8 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -67,7 +67,7 @@
 	if (alignment < PAGE_SIZE) {
 		alignment = PAGE_SIZE;
 	}
-	r = radeon_bo_create(rdev, gobj, size, kernel, initial_domain, &robj);
+	r = radeon_bo_create(rdev, gobj, size, alignment, kernel, initial_domain, &robj);
 	if (r) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("Failed to allocate GEM object (%d, %d, %u, %d)\n",
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 0cfbba0..ded2a45 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -896,7 +896,8 @@
 	     ((rdev->family <= CHIP_RS480) ||
 	      ((rdev->family >= CHIP_RV515) && (rdev->family <= CHIP_R580))))) {
 		/* set the radeon hw i2c adapter */
-		sprintf(i2c->adapter.name, "Radeon i2c hw bus %s", name);
+		snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
+			 "Radeon i2c hw bus %s", name);
 		i2c->adapter.algo = &radeon_i2c_algo;
 		ret = i2c_add_adapter(&i2c->adapter);
 		if (ret) {
@@ -905,7 +906,8 @@
 		}
 	} else {
 		/* set the radeon bit adapter */
-		sprintf(i2c->adapter.name, "Radeon i2c bit bus %s", name);
+		snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
+			 "Radeon i2c bit bus %s", name);
 		i2c->adapter.algo_data = &i2c->algo.bit;
 		i2c->algo.bit.pre_xfer = pre_xfer;
 		i2c->algo.bit.post_xfer = post_xfer;
@@ -946,6 +948,8 @@
 	i2c->rec = *rec;
 	i2c->adapter.owner = THIS_MODULE;
 	i2c->dev = dev;
+	snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
+		 "Radeon aux bus %s", name);
 	i2c_set_adapdata(&i2c->adapter, i2c);
 	i2c->adapter.algo_data = &i2c->algo.dp;
 	i2c->algo.dp.aux_ch = radeon_dp_i2c_aux_ch;
diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c
index 2f349a3..465746b 100644
--- a/drivers/gpu/drm/radeon/radeon_irq.c
+++ b/drivers/gpu/drm/radeon/radeon_irq.c
@@ -76,7 +76,7 @@
 		default:
 			DRM_ERROR("tried to enable vblank on non-existent crtc %d\n",
 				  crtc);
-			return EINVAL;
+			return -EINVAL;
 		}
 	} else {
 		switch (crtc) {
@@ -89,7 +89,7 @@
 		default:
 			DRM_ERROR("tried to enable vblank on non-existent crtc %d\n",
 				  crtc);
-			return EINVAL;
+			return -EINVAL;
 		}
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 0b83970..59f834b 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -670,7 +670,7 @@
 
 	if (rdev->is_atom_bios) {
 		radeon_encoder->pixel_clock = adjusted_mode->clock;
-		atombios_external_tmds_setup(encoder, ATOM_ENABLE);
+		atombios_dvo_setup(encoder, ATOM_ENABLE);
 		fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
 	} else {
 		fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 680f576..e301c6f 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -375,6 +375,7 @@
 	int hdmi_config_offset;
 	int hdmi_audio_workaround;
 	int hdmi_buffer_status;
+	bool is_ext_encoder;
 };
 
 struct radeon_connector_atom_dig {
@@ -385,6 +386,7 @@
 	u8 dp_sink_type;
 	int dp_clock;
 	int dp_lane_count;
+	bool edp_on;
 };
 
 struct radeon_gpio_rec {
@@ -523,9 +525,10 @@
 struct drm_encoder *radeon_encoder_legacy_tv_dac_add(struct drm_device *dev, int bios_index, int with_tv);
 struct drm_encoder *radeon_encoder_legacy_tmds_int_add(struct drm_device *dev, int bios_index);
 struct drm_encoder *radeon_encoder_legacy_tmds_ext_add(struct drm_device *dev, int bios_index);
-extern void atombios_external_tmds_setup(struct drm_encoder *encoder, int action);
+extern void atombios_dvo_setup(struct drm_encoder *encoder, int action);
 extern void atombios_digital_setup(struct drm_encoder *encoder, int action);
 extern int atombios_get_encoder_mode(struct drm_encoder *encoder);
+extern void atombios_set_edp_panel_power(struct drm_connector *connector, int action);
 extern void radeon_encoder_set_active_device(struct drm_encoder *encoder);
 
 extern void radeon_crtc_load_lut(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 8eb1834..1d06774 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -86,11 +86,12 @@
 }
 
 int radeon_bo_create(struct radeon_device *rdev, struct drm_gem_object *gobj,
-			unsigned long size, bool kernel, u32 domain,
-			struct radeon_bo **bo_ptr)
+		     unsigned long size, int byte_align, bool kernel, u32 domain,
+		     struct radeon_bo **bo_ptr)
 {
 	struct radeon_bo *bo;
 	enum ttm_bo_type type;
+	int page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
 	int r;
 
 	if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) {
@@ -115,7 +116,7 @@
 	/* Kernel allocation are uninterruptible */
 	mutex_lock(&rdev->vram_mutex);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
-			&bo->placement, 0, 0, !kernel, NULL, size,
+			&bo->placement, page_align, 0, !kernel, NULL, size,
 			&radeon_ttm_bo_destroy);
 	mutex_unlock(&rdev->vram_mutex);
 	if (unlikely(r != 0)) {
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 3481bc7..d143702 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -137,9 +137,10 @@
 }
 
 extern int radeon_bo_create(struct radeon_device *rdev,
-				struct drm_gem_object *gobj, unsigned long size,
-				bool kernel, u32 domain,
-				struct radeon_bo **bo_ptr);
+			    struct drm_gem_object *gobj, unsigned long size,
+			    int byte_align,
+			    bool kernel, u32 domain,
+			    struct radeon_bo **bo_ptr);
 extern int radeon_bo_kmap(struct radeon_bo *bo, void **ptr);
 extern void radeon_bo_kunmap(struct radeon_bo *bo);
 extern void radeon_bo_unref(struct radeon_bo **bo);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 6ea798c..06e7982 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -176,8 +176,8 @@
 	INIT_LIST_HEAD(&rdev->ib_pool.bogus_ib);
 	/* Allocate 1M object buffer */
 	r = radeon_bo_create(rdev, NULL,  RADEON_IB_POOL_SIZE*64*1024,
-				true, RADEON_GEM_DOMAIN_GTT,
-				&rdev->ib_pool.robj);
+			     PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
+			     &rdev->ib_pool.robj);
 	if (r) {
 		DRM_ERROR("radeon: failed to ib pool (%d).\n", r);
 		return r;
@@ -332,7 +332,7 @@
 	rdev->cp.ring_size = ring_size;
 	/* Allocate ring buffer */
 	if (rdev->cp.ring_obj == NULL) {
-		r = radeon_bo_create(rdev, NULL, rdev->cp.ring_size, true,
+		r = radeon_bo_create(rdev, NULL, rdev->cp.ring_size, PAGE_SIZE, true,
 					RADEON_GEM_DOMAIN_GTT,
 					&rdev->cp.ring_obj);
 		if (r) {
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 313c96b..5b44f65 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -52,7 +52,7 @@
 		goto out_cleanup;
 	}
 
-	r = radeon_bo_create(rdev, NULL, size, true, RADEON_GEM_DOMAIN_VRAM,
+	r = radeon_bo_create(rdev, NULL, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
 				&vram_obj);
 	if (r) {
 		DRM_ERROR("Failed to create VRAM object\n");
@@ -71,7 +71,7 @@
 		void **gtt_start, **gtt_end;
 		void **vram_start, **vram_end;
 
-		r = radeon_bo_create(rdev, NULL, size, true,
+		r = radeon_bo_create(rdev, NULL, size, PAGE_SIZE, true,
 					 RADEON_GEM_DOMAIN_GTT, gtt_obj + i);
 		if (r) {
 			DRM_ERROR("Failed to create GTT object %d\n", i);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 01c2c73..1272e4b 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -529,7 +529,7 @@
 		DRM_ERROR("Failed initializing VRAM heap.\n");
 		return r;
 	}
-	r = radeon_bo_create(rdev, NULL, 256 * 1024, true,
+	r = radeon_bo_create(rdev, NULL, 256 * 1024, PAGE_SIZE, true,
 				RADEON_GEM_DOMAIN_VRAM,
 				&rdev->stollen_vga_memory);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 245374e..4dfead8 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -915,8 +915,8 @@
 
 	if (rdev->vram_scratch.robj == NULL) {
 		r = radeon_bo_create(rdev, NULL, RADEON_GPU_PAGE_SIZE,
-					true, RADEON_GEM_DOMAIN_VRAM,
-					&rdev->vram_scratch.robj);
+				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
+				     &rdev->vram_scratch.robj);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3ca77dc..148a322 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -224,6 +224,9 @@
 	int ret;
 
 	while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) {
+		/**
+		 * Deadlock avoidance for multi-bo reserving.
+		 */
 		if (use_sequence && bo->seq_valid &&
 			(sequence - bo->val_seq < (1 << 31))) {
 			return -EAGAIN;
@@ -241,6 +244,14 @@
 	}
 
 	if (use_sequence) {
+		/**
+		 * Wake up waiters that may need to recheck for deadlock,
+		 * if we decreased the sequence number.
+		 */
+		if (unlikely((bo->val_seq - sequence < (1 << 31))
+			     || !bo->seq_valid))
+			wake_up_all(&bo->event_queue);
+
 		bo->val_seq = sequence;
 		bo->seq_valid = true;
 	} else {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 36e129f..5408b1b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -862,7 +862,7 @@
 			      &vmw_vram_sys_placement, true,
 			      &vmw_user_dmabuf_destroy);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_no_dmabuf;
 
 	tmp = ttm_bo_reference(&vmw_user_bo->dma.base);
 	ret = ttm_base_object_init(vmw_fpriv(file_priv)->tfile,
@@ -870,19 +870,21 @@
 				   false,
 				   ttm_buffer_type,
 				   &vmw_user_dmabuf_release, NULL);
-	if (unlikely(ret != 0)) {
-		ttm_bo_unref(&tmp);
-	} else {
+	if (unlikely(ret != 0))
+		goto out_no_base_object;
+	else {
 		rep->handle = vmw_user_bo->base.hash.key;
 		rep->map_handle = vmw_user_bo->dma.base.addr_space_offset;
 		rep->cur_gmr_id = vmw_user_bo->base.hash.key;
 		rep->cur_gmr_offset = 0;
 	}
-	ttm_bo_unref(&tmp);
 
+out_no_base_object:
+	ttm_bo_unref(&tmp);
+out_no_dmabuf:
 	ttm_read_unlock(&vmaster->lock);
 
-	return 0;
+	return ret;
 }
 
 int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 8a4b32d..e1f0748 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -32,7 +32,6 @@
 #include <linux/hid.h>
 #include <linux/mutex.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 
 #include <linux/hidraw.h>
 
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index fedd88d..984feb3 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -29,7 +29,6 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <linux/input.h>
 #include <linux/usb.h>
 #include <linux/hid.h>
diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c
index fa9708c..4033974 100644
--- a/drivers/hwmon/amc6821.c
+++ b/drivers/hwmon/amc6821.c
@@ -4,7 +4,7 @@
 	Copyright (C) 2009 T. Mertelj <tomaz.mertelj@guest.arnes.si>
 
 	Based on max6650.c:
-	Copyright (C) 2007 Hans J. Koch <hjk@linutronix.de>
+	Copyright (C) 2007 Hans J. Koch <hjk@hansjkoch.de>
 
 	This program is free software; you can redistribute it and/or modify
 	it under the terms of the GNU General Public License as published by
diff --git a/drivers/hwmon/i5k_amb.c b/drivers/hwmon/i5k_amb.c
index 9379834..c4c40be 100644
--- a/drivers/hwmon/i5k_amb.c
+++ b/drivers/hwmon/i5k_amb.c
@@ -497,12 +497,14 @@
 	0
 };
 
+#ifdef MODULE
 static struct pci_device_id i5k_amb_ids[] __devinitdata = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, i5k_amb_ids);
+#endif
 
 static int __devinit i5k_amb_probe(struct platform_device *pdev)
 {
diff --git a/drivers/hwmon/lis3lv02d_i2c.c b/drivers/hwmon/lis3lv02d_i2c.c
index 9f4bae0..8853afc 100644
--- a/drivers/hwmon/lis3lv02d_i2c.c
+++ b/drivers/hwmon/lis3lv02d_i2c.c
@@ -186,7 +186,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int lis3lv02d_i2c_suspend(struct device *dev)
 {
 	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
@@ -213,12 +213,9 @@
 
 	return 0;
 }
-#else
-#define lis3lv02d_i2c_suspend	NULL
-#define lis3lv02d_i2c_resume	NULL
-#define lis3lv02d_i2c_shutdown	NULL
-#endif
+#endif /* CONFIG_PM_SLEEP */
 
+#ifdef CONFIG_PM_RUNTIME
 static int lis3_i2c_runtime_suspend(struct device *dev)
 {
 	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
@@ -236,6 +233,7 @@
 	lis3lv02d_poweron(lis3);
 	return 0;
 }
+#endif /* CONFIG_PM_RUNTIME */
 
 static const struct i2c_device_id lis3lv02d_id[] = {
 	{"lis3lv02d", 0 },
diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c
index 6669255..c9ed14e 100644
--- a/drivers/hwmon/lm93.c
+++ b/drivers/hwmon/lm93.c
@@ -20,7 +20,7 @@
     Adapted to 2.6.20 by Carsten Emde <cbe@osadl.org>
         Copyright (c) 2006 Carsten Emde, Open Source Automation Development Lab
 
-    Modified for mainline integration by Hans J. Koch <hjk@linutronix.de>
+    Modified for mainline integration by Hans J. Koch <hjk@hansjkoch.de>
         Copyright (c) 2007 Hans J. Koch, Linutronix GmbH
 
     This program is free software; you can redistribute it and/or modify
@@ -2629,7 +2629,7 @@
 }
 
 MODULE_AUTHOR("Mark M. Hoffman <mhoffman@lightlink.com>, "
-		"Hans J. Koch <hjk@linutronix.de");
+		"Hans J. Koch <hjk@hansjkoch.de>");
 MODULE_DESCRIPTION("LM93 driver");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/hwmon/lm95241.c b/drivers/hwmon/lm95241.c
index 464340f..4546d82 100644
--- a/drivers/hwmon/lm95241.c
+++ b/drivers/hwmon/lm95241.c
@@ -128,9 +128,12 @@
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct lm95241_data *data = i2c_get_clientdata(client);
+	unsigned long val;
 
-	strict_strtol(buf, 10, &data->interval);
-	data->interval = data->interval * HZ / 1000;
+	if (strict_strtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	data->interval = val * HZ / 1000;
 
 	return count;
 }
@@ -188,7 +191,9 @@
 	struct lm95241_data *data = i2c_get_clientdata(client); \
 \
 	long val; \
-	strict_strtol(buf, 10, &val); \
+\
+	if (strict_strtol(buf, 10, &val) < 0) \
+		return -EINVAL; \
 \
 	if ((val == 1) || (val == 2)) { \
 \
@@ -227,7 +232,9 @@
 	struct lm95241_data *data = i2c_get_clientdata(client); \
 \
 	long val; \
-	strict_strtol(buf, 10, &val); \
+\
+	if (strict_strtol(buf, 10, &val) < 0) \
+		return -EINVAL;\
 \
 	mutex_lock(&data->update_lock); \
 \
@@ -256,7 +263,9 @@
 	struct lm95241_data *data = i2c_get_clientdata(client); \
 \
 	long val; \
-	strict_strtol(buf, 10, &val); \
+\
+	if (strict_strtol(buf, 10, &val) < 0) \
+		return -EINVAL; \
 \
 	mutex_lock(&data->update_lock); \
 \
diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c
index a0160ee..9a11532 100644
--- a/drivers/hwmon/max6650.c
+++ b/drivers/hwmon/max6650.c
@@ -2,7 +2,7 @@
  * max6650.c - Part of lm_sensors, Linux kernel modules for hardware
  *             monitoring.
  *
- * (C) 2007 by Hans J. Koch <hjk@linutronix.de>
+ * (C) 2007 by Hans J. Koch <hjk@hansjkoch.de>
  *
  * based on code written by John Morris <john.morris@spirentcom.com>
  * Copyright (c) 2003 Spirent Communications
diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c
index 1d840aa..cdbc744 100644
--- a/drivers/hwmon/w83795.c
+++ b/drivers/hwmon/w83795.c
@@ -165,10 +165,14 @@
 
 #define W83795_REG_VID_CTRL		0x6A
 
+#define W83795_REG_ALARM_CTRL		0x40
+#define ALARM_CTRL_RTSACS		(1 << 7)
 #define W83795_REG_ALARM(index)		(0x41 + (index))
+#define W83795_REG_CLR_CHASSIS		0x4D
 #define W83795_REG_BEEP(index)		(0x50 + (index))
 
-#define W83795_REG_CLR_CHASSIS		0x4D
+#define W83795_REG_OVT_CFG		0x58
+#define OVT_CFG_SEL			(1 << 7)
 
 
 #define W83795_REG_FCMS1		0x201
@@ -178,6 +182,14 @@
 
 #define W83795_REG_TSS(index)		(0x209 + (index))
 
+#define TSS_MAP_RESERVED		0xff
+static const u8 tss_map[4][6] = {
+	{ 0,  1,  2,  3,  4,  5},
+	{ 6,  7,  8,  9,  0,  1},
+	{10, 11, 12, 13,  2,  3},
+	{ 4,  5,  4,  5, TSS_MAP_RESERVED, TSS_MAP_RESERVED},
+};
+
 #define PWM_OUTPUT			0
 #define PWM_FREQ			1
 #define PWM_START			2
@@ -369,6 +381,7 @@
 	u8 setup_pwm[3];	/* Register value */
 
 	u8 alarms[6];		/* Register value */
+	u8 enable_beep;
 	u8 beeps[6];		/* Register value */
 
 	char valid;
@@ -499,8 +512,11 @@
 	}
 
 	/* Read beep settings */
-	for (i = 0; i < ARRAY_SIZE(data->beeps); i++)
-		data->beeps[i] = w83795_read(client, W83795_REG_BEEP(i));
+	if (data->enable_beep) {
+		for (i = 0; i < ARRAY_SIZE(data->beeps); i++)
+			data->beeps[i] =
+				w83795_read(client, W83795_REG_BEEP(i));
+	}
 
 	data->valid_limits = 1;
 }
@@ -577,6 +593,7 @@
 	struct i2c_client *client = to_i2c_client(dev);
 	struct w83795_data *data = i2c_get_clientdata(client);
 	u16 tmp;
+	u8 intrusion;
 	int i;
 
 	mutex_lock(&data->update_lock);
@@ -648,9 +665,24 @@
 		    w83795_read(client, W83795_REG_PWM(i, PWM_OUTPUT));
 	}
 
-	/* update alarm */
+	/* Update intrusion and alarms
+	 * It is important to read intrusion first, because reading from
+	 * register SMI STS6 clears the interrupt status temporarily. */
+	tmp = w83795_read(client, W83795_REG_ALARM_CTRL);
+	/* Switch to interrupt status for intrusion if needed */
+	if (tmp & ALARM_CTRL_RTSACS)
+		w83795_write(client, W83795_REG_ALARM_CTRL,
+			     tmp & ~ALARM_CTRL_RTSACS);
+	intrusion = w83795_read(client, W83795_REG_ALARM(5)) & (1 << 6);
+	/* Switch to real-time alarms */
+	w83795_write(client, W83795_REG_ALARM_CTRL, tmp | ALARM_CTRL_RTSACS);
 	for (i = 0; i < ARRAY_SIZE(data->alarms); i++)
 		data->alarms[i] = w83795_read(client, W83795_REG_ALARM(i));
+	data->alarms[5] |= intrusion;
+	/* Restore original configuration if needed */
+	if (!(tmp & ALARM_CTRL_RTSACS))
+		w83795_write(client, W83795_REG_ALARM_CTRL,
+			     tmp & ~ALARM_CTRL_RTSACS);
 
 	data->last_updated = jiffies;
 	data->valid = 1;
@@ -730,6 +762,10 @@
 	val = w83795_read(client, W83795_REG_CLR_CHASSIS);
 	val |= 0x80;
 	w83795_write(client, W83795_REG_CLR_CHASSIS, val);
+
+	/* Clear status and force cache refresh */
+	w83795_read(client, W83795_REG_ALARM(5));
+	data->valid = 0;
 	mutex_unlock(&data->update_lock);
 	return count;
 }
@@ -857,20 +893,20 @@
 	int index = sensor_attr->index;
 	u8 tmp;
 
-	if (1 == (data->pwm_fcms[0] & (1 << index))) {
+	/* Speed cruise mode */
+	if (data->pwm_fcms[0] & (1 << index)) {
 		tmp = 2;
 		goto out;
 	}
+	/* Thermal cruise or SmartFan IV mode */
 	for (tmp = 0; tmp < 6; tmp++) {
 		if (data->pwm_tfmr[tmp] & (1 << index)) {
 			tmp = 3;
 			goto out;
 		}
 	}
-	if (data->pwm_fomc & (1 << index))
-		tmp = 0;
-	else
-		tmp = 1;
+	/* Manual mode */
+	tmp = 1;
 
 out:
 	return sprintf(buf, "%u\n", tmp);
@@ -890,23 +926,21 @@
 
 	if (strict_strtoul(buf, 10, &val) < 0)
 		return -EINVAL;
-	if (val > 2)
+	if (val < 1 || val > 2)
 		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
 	switch (val) {
-	case 0:
 	case 1:
+		/* Clear speed cruise mode bits */
 		data->pwm_fcms[0] &= ~(1 << index);
 		w83795_write(client, W83795_REG_FCMS1, data->pwm_fcms[0]);
+		/* Clear thermal cruise mode bits */
 		for (i = 0; i < 6; i++) {
 			data->pwm_tfmr[i] &= ~(1 << index);
 			w83795_write(client, W83795_REG_TFMR(i),
 				data->pwm_tfmr[i]);
 		}
-		data->pwm_fomc |= 1 << index;
-		data->pwm_fomc ^= val << index;
-		w83795_write(client, W83795_REG_FOMC, data->pwm_fomc);
 		break;
 	case 2:
 		data->pwm_fcms[0] |= (1 << index);
@@ -918,23 +952,60 @@
 }
 
 static ssize_t
+show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct w83795_data *data = w83795_update_pwm_config(dev);
+	int index = to_sensor_dev_attr_2(attr)->index;
+	unsigned int mode;
+
+	if (data->pwm_fomc & (1 << index))
+		mode = 0;	/* DC */
+	else
+		mode = 1;	/* PWM */
+
+	return sprintf(buf, "%u\n", mode);
+}
+
+/*
+ * Check whether a given temperature source can ever be useful.
+ * Returns the number of selectable temperature channels which are
+ * enabled.
+ */
+static int w83795_tss_useful(const struct w83795_data *data, int tsrc)
+{
+	int useful = 0, i;
+
+	for (i = 0; i < 4; i++) {
+		if (tss_map[i][tsrc] == TSS_MAP_RESERVED)
+			continue;
+		if (tss_map[i][tsrc] < 6)	/* Analog */
+			useful += (data->has_temp >> tss_map[i][tsrc]) & 1;
+		else				/* Digital */
+			useful += (data->has_dts >> (tss_map[i][tsrc] - 6)) & 1;
+	}
+
+	return useful;
+}
+
+static ssize_t
 show_temp_src(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct sensor_device_attribute_2 *sensor_attr =
 	    to_sensor_dev_attr_2(attr);
 	struct w83795_data *data = w83795_update_pwm_config(dev);
 	int index = sensor_attr->index;
-	u8 val = index / 2;
-	u8 tmp = data->temp_src[val];
+	u8 tmp = data->temp_src[index / 2];
 
 	if (index & 1)
-		val = 4;
+		tmp >>= 4;	/* Pick high nibble */
 	else
-		val = 0;
-	tmp >>= val;
-	tmp &= 0x0f;
+		tmp &= 0x0f;	/* Pick low nibble */
 
-	return sprintf(buf, "%u\n", tmp);
+	/* Look-up the actual temperature channel number */
+	if (tmp >= 4 || tss_map[tmp][index] == TSS_MAP_RESERVED)
+		return -EINVAL;		/* Shouldn't happen */
+
+	return sprintf(buf, "%u\n", (unsigned int)tss_map[tmp][index] + 1);
 }
 
 static ssize_t
@@ -946,12 +1017,21 @@
 	struct sensor_device_attribute_2 *sensor_attr =
 	    to_sensor_dev_attr_2(attr);
 	int index = sensor_attr->index;
-	unsigned long tmp;
+	int tmp;
+	unsigned long channel;
 	u8 val = index / 2;
 
-	if (strict_strtoul(buf, 10, &tmp) < 0)
+	if (strict_strtoul(buf, 10, &channel) < 0 ||
+	    channel < 1 || channel > 14)
 		return -EINVAL;
-	tmp = SENSORS_LIMIT(tmp, 0, 15);
+
+	/* Check if request can be fulfilled */
+	for (tmp = 0; tmp < 4; tmp++) {
+		if (tss_map[tmp][index] == channel - 1)
+			break;
+	}
+	if (tmp == 4)	/* No match */
+		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
 	if (index & 1) {
@@ -1515,7 +1595,7 @@
 
 #define NOT_USED			-1
 
-/* Don't change the attribute order, _max and _min are accessed by index
+/* Don't change the attribute order, _max, _min and _beep are accessed by index
  * somewhere else in the code */
 #define SENSOR_ATTR_IN(index) {						\
 	SENSOR_ATTR_2(in##index##_input, S_IRUGO, show_in, NULL,	\
@@ -1530,6 +1610,8 @@
 		show_alarm_beep, store_beep, BEEP_ENABLE,		\
 		index + ((index > 14) ? 1 : 0)) }
 
+/* Don't change the attribute order, _beep is accessed by index
+ * somewhere else in the code */
 #define SENSOR_ATTR_FAN(index) {					\
 	SENSOR_ATTR_2(fan##index##_input, S_IRUGO, show_fan,		\
 		NULL, FAN_INPUT, index - 1), \
@@ -1553,9 +1635,13 @@
 		show_pwm, store_pwm, PWM_FREQ, index - 1),	 \
 	SENSOR_ATTR_2(pwm##index##_enable, S_IWUSR | S_IRUGO,		\
 		show_pwm_enable, store_pwm_enable, NOT_USED, index - 1), \
+	SENSOR_ATTR_2(pwm##index##_mode, S_IRUGO,			\
+		show_pwm_mode, NULL, NOT_USED, index - 1),		\
 	SENSOR_ATTR_2(fan##index##_target, S_IWUSR | S_IRUGO, \
 		show_fanin, store_fanin, FANIN_TARGET, index - 1) }
 
+/* Don't change the attribute order, _beep is accessed by index
+ * somewhere else in the code */
 #define SENSOR_ATTR_DTS(index) {					\
 	SENSOR_ATTR_2(temp##index##_type, S_IRUGO ,		\
 		show_dts_mode, NULL, NOT_USED, index - 7),	\
@@ -1574,6 +1660,8 @@
 	SENSOR_ATTR_2(temp##index##_beep, S_IWUSR | S_IRUGO,		\
 		show_alarm_beep, store_beep, BEEP_ENABLE, index + 17) }
 
+/* Don't change the attribute order, _beep is accessed by index
+ * somewhere else in the code */
 #define SENSOR_ATTR_TEMP(index) {					\
 	SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 4 ? S_IWUSR : 0), \
 		show_temp_mode, store_temp_mode, NOT_USED, index - 1),	\
@@ -1593,8 +1681,6 @@
 	SENSOR_ATTR_2(temp##index##_beep, S_IWUSR | S_IRUGO,		\
 		show_alarm_beep, store_beep, BEEP_ENABLE,		\
 		index + (index > 4 ? 11 : 17)),				\
-	SENSOR_ATTR_2(temp##index##_source_sel, S_IWUSR | S_IRUGO,	\
-		show_temp_src, store_temp_src, NOT_USED, index - 1),	\
 	SENSOR_ATTR_2(temp##index##_pwm_enable, S_IWUSR | S_IRUGO,	\
 		show_temp_pwm_enable, store_temp_pwm_enable,		\
 		TEMP_PWM_ENABLE, index - 1),				\
@@ -1680,7 +1766,7 @@
 	SENSOR_ATTR_FAN(14),
 };
 
-static const struct sensor_device_attribute_2 w83795_temp[][29] = {
+static const struct sensor_device_attribute_2 w83795_temp[][28] = {
 	SENSOR_ATTR_TEMP(1),
 	SENSOR_ATTR_TEMP(2),
 	SENSOR_ATTR_TEMP(3),
@@ -1700,7 +1786,7 @@
 	SENSOR_ATTR_DTS(14),
 };
 
-static const struct sensor_device_attribute_2 w83795_pwm[][7] = {
+static const struct sensor_device_attribute_2 w83795_pwm[][8] = {
 	SENSOR_ATTR_PWM(1),
 	SENSOR_ATTR_PWM(2),
 	SENSOR_ATTR_PWM(3),
@@ -1711,13 +1797,24 @@
 	SENSOR_ATTR_PWM(8),
 };
 
+static const struct sensor_device_attribute_2 w83795_tss[6] = {
+	SENSOR_ATTR_2(temp1_source_sel, S_IWUSR | S_IRUGO,
+		      show_temp_src, store_temp_src, NOT_USED, 0),
+	SENSOR_ATTR_2(temp2_source_sel, S_IWUSR | S_IRUGO,
+		      show_temp_src, store_temp_src, NOT_USED, 1),
+	SENSOR_ATTR_2(temp3_source_sel, S_IWUSR | S_IRUGO,
+		      show_temp_src, store_temp_src, NOT_USED, 2),
+	SENSOR_ATTR_2(temp4_source_sel, S_IWUSR | S_IRUGO,
+		      show_temp_src, store_temp_src, NOT_USED, 3),
+	SENSOR_ATTR_2(temp5_source_sel, S_IWUSR | S_IRUGO,
+		      show_temp_src, store_temp_src, NOT_USED, 4),
+	SENSOR_ATTR_2(temp6_source_sel, S_IWUSR | S_IRUGO,
+		      show_temp_src, store_temp_src, NOT_USED, 5),
+};
+
 static const struct sensor_device_attribute_2 sda_single_files[] = {
 	SENSOR_ATTR_2(intrusion0_alarm, S_IWUSR | S_IRUGO, show_alarm_beep,
 		      store_chassis_clear, ALARM_STATUS, 46),
-	SENSOR_ATTR_2(intrusion0_beep, S_IWUSR | S_IRUGO, show_alarm_beep,
-		      store_beep, BEEP_ENABLE, 46),
-	SENSOR_ATTR_2(beep_enable, S_IWUSR | S_IRUGO, show_alarm_beep,
-		      store_beep, BEEP_ENABLE, 47),
 #ifdef CONFIG_SENSORS_W83795_FANCTRL
 	SENSOR_ATTR_2(speed_cruise_tolerance, S_IWUSR | S_IRUGO, show_fanin,
 		store_fanin, FANIN_TOL, NOT_USED),
@@ -1730,6 +1827,13 @@
 #endif
 };
 
+static const struct sensor_device_attribute_2 sda_beep_files[] = {
+	SENSOR_ATTR_2(intrusion0_beep, S_IWUSR | S_IRUGO, show_alarm_beep,
+		      store_beep, BEEP_ENABLE, 46),
+	SENSOR_ATTR_2(beep_enable, S_IWUSR | S_IRUGO, show_alarm_beep,
+		      store_beep, BEEP_ENABLE, 47),
+};
+
 /*
  * Driver interface
  */
@@ -1859,6 +1963,8 @@
 		if (!(data->has_in & (1 << i)))
 			continue;
 		for (j = 0; j < ARRAY_SIZE(w83795_in[0]); j++) {
+			if (j == 4 && !data->enable_beep)
+				continue;
 			err = fn(dev, &w83795_in[i][j].dev_attr);
 			if (err)
 				return err;
@@ -1869,18 +1975,37 @@
 		if (!(data->has_fan & (1 << i)))
 			continue;
 		for (j = 0; j < ARRAY_SIZE(w83795_fan[0]); j++) {
+			if (j == 3 && !data->enable_beep)
+				continue;
 			err = fn(dev, &w83795_fan[i][j].dev_attr);
 			if (err)
 				return err;
 		}
 	}
 
+	for (i = 0; i < ARRAY_SIZE(w83795_tss); i++) {
+		j = w83795_tss_useful(data, i);
+		if (!j)
+			continue;
+		err = fn(dev, &w83795_tss[i].dev_attr);
+		if (err)
+			return err;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(sda_single_files); i++) {
 		err = fn(dev, &sda_single_files[i].dev_attr);
 		if (err)
 			return err;
 	}
 
+	if (data->enable_beep) {
+		for (i = 0; i < ARRAY_SIZE(sda_beep_files); i++) {
+			err = fn(dev, &sda_beep_files[i].dev_attr);
+			if (err)
+				return err;
+		}
+	}
+
 #ifdef CONFIG_SENSORS_W83795_FANCTRL
 	for (i = 0; i < data->has_pwm; i++) {
 		for (j = 0; j < ARRAY_SIZE(w83795_pwm[0]); j++) {
@@ -1899,6 +2024,8 @@
 #else
 		for (j = 0; j < 8; j++) {
 #endif
+			if (j == 7 && !data->enable_beep)
+				continue;
 			err = fn(dev, &w83795_temp[i][j].dev_attr);
 			if (err)
 				return err;
@@ -1910,6 +2037,8 @@
 			if (!(data->has_dts & (1 << i)))
 				continue;
 			for (j = 0; j < ARRAY_SIZE(w83795_dts[0]); j++) {
+				if (j == 7 && !data->enable_beep)
+					continue;
 				err = fn(dev, &w83795_dts[i][j].dev_attr);
 				if (err)
 					return err;
@@ -2049,6 +2178,18 @@
 	else
 		data->has_pwm = 2;
 
+	/* Check if BEEP pin is available */
+	if (data->chip_type == w83795g) {
+		/* The W83795G has a dedicated BEEP pin */
+		data->enable_beep = 1;
+	} else {
+		/* The W83795ADG has a shared pin for OVT# and BEEP, so you
+		 * can't have both */
+		tmp = w83795_read(client, W83795_REG_OVT_CFG);
+		if ((tmp & OVT_CFG_SEL) == 0)
+			data->enable_beep = 1;
+	}
+
 	err = w83795_handle_files(dev, device_create_file);
 	if (err)
 		goto exit_remove;
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index d231f68..6b4cc56 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -848,6 +848,18 @@
 		goto out_list;
 	}
 
+	/* Sanity checks */
+	if (unlikely(adap->name[0] == '\0')) {
+		pr_err("i2c-core: Attempt to register an adapter with "
+		       "no name!\n");
+		return -EINVAL;
+	}
+	if (unlikely(!adap->algo)) {
+		pr_err("i2c-core: Attempt to register adapter '%s' with "
+		       "no algo!\n", adap->name);
+		return -EINVAL;
+	}
+
 	rt_mutex_init(&adap->bus_lock);
 	mutex_init(&adap->userspace_clients_lock);
 	INIT_LIST_HEAD(&adap->userspace_clients);
diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index d32a484..d7a4833 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -120,7 +120,6 @@
 	snprintf(priv->adap.name, sizeof(priv->adap.name),
 		 "i2c-%d-mux (chan_id %d)", i2c_adapter_id(parent), chan_id);
 	priv->adap.owner = THIS_MODULE;
-	priv->adap.id = parent->id;
 	priv->adap.algo = &priv->algo;
 	priv->adap.algo_data = priv;
 	priv->adap.dev.parent = &parent->dev;
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 6078992..9292a15 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -40,7 +40,6 @@
 #include <linux/highmem.h>
 #include <linux/io.h>
 #include <linux/jiffies.h>
-#include <linux/smp_lock.h>
 #include <asm/pgtable.h>
 
 #include "ipath_kernel.h"
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index cfc1d65..1e1e347 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1123,7 +1123,7 @@
 	}
 }
 
-static int srp_queuecommand(struct scsi_cmnd *scmnd,
+static int srp_queuecommand_lck(struct scsi_cmnd *scmnd,
 			    void (*done)(struct scsi_cmnd *))
 {
 	struct srp_target_port *target = host_to_target(scmnd->device->host);
@@ -1196,6 +1196,8 @@
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
 
+static DEF_SCSI_QCMD(srp_queuecommand)
+
 static int srp_alloc_iu_bufs(struct srp_target_port *target)
 {
 	int i;
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 7f26ca6..db409d6 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -24,7 +24,6 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
-#include <linux/smp_lock.h>
 #include "input-compat.h"
 
 MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>");
@@ -753,7 +752,7 @@
 	if (index >= dev->keycodemax)
 		return -EINVAL;
 
-	if (dev->keycodesize < sizeof(dev->keycode) &&
+	if (dev->keycodesize < sizeof(ke->keycode) &&
 			(ke->keycode >> (dev->keycodesize * 8)))
 		return -EINVAL;
 
diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c
index cd82bb1..b7ba459 100644
--- a/drivers/input/serio/serio_raw.c
+++ b/drivers/input/serio/serio_raw.c
@@ -11,7 +11,6 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/poll.h>
 #include <linux/module.h>
 #include <linux/serio.h>
diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
index 57b25b84..0a619c5 100644
--- a/drivers/input/tablet/aiptek.c
+++ b/drivers/input/tablet/aiptek.c
@@ -1097,7 +1097,7 @@
 }
 
 static DEVICE_ATTR(pointer_mode,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletPointerMode, store_tabletPointerMode);
 
 /***********************************************************************
@@ -1134,7 +1134,7 @@
 }
 
 static DEVICE_ATTR(coordinate_mode,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletCoordinateMode, store_tabletCoordinateMode);
 
 /***********************************************************************
@@ -1176,7 +1176,7 @@
 }
 
 static DEVICE_ATTR(tool_mode,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletToolMode, store_tabletToolMode);
 
 /***********************************************************************
@@ -1219,7 +1219,7 @@
 }
 
 static DEVICE_ATTR(xtilt,
-		   S_IRUGO | S_IWUGO, show_tabletXtilt, store_tabletXtilt);
+		   S_IRUGO | S_IWUSR, show_tabletXtilt, store_tabletXtilt);
 
 /***********************************************************************
  * support routines for the 'ytilt' file. Note that this file
@@ -1261,7 +1261,7 @@
 }
 
 static DEVICE_ATTR(ytilt,
-		   S_IRUGO | S_IWUGO, show_tabletYtilt, store_tabletYtilt);
+		   S_IRUGO | S_IWUSR, show_tabletYtilt, store_tabletYtilt);
 
 /***********************************************************************
  * support routines for the 'jitter' file. Note that this file
@@ -1288,7 +1288,7 @@
 }
 
 static DEVICE_ATTR(jitter,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletJitterDelay, store_tabletJitterDelay);
 
 /***********************************************************************
@@ -1317,7 +1317,7 @@
 }
 
 static DEVICE_ATTR(delay,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletProgrammableDelay, store_tabletProgrammableDelay);
 
 /***********************************************************************
@@ -1406,7 +1406,7 @@
 }
 
 static DEVICE_ATTR(stylus_upper,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletStylusUpper, store_tabletStylusUpper);
 
 /***********************************************************************
@@ -1437,7 +1437,7 @@
 }
 
 static DEVICE_ATTR(stylus_lower,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletStylusLower, store_tabletStylusLower);
 
 /***********************************************************************
@@ -1475,7 +1475,7 @@
 }
 
 static DEVICE_ATTR(mouse_left,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletMouseLeft, store_tabletMouseLeft);
 
 /***********************************************************************
@@ -1505,7 +1505,7 @@
 }
 
 static DEVICE_ATTR(mouse_middle,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletMouseMiddle, store_tabletMouseMiddle);
 
 /***********************************************************************
@@ -1535,7 +1535,7 @@
 }
 
 static DEVICE_ATTR(mouse_right,
-		   S_IRUGO | S_IWUGO,
+		   S_IRUGO | S_IWUSR,
 		   show_tabletMouseRight, store_tabletMouseRight);
 
 /***********************************************************************
@@ -1567,7 +1567,7 @@
 }
 
 static DEVICE_ATTR(wheel,
-		   S_IRUGO | S_IWUGO, show_tabletWheel, store_tabletWheel);
+		   S_IRUGO | S_IWUSR, show_tabletWheel, store_tabletWheel);
 
 /***********************************************************************
  * support routines for the 'execute' file. Note that this file
@@ -1600,7 +1600,7 @@
 }
 
 static DEVICE_ATTR(execute,
-		   S_IRUGO | S_IWUGO, show_tabletExecute, store_tabletExecute);
+		   S_IRUGO | S_IWUSR, show_tabletExecute, store_tabletExecute);
 
 /***********************************************************************
  * support routines for the 'odm_code' file. Note that this file
diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c
index 3782f31..33facd0 100644
--- a/drivers/leds/leds-lp5521.c
+++ b/drivers/leds/leds-lp5521.c
@@ -125,11 +125,22 @@
 	u8			num_leds;
 };
 
-#define cdev_to_led(c)		container_of(c, struct lp5521_led, cdev)
-#define engine_to_lp5521(eng)	container_of((eng), struct lp5521_chip, \
-						engines[(eng)->id - 1])
-#define led_to_lp5521(led)	container_of((led), struct lp5521_chip, \
-						leds[(led)->id])
+static inline struct lp5521_led *cdev_to_led(struct led_classdev *cdev)
+{
+	return container_of(cdev, struct lp5521_led, cdev);
+}
+
+static inline struct lp5521_chip *engine_to_lp5521(struct lp5521_engine *engine)
+{
+	return container_of(engine, struct lp5521_chip,
+			    engines[engine->id - 1]);
+}
+
+static inline struct lp5521_chip *led_to_lp5521(struct lp5521_led *led)
+{
+	return container_of(led, struct lp5521_chip,
+			    leds[led->id]);
+}
 
 static void lp5521_led_brightness_work(struct work_struct *work);
 
@@ -185,14 +196,17 @@
 
 	/* move current engine to direct mode and remember the state */
 	ret = lp5521_set_engine_mode(eng, LP5521_CMD_DIRECT);
-	usleep_range(1000, 10000);
+	/* Mode change requires min 500 us delay. 1 - 2 ms  with margin */
+	usleep_range(1000, 2000);
 	ret |= lp5521_read(client, LP5521_REG_OP_MODE, &mode);
 
 	/* For loading, all the engines to load mode */
 	lp5521_write(client, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT);
-	usleep_range(1000, 10000);
+	/* Mode change requires min 500 us delay. 1 - 2 ms  with margin */
+	usleep_range(1000, 2000);
 	lp5521_write(client, LP5521_REG_OP_MODE, LP5521_CMD_LOAD);
-	usleep_range(1000, 10000);
+	/* Mode change requires min 500 us delay. 1 - 2 ms  with margin */
+	usleep_range(1000, 2000);
 
 	addr = LP5521_PROG_MEM_BASE + eng->prog_page * LP5521_PROG_MEM_SIZE;
 	i2c_smbus_write_i2c_block_data(client,
@@ -231,10 +245,6 @@
 
 	lp5521_init_engine(chip, attr_group);
 
-	lp5521_write(client, LP5521_REG_RESET, 0xff);
-
-	usleep_range(10000, 20000);
-
 	/* Set all PWMs to direct control mode */
 	ret = lp5521_write(client, LP5521_REG_OP_MODE, 0x3F);
 
@@ -251,8 +261,8 @@
 	ret |= lp5521_write(client, LP5521_REG_ENABLE,
 			LP5521_MASTER_ENABLE | LP5521_LOGARITHMIC_PWM |
 			LP5521_EXEC_RUN);
-	/* enable takes 500us */
-	usleep_range(500, 20000);
+	/* enable takes 500us. 1 - 2 ms leaves some margin */
+	usleep_range(1000, 2000);
 
 	return ret;
 }
@@ -305,7 +315,8 @@
 			LP5521_MASTER_ENABLE | LP5521_LOGARITHMIC_PWM);
 	if (ret)
 		return ret;
-	usleep_range(1000, 10000);
+	/* enable takes 500us. 1 - 2 ms leaves some margin */
+	usleep_range(1000, 2000);
 	ret = lp5521_read(client, LP5521_REG_ENABLE, &buf);
 	if (ret)
 		return ret;
@@ -693,11 +704,16 @@
 
 	if (pdata->enable) {
 		pdata->enable(0);
-		usleep_range(1000, 10000);
+		usleep_range(1000, 2000); /* Keep enable down at least 1ms */
 		pdata->enable(1);
-		usleep_range(1000, 10000); /* Spec says min 500us */
+		usleep_range(1000, 2000); /* 500us abs min. */
 	}
 
+	lp5521_write(client, LP5521_REG_RESET, 0xff);
+	usleep_range(10000, 20000); /*
+				     * Exact value is not available. 10 - 20ms
+				     * appears to be enough for reset.
+				     */
 	ret = lp5521_detect(client);
 
 	if (ret) {
diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c
index 1e11fcc..0cc4ead 100644
--- a/drivers/leds/leds-lp5523.c
+++ b/drivers/leds/leds-lp5523.c
@@ -134,15 +134,18 @@
 	u8			num_leds;
 };
 
-#define cdev_to_led(c)          container_of(c, struct lp5523_led, cdev)
+static inline struct lp5523_led *cdev_to_led(struct led_classdev *cdev)
+{
+	return container_of(cdev, struct lp5523_led, cdev);
+}
 
-static struct lp5523_chip *engine_to_lp5523(struct lp5523_engine *engine)
+static inline struct lp5523_chip *engine_to_lp5523(struct lp5523_engine *engine)
 {
 	return container_of(engine, struct lp5523_chip,
 			    engines[engine->id - 1]);
 }
 
-static struct lp5523_chip *led_to_lp5523(struct lp5523_led *led)
+static inline struct lp5523_chip *led_to_lp5523(struct lp5523_led *led)
 {
 	return container_of(led, struct lp5523_chip,
 			    leds[led->id]);
@@ -200,13 +203,9 @@
 		{ 0x9c, 0x50, 0x9c, 0xd0, 0x9d, 0x80, 0xd8, 0x00, 0},
 	};
 
-	lp5523_write(client, LP5523_REG_RESET, 0xff);
-
-	usleep_range(10000, 100000);
-
 	ret |= lp5523_write(client, LP5523_REG_ENABLE, LP5523_ENABLE);
-	/* Chip startup time after reset is 500 us */
-	usleep_range(1000, 10000);
+	/* Chip startup time is 500 us, 1 - 2 ms gives some margin */
+	usleep_range(1000, 2000);
 
 	ret |= lp5523_write(client, LP5523_REG_CONFIG,
 			    LP5523_AUTO_INC | LP5523_PWR_SAVE |
@@ -243,8 +242,8 @@
 		return -1;
 	}
 
-	/* Wait 3ms and check the engine status */
-	usleep_range(3000, 20000);
+	/* Let the programs run for couple of ms and check the engine status */
+	usleep_range(3000, 6000);
 	lp5523_read(client, LP5523_REG_STATUS, &status);
 	status &= LP5523_ENG_STATUS_MASK;
 
@@ -449,10 +448,10 @@
 	/* Measure VDD (i.e. VBAT) first (channel 16 corresponds to VDD) */
 	lp5523_write(chip->client, LP5523_REG_LED_TEST_CTRL,
 				    LP5523_EN_LEDTEST | 16);
-	usleep_range(3000, 10000);
+	usleep_range(3000, 6000); /* ADC conversion time is typically 2.7 ms */
 	ret = lp5523_read(chip->client, LP5523_REG_STATUS, &status);
 	if (!(status & LP5523_LEDTEST_DONE))
-		usleep_range(3000, 10000);
+		usleep_range(3000, 6000); /* Was not ready. Wait little bit */
 
 	ret |= lp5523_read(chip->client, LP5523_REG_LED_TEST_ADC, &vdd);
 	vdd--;	/* There may be some fluctuation in measurement */
@@ -468,16 +467,16 @@
 			chip->pdata->led_config[i].led_current);
 
 		lp5523_write(chip->client, LP5523_REG_LED_PWM_BASE + i, 0xff);
-		/* let current stabilize 2ms before measurements start */
-		usleep_range(2000, 10000);
+		/* let current stabilize 2 - 4ms before measurements start */
+		usleep_range(2000, 4000);
 		lp5523_write(chip->client,
 			     LP5523_REG_LED_TEST_CTRL,
 			     LP5523_EN_LEDTEST | i);
-		/* ledtest takes 2.7ms */
-		usleep_range(3000, 10000);
+		/* ADC conversion time is 2.7 ms typically */
+		usleep_range(3000, 6000);
 		ret = lp5523_read(chip->client, LP5523_REG_STATUS, &status);
 		if (!(status & LP5523_LEDTEST_DONE))
-			usleep_range(3000, 10000);
+			usleep_range(3000, 6000);/* Was not ready. Wait. */
 		ret |= lp5523_read(chip->client, LP5523_REG_LED_TEST_ADC, &adc);
 
 		if (adc >= vdd || adc < LP5523_ADC_SHORTCIRC_LIM)
@@ -930,11 +929,16 @@
 
 	if (pdata->enable) {
 		pdata->enable(0);
-		usleep_range(1000, 10000);
+		usleep_range(1000, 2000); /* Keep enable down at least 1ms */
 		pdata->enable(1);
-		usleep_range(1000, 10000); /* Spec says min 500us */
+		usleep_range(1000, 2000); /* 500us abs min. */
 	}
 
+	lp5523_write(client, LP5523_REG_RESET, 0xff);
+	usleep_range(10000, 20000); /*
+				     * Exact value is not available. 10 - 20ms
+				     * appears to be enough for reset.
+				     */
 	ret = lp5523_detect(client);
 	if (ret)
 		goto fail2;
diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c
index a688293..614ebeb 100644
--- a/drivers/leds/leds-ss4200.c
+++ b/drivers/leds/leds-ss4200.c
@@ -102,6 +102,7 @@
 			DMI_MATCH(DMI_PRODUCT_VERSION, "1.00.00")
 		}
 	},
+	{}
 };
 
 /*
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 324a366..84c46a1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1337,7 +1337,7 @@
 	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
 		       rdev->sb_page);
 	md_super_wait(rdev->mddev);
-	return num_sectors / 2; /* kB for sysfs */
+	return num_sectors;
 }
 
 
@@ -1704,7 +1704,7 @@
 	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
 		       rdev->sb_page);
 	md_super_wait(rdev->mddev);
-	return num_sectors / 2; /* kB for sysfs */
+	return num_sectors;
 }
 
 static struct super_type super_types[] = {
@@ -4338,6 +4338,8 @@
 	if (mddev->kobj.sd &&
 	    sysfs_create_group(&mddev->kobj, &md_bitmap_group))
 		printk(KERN_DEBUG "pointless warning\n");
+
+	blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
  abort:
 	mutex_unlock(&disks_mutex);
 	if (!error && mddev->kobj.sd) {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 45f8324..845cf95 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1161,6 +1161,7 @@
 		 * is not possible.
 		 */
 		if (!test_bit(Faulty, &rdev->flags) &&
+		    !mddev->recovery_disabled &&
 		    mddev->degraded < conf->raid_disks) {
 			err = -EBUSY;
 			goto abort;
diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c
index 3d88542..74ee172 100644
--- a/drivers/media/common/saa7146_i2c.c
+++ b/drivers/media/common/saa7146_i2c.c
@@ -391,7 +391,6 @@
 
 /*****************************************************************************/
 /* i2c-adapter helper functions                                              */
-#include <linux/i2c-id.h>
 
 /* exported algorithm data */
 static struct i2c_algorithm saa7146_algo = {
diff --git a/drivers/media/dvb/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
index 4d0646d..7ea517b 100644
--- a/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
+++ b/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
@@ -36,7 +36,6 @@
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/kthread.h>
 
 #include "dvb_ca_en50221.h"
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index 1589d5a..cad6634 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -36,7 +36,6 @@
 #include <linux/list.h>
 #include <linux/freezer.h>
 #include <linux/jiffies.h>
-#include <linux/smp_lock.h>
 #include <linux/kthread.h>
 #include <asm/processor.h>
 
diff --git a/drivers/media/dvb/ngene/ngene-core.c b/drivers/media/dvb/ngene/ngene-core.c
index 4caeb16..3a7ef71 100644
--- a/drivers/media/dvb/ngene/ngene-core.c
+++ b/drivers/media/dvb/ngene/ngene-core.c
@@ -34,7 +34,6 @@
 #include <linux/io.h>
 #include <asm/div64.h>
 #include <linux/pci.h>
-#include <linux/smp_lock.h>
 #include <linux/timer.h>
 #include <linux/byteorder/generic.h>
 #include <linux/firmware.h>
diff --git a/drivers/media/dvb/ngene/ngene-dvb.c b/drivers/media/dvb/ngene/ngene-dvb.c
index 48f980b..3832e59 100644
--- a/drivers/media/dvb/ngene/ngene-dvb.c
+++ b/drivers/media/dvb/ngene/ngene-dvb.c
@@ -35,7 +35,6 @@
 #include <linux/io.h>
 #include <asm/div64.h>
 #include <linux/pci.h>
-#include <linux/smp_lock.h>
 #include <linux/timer.h>
 #include <linux/byteorder/generic.h>
 #include <linux/firmware.h>
diff --git a/drivers/media/dvb/ngene/ngene-i2c.c b/drivers/media/dvb/ngene/ngene-i2c.c
index c3ae956..d28554f 100644
--- a/drivers/media/dvb/ngene/ngene-i2c.c
+++ b/drivers/media/dvb/ngene/ngene-i2c.c
@@ -37,7 +37,6 @@
 #include <asm/div64.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
-#include <linux/smp_lock.h>
 #include <linux/timer.h>
 #include <linux/byteorder/generic.h>
 #include <linux/firmware.h>
diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c
index b540e80..e6b2d08 100644
--- a/drivers/media/radio/radio-mr800.c
+++ b/drivers/media/radio/radio-mr800.c
@@ -58,7 +58,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/input.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/radio/si470x/radio-si470x.h b/drivers/media/radio/si470x/radio-si470x.h
index ea12782..b9914d7 100644
--- a/drivers/media/radio/si470x/radio-si470x.h
+++ b/drivers/media/radio/si470x/radio-si470x.h
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/input.h>
 #include <linux/version.h>
 #include <linux/videodev2.h>
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index 3da6e80..a529619 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -42,7 +42,6 @@
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/kdev_t.h>
 #include "bttvp.h"
diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c
index 417d1d5..d7c9484 100644
--- a/drivers/media/video/cx88/cx88-blackbird.c
+++ b/drivers/media/video/cx88/cx88-blackbird.c
@@ -33,7 +33,6 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/firmware.h>
-#include <linux/smp_lock.h>
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
 #include <media/cx2341x.h>
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index d2f159d..88b5119 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -31,7 +31,6 @@
 #include <linux/kmod.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
diff --git a/drivers/media/video/imx074.c b/drivers/media/video/imx074.c
index 380e459..27b5dfd 100644
--- a/drivers/media/video/imx074.c
+++ b/drivers/media/video/imx074.c
@@ -451,7 +451,6 @@
 	ret = imx074_video_probe(icd, client);
 	if (ret < 0) {
 		icd->ops = NULL;
-		i2c_set_clientdata(client, NULL);
 		kfree(priv);
 		return ret;
 	}
@@ -468,7 +467,6 @@
 	icd->ops = NULL;
 	if (icl->free_bus)
 		icl->free_bus(icl);
-	i2c_set_clientdata(client, NULL);
 	client->driver = NULL;
 	kfree(priv);
 
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index 5a000c6..ce4a753 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -44,7 +44,6 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
-#include <linux/i2c-id.h>
 #include <linux/workqueue.h>
 
 #include <media/ir-core.h>
diff --git a/drivers/media/video/ov6650.c b/drivers/media/video/ov6650.c
index 31f1937..cf93de9 100644
--- a/drivers/media/video/ov6650.c
+++ b/drivers/media/video/ov6650.c
@@ -1174,7 +1174,6 @@
 
 	if (ret) {
 		icd->ops = NULL;
-		i2c_set_clientdata(client, NULL);
 		kfree(priv);
 	}
 
@@ -1185,7 +1184,6 @@
 {
 	struct ov6650 *priv = to_ov6650(client);
 
-	i2c_set_clientdata(client, NULL);
 	kfree(priv);
 	return 0;
 }
diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c
index e62beb4..f3dc89d 100644
--- a/drivers/media/video/pwc/pwc-if.c
+++ b/drivers/media/video/pwc/pwc-if.c
@@ -62,7 +62,6 @@
 #include <linux/module.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #ifdef CONFIG_USB_PWC_INPUT_EVDEV
 #include <linux/usb/input.h>
 #endif
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index f5a46c4..a845753 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -49,7 +49,6 @@
 #include <linux/videodev2.h>
 #include <linux/version.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <media/videobuf-vmalloc.h>
 #include <media/v4l2-common.h>
 #include <media/v4l2-device.h>
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index 1467a30..b890aaf 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -21,7 +21,6 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/smp_lock.h>
 #include <linux/delay.h>
 
 #include "saa7134-reg.h"
diff --git a/drivers/media/video/saa7164/saa7164.h b/drivers/media/video/saa7164/saa7164.h
index 1d9c5cb..041ae8e 100644
--- a/drivers/media/video/saa7164/saa7164.h
+++ b/drivers/media/video/saa7164/saa7164.h
@@ -58,7 +58,6 @@
 #include <media/tveeprom.h>
 #include <media/videobuf-dma-sg.h>
 #include <media/videobuf-dvb.h>
-#include <linux/smp_lock.h>
 #include <dvb_demux.h>
 #include <dvb_frontend.h>
 #include <dvb_net.h>
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index db6b828..011c0c3 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -50,7 +50,6 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index 86294ed3..e30e8df 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -18,7 +18,6 @@
 #include <linux/videodev.h>
 #include <linux/videodev2.h>
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <media/v4l2-ioctl.h>
 
 #ifdef CONFIG_COMPAT
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index e15220f..d784c36 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -97,8 +97,7 @@
 
 static int mptfc_target_alloc(struct scsi_target *starget);
 static int mptfc_slave_alloc(struct scsi_device *sdev);
-static int mptfc_qcmd(struct scsi_cmnd *SCpnt,
-		      void (*done)(struct scsi_cmnd *));
+static int mptfc_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *SCpnt);
 static void mptfc_target_destroy(struct scsi_target *starget);
 static void mptfc_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout);
 static void __devexit mptfc_remove(struct pci_dev *pdev);
@@ -650,7 +649,7 @@
 }
 
 static int
-mptfc_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+mptfc_qcmd_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
 	struct mptfc_rport_info	*ri;
 	struct fc_rport	*rport = starget_to_rport(scsi_target(SCpnt->device));
@@ -681,6 +680,8 @@
 	return mptscsih_qcmd(SCpnt,done);
 }
 
+static DEF_SCSI_QCMD(mptfc_qcmd)
+
 /*
  *	mptfc_display_port_link_speed - displaying link speed
  *	@ioc: Pointer to MPT_ADAPTER structure
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 83a5115..d48c2c6 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1889,7 +1889,7 @@
 }
 
 static int
-mptsas_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+mptsas_qcmd_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
 	MPT_SCSI_HOST	*hd;
 	MPT_ADAPTER	*ioc;
@@ -1913,6 +1913,8 @@
 	return mptscsih_qcmd(SCpnt,done);
 }
 
+static DEF_SCSI_QCMD(mptsas_qcmd)
+
 /**
  *	mptsas_mptsas_eh_timed_out - resets the scsi_cmnd timeout
  *		if the device under question is currently in the
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 0e28031..6d9568d 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -780,7 +780,7 @@
 }
 
 static int
-mptspi_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+mptspi_qcmd_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
 	struct _MPT_SCSI_HOST *hd = shost_priv(SCpnt->device->host);
 	VirtDevice	*vdevice = SCpnt->device->hostdata;
@@ -805,6 +805,8 @@
 	return mptscsih_qcmd(SCpnt,done);
 }
 
+static DEF_SCSI_QCMD(mptspi_qcmd)
+
 static void mptspi_slave_destroy(struct scsi_device *sdev)
 {
 	struct scsi_target *starget = scsi_target(sdev);
diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c
index ea6b219..97bdf82 100644
--- a/drivers/message/i2o/i2o_scsi.c
+++ b/drivers/message/i2o/i2o_scsi.c
@@ -506,7 +506,7 @@
  *	Locks: takes the controller lock on error path only
  */
 
-static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
+static int i2o_scsi_queuecommand_lck(struct scsi_cmnd *SCpnt,
 				 void (*done) (struct scsi_cmnd *))
 {
 	struct i2o_controller *c;
@@ -688,7 +688,9 @@
 
       exit:
 	return rc;
-};
+}
+
+static DEF_SCSI_QCMD(i2o_scsi_queuecommand)
 
 /**
  *	i2o_scsi_abort - abort a running command
diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c
index 0ed0935..644d4cd 100644
--- a/drivers/misc/apds9802als.c
+++ b/drivers/misc/apds9802als.c
@@ -251,7 +251,6 @@
 
 	return res;
 als_error1:
-	i2c_set_clientdata(client, NULL);
 	kfree(data);
 	return res;
 }
diff --git a/drivers/misc/isl29020.c b/drivers/misc/isl29020.c
index ca47e62..307aada 100644
--- a/drivers/misc/isl29020.c
+++ b/drivers/misc/isl29020.c
@@ -183,9 +183,7 @@
 
 static int isl29020_remove(struct i2c_client *client)
 {
-	struct als_data *data = i2c_get_clientdata(client);
 	sysfs_remove_group(&client->dev.kobj, &m_als_gr);
-	kfree(data);
 	return 0;
 }
 
@@ -245,6 +243,6 @@
 module_init(sensor_isl29020_init);
 module_exit(sensor_isl29020_exit);
 
-MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com");
+MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com>");
 MODULE_DESCRIPTION("Intersil isl29020 ALS Driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index d551f09..6956f7e 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -439,18 +439,23 @@
 	 * nodes that can comprise an access protection grouping. The access
 	 * protection is in regards to memory, IOI and IPI.
 	 */
-	max_regions = 64;
 	region_size = xp_region_size;
 
-	switch (region_size) {
-	case 128:
-		max_regions *= 2;
-	case 64:
-		max_regions *= 2;
-	case 32:
-		max_regions *= 2;
-		region_size = 16;
-		DBUG_ON(!is_shub2());
+	if (is_uv())
+		max_regions = 256;
+	else {
+		max_regions = 64;
+
+		switch (region_size) {
+		case 128:
+			max_regions *= 2;
+		case 64:
+			max_regions *= 2;
+		case 32:
+			max_regions *= 2;
+			region_size = 16;
+			DBUG_ON(!is_shub2());
+		}
 	}
 
 	for (region = 0; region < max_regions; region++) {
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index e1da258..0a92436f 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -699,7 +699,8 @@
 #define DEVICE_PCI(dev) NULL
 #endif
 
-#define VORTEX_PCI(vp) (((vp)->gendev) ? DEVICE_PCI((vp)->gendev) : NULL)
+#define VORTEX_PCI(vp)							\
+	((struct pci_dev *) (((vp)->gendev) ? DEVICE_PCI((vp)->gendev) : NULL))
 
 #ifdef CONFIG_EISA
 #define DEVICE_EISA(dev) (((dev)->bus == &eisa_bus_type) ? to_eisa_device((dev)) : NULL)
@@ -707,7 +708,8 @@
 #define DEVICE_EISA(dev) NULL
 #endif
 
-#define VORTEX_EISA(vp) (((vp)->gendev) ? DEVICE_EISA((vp)->gendev) : NULL)
+#define VORTEX_EISA(vp)							\
+	((struct eisa_device *) (((vp)->gendev) ? DEVICE_EISA((vp)->gendev) : NULL))
 
 /* The action to take with a media selection timer tick.
    Note that we deviate from the 3Com order by checking 10base2 before AUI.
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index ac422cd..dd16e83 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -490,13 +490,11 @@
 {
 	unsigned int protocol = (status >> 16) & 0x3;
 
-	if (likely((protocol == RxProtoTCP) && (!(status & TCPFail))))
+	if (((protocol == RxProtoTCP) && !(status & TCPFail)) ||
+	    ((protocol == RxProtoUDP) && !(status & UDPFail)))
 		return 1;
-	else if ((protocol == RxProtoUDP) && (!(status & UDPFail)))
-		return 1;
-	else if ((protocol == RxProtoIP) && (!(status & IPFail)))
-		return 1;
-	return 0;
+	else
+		return 0;
 }
 
 static int cp_rx_poll(struct napi_struct *napi, int budget)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index f6668cd..43db398 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2945,6 +2945,18 @@
 
 source "drivers/net/caif/Kconfig"
 
+config TILE_NET
+	tristate "Tilera GBE/XGBE network driver support"
+	depends on TILE
+	default y
+	select CRC32
+	help
+	  This is a standard Linux network device driver for the
+	  on-chip Tilera Gigabit Ethernet and XAUI interfaces.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called tile_net.
+
 config XEN_NETDEV_FRONTEND
 	tristate "Xen network device frontend driver"
 	depends on XEN
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 652fc6b..b90738d 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -301,3 +301,4 @@
 
 obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/
 obj-$(CONFIG_PCH_GBE) += pch_gbe/
+obj-$(CONFIG_TILE_NET) += tile/
diff --git a/drivers/net/atl1c/atl1c_hw.c b/drivers/net/atl1c/atl1c_hw.c
index 919080b..1bf6720 100644
--- a/drivers/net/atl1c/atl1c_hw.c
+++ b/drivers/net/atl1c/atl1c_hw.c
@@ -82,7 +82,7 @@
 	addr[0] = addr[1] = 0;
 	AT_READ_REG(hw, REG_OTP_CTRL, &otp_ctrl_data);
 	if (atl1c_check_eeprom_exist(hw)) {
-		if (hw->nic_type == athr_l1c || hw->nic_type == athr_l2c_b) {
+		if (hw->nic_type == athr_l1c || hw->nic_type == athr_l2c) {
 			/* Enable OTP CLK */
 			if (!(otp_ctrl_data & OTP_CTRL_CLK_EN)) {
 				otp_ctrl_data |= OTP_CTRL_CLK_EN;
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index c36cd2f..93354ee 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -2458,6 +2458,12 @@
 	int status, i = 0, num_imgs = 0;
 	const u8 *p;
 
+	if (!netif_running(adapter->netdev)) {
+		dev_err(&adapter->pdev->dev,
+			"Firmware load not allowed (interface is down)\n");
+		return -EPERM;
+	}
+
 	strcpy(fw_file, func);
 
 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index e9ad16f..9709b85 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -9064,7 +9064,7 @@
 	default:
 		pr_err("Unknown board_type (%ld), aborting\n",
 			   ent->driver_data);
-		return ENODEV;
+		return -ENODEV;
 	}
 
 	cid_count += CNIC_CONTEXT_USE;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index bdb68a6..71a1697 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -878,8 +878,10 @@
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
+		read_lock(&in_dev->mc_list_lock);
 		for (im = in_dev->mc_list; im; im = im->next)
 			ip_mc_rejoin_group(im);
+		read_unlock(&in_dev->mc_list_lock);
 	}
 
 	rcu_read_unlock();
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index 8b4cea5..20da199 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -635,8 +635,8 @@
 
 	ndev = alloc_netdev(sizeof(struct cfspi),
 			"cfspi%d", cfspi_setup);
-	if (!dev)
-		return -ENODEV;
+	if (!ndev)
+		return -ENOMEM;
 
 	cfspi = netdev_priv(ndev);
 	netif_stop_queue(ndev);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 4686c39..4d62f7b 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -31,7 +31,7 @@
 
 char e1000_driver_name[] = "e1000";
 static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
-#define DRV_VERSION "7.3.21-k6-NAPI"
+#define DRV_VERSION "7.3.21-k8-NAPI"
 const char e1000_driver_version[] = DRV_VERSION;
 static const char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation.";
 
@@ -485,9 +485,6 @@
 	struct net_device *netdev = adapter->netdev;
 	u32 rctl, tctl;
 
-	/* signal that we're down so the interrupt handler does not
-	 * reschedule our watchdog timer */
-	set_bit(__E1000_DOWN, &adapter->flags);
 
 	/* disable receives in the hardware */
 	rctl = er32(RCTL);
@@ -508,6 +505,13 @@
 
 	e1000_irq_disable(adapter);
 
+	/*
+	 * Setting DOWN must be after irq_disable to prevent
+	 * a screaming interrupt.  Setting DOWN also prevents
+	 * timers and tasks from rescheduling.
+	 */
+	set_bit(__E1000_DOWN, &adapter->flags);
+
 	del_timer_sync(&adapter->tx_fifo_stall_timer);
 	del_timer_sync(&adapter->watchdog_timer);
 	del_timer_sync(&adapter->phy_info_timer);
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 49e4ce1..d1bec62 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -577,11 +577,10 @@
 			irq_of_parse_and_map(np, 1);
 		priv->gfargrp[priv->num_grps].interruptError =
 			irq_of_parse_and_map(np,2);
-		if (priv->gfargrp[priv->num_grps].interruptTransmit < 0 ||
-			priv->gfargrp[priv->num_grps].interruptReceive < 0 ||
-			priv->gfargrp[priv->num_grps].interruptError < 0) {
+		if (priv->gfargrp[priv->num_grps].interruptTransmit == NO_IRQ ||
+		    priv->gfargrp[priv->num_grps].interruptReceive  == NO_IRQ ||
+		    priv->gfargrp[priv->num_grps].interruptError    == NO_IRQ)
 			return -EINVAL;
-		}
 	}
 
 	priv->gfargrp[priv->num_grps].grp_id = priv->num_grps;
diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c
index dc01980..aa93655 100644
--- a/drivers/net/ipg.c
+++ b/drivers/net/ipg.c
@@ -88,16 +88,14 @@
 	"IC PLUS IP1000 1000/100/10 based NIC",
 	"Sundance Technology ST2021 based NIC",
 	"Tamarack Microelectronics TC9020/9021 based NIC",
-	"Tamarack Microelectronics TC9020/9021 based NIC",
 	"D-Link NIC IP1000A"
 };
 
 static DEFINE_PCI_DEVICE_TABLE(ipg_pci_tbl) = {
 	{ PCI_VDEVICE(SUNDANCE,	0x1023), 0 },
 	{ PCI_VDEVICE(SUNDANCE,	0x2021), 1 },
-	{ PCI_VDEVICE(SUNDANCE,	0x1021), 2 },
-	{ PCI_VDEVICE(DLINK,	0x9021), 3 },
-	{ PCI_VDEVICE(DLINK,	0x4020), 4 },
+	{ PCI_VDEVICE(DLINK,	0x9021), 2 },
+	{ PCI_VDEVICE(DLINK,	0x4020), 3 },
 	{ 0, }
 };
 
diff --git a/drivers/net/irda/sh_sir.c b/drivers/net/irda/sh_sir.c
index 00b38bc..52a7c86 100644
--- a/drivers/net/irda/sh_sir.c
+++ b/drivers/net/irda/sh_sir.c
@@ -258,7 +258,7 @@
 
 	/* Baud Rate Error Correction x 10000 */
 	u32 rate_err_array[] = {
-		0000, 0625, 1250, 1875,
+		   0,  625, 1250, 1875,
 		2500, 3125, 3750, 4375,
 		5000, 5625, 6250, 6875,
 		7500, 8125, 8750, 9375,
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index f0bd1a1..e8b9c53 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -30,11 +30,14 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 #include <linux/marvell_phy.h>
+#include <linux/of.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 
+#define MII_MARVELL_PHY_PAGE		22
+
 #define MII_M1011_IEVENT		0x13
 #define MII_M1011_IEVENT_CLEAR		0x0000
 
@@ -80,7 +83,6 @@
 #define MII_88E1121_PHY_LED_CTRL	16
 #define MII_88E1121_PHY_LED_PAGE	3
 #define MII_88E1121_PHY_LED_DEF		0x0030
-#define MII_88E1121_PHY_PAGE		22
 
 #define MII_M1011_PHY_STATUS		0x11
 #define MII_M1011_PHY_STATUS_1000	0x8000
@@ -186,13 +188,94 @@
 	return 0;
 }
 
+#ifdef CONFIG_OF_MDIO
+/*
+ * Set and/or override some configuration registers based on the
+ * marvell,reg-init property stored in the of_node for the phydev.
+ *
+ * marvell,reg-init = <reg-page reg mask value>,...;
+ *
+ * There may be one or more sets of <reg-page reg mask value>:
+ *
+ * reg-page: which register bank to use.
+ * reg: the register.
+ * mask: if non-zero, ANDed with existing register value.
+ * value: ORed with the masked value and written to the regiser.
+ *
+ */
+static int marvell_of_reg_init(struct phy_device *phydev)
+{
+	const __be32 *paddr;
+	int len, i, saved_page, current_page, page_changed, ret;
+
+	if (!phydev->dev.of_node)
+		return 0;
+
+	paddr = of_get_property(phydev->dev.of_node, "marvell,reg-init", &len);
+	if (!paddr || len < (4 * sizeof(*paddr)))
+		return 0;
+
+	saved_page = phy_read(phydev, MII_MARVELL_PHY_PAGE);
+	if (saved_page < 0)
+		return saved_page;
+	page_changed = 0;
+	current_page = saved_page;
+
+	ret = 0;
+	len /= sizeof(*paddr);
+	for (i = 0; i < len - 3; i += 4) {
+		u16 reg_page = be32_to_cpup(paddr + i);
+		u16 reg = be32_to_cpup(paddr + i + 1);
+		u16 mask = be32_to_cpup(paddr + i + 2);
+		u16 val_bits = be32_to_cpup(paddr + i + 3);
+		int val;
+
+		if (reg_page != current_page) {
+			current_page = reg_page;
+			page_changed = 1;
+			ret = phy_write(phydev, MII_MARVELL_PHY_PAGE, reg_page);
+			if (ret < 0)
+				goto err;
+		}
+
+		val = 0;
+		if (mask) {
+			val = phy_read(phydev, reg);
+			if (val < 0) {
+				ret = val;
+				goto err;
+			}
+			val &= mask;
+		}
+		val |= val_bits;
+
+		ret = phy_write(phydev, reg, val);
+		if (ret < 0)
+			goto err;
+
+	}
+err:
+	if (page_changed) {
+		i = phy_write(phydev, MII_MARVELL_PHY_PAGE, saved_page);
+		if (ret == 0)
+			ret = i;
+	}
+	return ret;
+}
+#else
+static int marvell_of_reg_init(struct phy_device *phydev)
+{
+	return 0;
+}
+#endif /* CONFIG_OF_MDIO */
+
 static int m88e1121_config_aneg(struct phy_device *phydev)
 {
 	int err, oldpage, mscr;
 
-	oldpage = phy_read(phydev, MII_88E1121_PHY_PAGE);
+	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
 
-	err = phy_write(phydev, MII_88E1121_PHY_PAGE,
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE,
 			MII_88E1121_PHY_MSCR_PAGE);
 	if (err < 0)
 		return err;
@@ -218,7 +301,7 @@
 			return err;
 	}
 
-	phy_write(phydev, MII_88E1121_PHY_PAGE, oldpage);
+	phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
 
 	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
 	if (err < 0)
@@ -229,11 +312,11 @@
 	if (err < 0)
 		return err;
 
-	oldpage = phy_read(phydev, MII_88E1121_PHY_PAGE);
+	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
 
-	phy_write(phydev, MII_88E1121_PHY_PAGE, MII_88E1121_PHY_LED_PAGE);
+	phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE);
 	phy_write(phydev, MII_88E1121_PHY_LED_CTRL, MII_88E1121_PHY_LED_DEF);
-	phy_write(phydev, MII_88E1121_PHY_PAGE, oldpage);
+	phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
 
 	err = genphy_config_aneg(phydev);
 
@@ -244,9 +327,9 @@
 {
 	int err, oldpage, mscr;
 
-	oldpage = phy_read(phydev, MII_88E1121_PHY_PAGE);
+	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
 
-	err = phy_write(phydev, MII_88E1121_PHY_PAGE,
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE,
 			MII_88E1121_PHY_MSCR_PAGE);
 	if (err < 0)
 		return err;
@@ -258,7 +341,7 @@
 	if (err < 0)
 		return err;
 
-	err = phy_write(phydev, MII_88E1121_PHY_PAGE, oldpage);
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
 	if (err < 0)
 		return err;
 
@@ -368,6 +451,9 @@
 			return err;
 	}
 
+	err = marvell_of_reg_init(phydev);
+	if (err < 0)
+		return err;
 
 	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
 	if (err < 0)
@@ -398,7 +484,7 @@
 	int err;
 
 	/* Change address */
-	err = phy_write(phydev, 0x16, 0x0002);
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0x0002);
 	if (err < 0)
 		return err;
 
@@ -408,7 +494,7 @@
 		return err;
 
 	/* Change address */
-	err = phy_write(phydev, 0x16, 0x0003);
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0x0003);
 	if (err < 0)
 		return err;
 
@@ -420,8 +506,42 @@
 	if (err < 0)
 		return err;
 
+	err = marvell_of_reg_init(phydev);
+	if (err < 0)
+		return err;
+
 	/* Reset address */
-	err = phy_write(phydev, 0x16, 0x0);
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0x0);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int m88e1149_config_init(struct phy_device *phydev)
+{
+	int err;
+
+	/* Change address */
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0x0002);
+	if (err < 0)
+		return err;
+
+	/* Enable 1000 Mbit */
+	err = phy_write(phydev, 0x15, 0x1048);
+	if (err < 0)
+		return err;
+
+	err = marvell_of_reg_init(phydev);
+	if (err < 0)
+		return err;
+
+	/* Reset address */
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0x0);
 	if (err < 0)
 		return err;
 
@@ -491,6 +611,10 @@
 		}
 	}
 
+	err = marvell_of_reg_init(phydev);
+	if (err < 0)
+		return err;
+
 	return 0;
 }
 
@@ -685,6 +809,19 @@
 		.driver = { .owner = THIS_MODULE },
 	},
 	{
+		.phy_id = MARVELL_PHY_ID_88E1149R,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
+		.name = "Marvell 88E1149R",
+		.features = PHY_GBIT_FEATURES,
+		.flags = PHY_HAS_INTERRUPT,
+		.config_init = &m88e1149_config_init,
+		.config_aneg = &m88e1118_config_aneg,
+		.read_status = &genphy_read_status,
+		.ack_interrupt = &marvell_ack_interrupt,
+		.config_intr = &marvell_config_intr,
+		.driver = { .owner = THIS_MODULE },
+	},
+	{
 		.phy_id = MARVELL_PHY_ID_88E1240,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1240",
@@ -735,6 +872,7 @@
 	{ 0x01410e10, 0xfffffff0 },
 	{ 0x01410cb0, 0xfffffff0 },
 	{ 0x01410cd0, 0xfffffff0 },
+	{ 0x01410e50, 0xfffffff0 },
 	{ 0x01410e30, 0xfffffff0 },
 	{ 0x01410e90, 0xfffffff0 },
 	{ }
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index c30e0fe..528eaef 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -62,15 +62,15 @@
 /* NETIF_MSG_PKTDATA | */
     NETIF_MSG_HW | NETIF_MSG_WOL | 0;
 
-static int debug = 0x00007fff;	/* defaults above */
-module_param(debug, int, 0);
+static int debug = -1;	/* defaults above */
+module_param(debug, int, 0664);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
 #define MSIX_IRQ 0
 #define MSI_IRQ 1
 #define LEG_IRQ 2
 static int qlge_irq_type = MSIX_IRQ;
-module_param(qlge_irq_type, int, MSIX_IRQ);
+module_param(qlge_irq_type, int, 0664);
 MODULE_PARM_DESC(qlge_irq_type, "0 = MSI-X, 1 = MSI, 2 = Legacy.");
 
 static int qlge_mpi_coredump;
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 4c4d169..7d33ef4 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -4440,8 +4440,7 @@
 	u32 status = opts1 & RxProtoMask;
 
 	if (((status == RxProtoTCP) && !(opts1 & TCPFail)) ||
-	    ((status == RxProtoUDP) && !(opts1 & UDPFail)) ||
-	    ((status == RxProtoIP) && !(opts1 & IPFail)))
+	    ((status == RxProtoUDP) && !(opts1 & UDPFail)))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	else
 		skb_checksum_none_assert(skb);
diff --git a/drivers/net/tile/Makefile b/drivers/net/tile/Makefile
new file mode 100644
index 0000000..f634f14
--- /dev/null
+++ b/drivers/net/tile/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the TILE on-chip networking support.
+#
+
+obj-$(CONFIG_TILE_NET) += tile_net.o
+ifdef CONFIG_TILEGX
+tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o
+else
+tile_net-objs := tilepro.o
+endif
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c
new file mode 100644
index 0000000..0e6bac5
--- /dev/null
+++ b/drivers/net/tile/tilepro.c
@@ -0,0 +1,2406 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>      /* printk() */
+#include <linux/slab.h>        /* kmalloc() */
+#include <linux/errno.h>       /* error codes */
+#include <linux/types.h>       /* size_t */
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>   /* struct device, and other headers */
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/skbuff.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/hugetlb.h>
+#include <linux/in6.h>
+#include <linux/timer.h>
+#include <linux/io.h>
+#include <asm/checksum.h>
+#include <asm/homecache.h>
+
+#include <hv/drv_xgbe_intf.h>
+#include <hv/drv_xgbe_impl.h>
+#include <hv/hypervisor.h>
+#include <hv/netio_intf.h>
+
+/* For TSO */
+#include <linux/ip.h>
+#include <linux/tcp.h>
+
+
+/* There is no singlethread_cpu, so schedule work on the current cpu. */
+#define singlethread_cpu -1
+
+
+/*
+ * First, "tile_net_init_module()" initializes all four "devices" which
+ * can be used by linux.
+ *
+ * Then, "ifconfig DEVICE up" calls "tile_net_open()", which analyzes
+ * the network cpus, then uses "tile_net_open_aux()" to initialize
+ * LIPP/LEPP, and then uses "tile_net_open_inner()" to register all
+ * the tiles, provide buffers to LIPP, allow ingress to start, and
+ * turn on hypervisor interrupt handling (and NAPI) on all tiles.
+ *
+ * If registration fails due to the link being down, then "retry_work"
+ * is used to keep calling "tile_net_open_inner()" until it succeeds.
+ *
+ * If "ifconfig DEVICE down" is called, it uses "tile_net_stop()" to
+ * stop egress, drain the LIPP buffers, unregister all the tiles, stop
+ * LIPP/LEPP, and wipe the LEPP queue.
+ *
+ * We start out with the ingress interrupt enabled on each CPU.  When
+ * this interrupt fires, we disable it, and call "napi_schedule()".
+ * This will cause "tile_net_poll()" to be called, which will pull
+ * packets from the netio queue, filtering them out, or passing them
+ * to "netif_receive_skb()".  If our budget is exhausted, we will
+ * return, knowing we will be called again later.  Otherwise, we
+ * reenable the ingress interrupt, and call "napi_complete()".
+ *
+ *
+ * NOTE: The use of "native_driver" ensures that EPP exists, and that
+ * "epp_sendv" is legal, and that "LIPP" is being used.
+ *
+ * NOTE: Failing to free completions for an arbitrarily long time
+ * (which is defined to be illegal) does in fact cause bizarre
+ * problems.  The "egress_timer" helps prevent this from happening.
+ *
+ * NOTE: The egress code can be interrupted by the interrupt handler.
+ */
+
+
+/* HACK: Allow use of "jumbo" packets. */
+/* This should be 1500 if "jumbo" is not set in LIPP. */
+/* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */
+/* ISSUE: This has not been thoroughly tested (except at 1500). */
+#define TILE_NET_MTU 1500
+
+/* HACK: Define to support GSO. */
+/* ISSUE: This may actually hurt performance of the TCP blaster. */
+/* #define TILE_NET_GSO */
+
+/* Define this to collapse "duplicate" acks. */
+/* #define IGNORE_DUP_ACKS */
+
+/* HACK: Define this to verify incoming packets. */
+/* #define TILE_NET_VERIFY_INGRESS */
+
+/* Use 3000 to enable the Linux Traffic Control (QoS) layer, else 0. */
+#define TILE_NET_TX_QUEUE_LEN 0
+
+/* Define to dump packets (prints out the whole packet on tx and rx). */
+/* #define TILE_NET_DUMP_PACKETS */
+
+/* Define to enable debug spew (all PDEBUG's are enabled). */
+/* #define TILE_NET_DEBUG */
+
+
+/* Define to activate paranoia checks. */
+/* #define TILE_NET_PARANOIA */
+
+/* Default transmit lockup timeout period, in jiffies. */
+#define TILE_NET_TIMEOUT (5 * HZ)
+
+/* Default retry interval for bringing up the NetIO interface, in jiffies. */
+#define TILE_NET_RETRY_INTERVAL (5 * HZ)
+
+/* Number of ports (xgbe0, xgbe1, gbe0, gbe1). */
+#define TILE_NET_DEVS 4
+
+
+
+/* Paranoia. */
+#if NET_IP_ALIGN != LIPP_PACKET_PADDING
+#error "NET_IP_ALIGN must match LIPP_PACKET_PADDING."
+#endif
+
+
+/* Debug print. */
+#ifdef TILE_NET_DEBUG
+#define PDEBUG(fmt, args...) net_printk(fmt, ## args)
+#else
+#define PDEBUG(fmt, args...)
+#endif
+
+
+MODULE_AUTHOR("Tilera");
+MODULE_LICENSE("GPL");
+
+
+#define IS_MULTICAST(mac_addr) \
+	(((u8 *)(mac_addr))[0] & 0x01)
+
+#define IS_BROADCAST(mac_addr) \
+	(((u16 *)(mac_addr))[0] == 0xffff)
+
+
+/*
+ * Queue of incoming packets for a specific cpu and device.
+ *
+ * Includes a pointer to the "system" data, and the actual "user" data.
+ */
+struct tile_netio_queue {
+	netio_queue_impl_t *__system_part;
+	netio_queue_user_impl_t __user_part;
+
+};
+
+
+/*
+ * Statistics counters for a specific cpu and device.
+ */
+struct tile_net_stats_t {
+	u32 rx_packets;
+	u32 rx_bytes;
+	u32 tx_packets;
+	u32 tx_bytes;
+};
+
+
+/*
+ * Info for a specific cpu and device.
+ *
+ * ISSUE: There is a "dev" pointer in "napi" as well.
+ */
+struct tile_net_cpu {
+	/* The NAPI struct. */
+	struct napi_struct napi;
+	/* Packet queue. */
+	struct tile_netio_queue queue;
+	/* Statistics. */
+	struct tile_net_stats_t stats;
+	/* ISSUE: Is this needed? */
+	bool napi_enabled;
+	/* True if this tile has succcessfully registered with the IPP. */
+	bool registered;
+	/* True if the link was down last time we tried to register. */
+	bool link_down;
+	/* True if "egress_timer" is scheduled. */
+	bool egress_timer_scheduled;
+	/* Number of small sk_buffs which must still be provided. */
+	unsigned int num_needed_small_buffers;
+	/* Number of large sk_buffs which must still be provided. */
+	unsigned int num_needed_large_buffers;
+	/* A timer for handling egress completions. */
+	struct timer_list egress_timer;
+};
+
+
+/*
+ * Info for a specific device.
+ */
+struct tile_net_priv {
+	/* Our network device. */
+	struct net_device *dev;
+	/* The actual egress queue. */
+	lepp_queue_t *epp_queue;
+	/* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */
+	spinlock_t cmd_lock;
+	/* Protects "epp_queue->comp_head". */
+	spinlock_t comp_lock;
+	/* The hypervisor handle for this interface. */
+	int hv_devhdl;
+	/* The intr bit mask that IDs this device. */
+	u32 intr_id;
+	/* True iff "tile_net_open_aux()" has succeeded. */
+	int partly_opened;
+	/* True iff "tile_net_open_inner()" has succeeded. */
+	int fully_opened;
+	/* Effective network cpus. */
+	struct cpumask network_cpus_map;
+	/* Number of network cpus. */
+	int network_cpus_count;
+	/* Credits per network cpu. */
+	int network_cpus_credits;
+	/* Network stats. */
+	struct net_device_stats stats;
+	/* For NetIO bringup retries. */
+	struct delayed_work retry_work;
+	/* Quick access to per cpu data. */
+	struct tile_net_cpu *cpu[NR_CPUS];
+};
+
+
+/*
+ * The actual devices (xgbe0, xgbe1, gbe0, gbe1).
+ */
+static struct net_device *tile_net_devs[TILE_NET_DEVS];
+
+/*
+ * The "tile_net_cpu" structures for each device.
+ */
+static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe0);
+static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe1);
+static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe0);
+static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe1);
+
+
+/*
+ * True if "network_cpus" was specified.
+ */
+static bool network_cpus_used;
+
+/*
+ * The actual cpus in "network_cpus".
+ */
+static struct cpumask network_cpus_map;
+
+
+
+#ifdef TILE_NET_DEBUG
+/*
+ * printk with extra stuff.
+ *
+ * We print the CPU we're running in brackets.
+ */
+static void net_printk(char *fmt, ...)
+{
+	int i;
+	int len;
+	va_list args;
+	static char buf[256];
+
+	len = sprintf(buf, "tile_net[%2.2d]: ", smp_processor_id());
+	va_start(args, fmt);
+	i = vscnprintf(buf + len, sizeof(buf) - len - 1, fmt, args);
+	va_end(args);
+	buf[255] = '\0';
+	pr_notice(buf);
+}
+#endif
+
+
+#ifdef TILE_NET_DUMP_PACKETS
+/*
+ * Dump a packet.
+ */
+static void dump_packet(unsigned char *data, unsigned long length, char *s)
+{
+	unsigned long i;
+	static unsigned int count;
+
+	pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n",
+	       data, length, s, count++);
+
+	pr_info("\n");
+
+	for (i = 0; i < length; i++) {
+		if ((i & 0xf) == 0)
+			sprintf(buf, "%8.8lx:", i);
+		sprintf(buf + strlen(buf), " %2.2x", data[i]);
+		if ((i & 0xf) == 0xf || i == length - 1)
+			pr_info("%s\n", buf);
+	}
+}
+#endif
+
+
+/*
+ * Provide support for the __netio_fastio1() swint
+ * (see <hv/drv_xgbe_intf.h> for how it is used).
+ *
+ * The fastio swint2 call may clobber all the caller-saved registers.
+ * It rarely clobbers memory, but we allow for the possibility in
+ * the signature just to be on the safe side.
+ *
+ * Also, gcc doesn't seem to allow an input operand to be
+ * clobbered, so we fake it with dummy outputs.
+ *
+ * This function can't be static because of the way it is declared
+ * in the netio header.
+ */
+inline int __netio_fastio1(u32 fastio_index, u32 arg0)
+{
+	long result, clobber_r1, clobber_r10;
+	asm volatile("swint2"
+		     : "=R00" (result),
+		       "=R01" (clobber_r1), "=R10" (clobber_r10)
+		     : "R10" (fastio_index), "R01" (arg0)
+		     : "memory", "r2", "r3", "r4",
+		       "r5", "r6", "r7", "r8", "r9",
+		       "r11", "r12", "r13", "r14",
+		       "r15", "r16", "r17", "r18", "r19",
+		       "r20", "r21", "r22", "r23", "r24",
+		       "r25", "r26", "r27", "r28", "r29");
+	return result;
+}
+
+
+/*
+ * Provide a linux buffer to LIPP.
+ */
+static void tile_net_provide_linux_buffer(struct tile_net_cpu *info,
+					  void *va, bool small)
+{
+	struct tile_netio_queue *queue = &info->queue;
+
+	/* Convert "va" and "small" to "linux_buffer_t". */
+	unsigned int buffer = ((unsigned int)(__pa(va) >> 7) << 1) + small;
+
+	__netio_fastio_free_buffer(queue->__user_part.__fastio_index, buffer);
+}
+
+
+/*
+ * Provide a linux buffer for LIPP.
+ */
+static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
+					   bool small)
+{
+	/* ISSUE: What should we use here? */
+	unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100;
+
+	/* Round up to ensure to avoid "false sharing" with last cache line. */
+	unsigned int buffer_size =
+		 (((small ? LIPP_SMALL_PACKET_SIZE : large_size) +
+		   CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE());
+
+	/*
+	 * ISSUE: Since CPAs are 38 bits, and we can only encode the
+	 * high 31 bits in a "linux_buffer_t", the low 7 bits must be
+	 * zero, and thus, we must align the actual "va" mod 128.
+	 */
+	const unsigned long align = 128;
+
+	struct sk_buff *skb;
+	void *va;
+
+	struct sk_buff **skb_ptr;
+
+	/* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */
+	/* and also "reserves" that many bytes. */
+	/* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */
+	int len = sizeof(*skb_ptr) + align + buffer_size;
+
+	while (1) {
+
+		/* Allocate (or fail). */
+		skb = dev_alloc_skb(len);
+		if (skb == NULL)
+			return false;
+
+		/* Make room for a back-pointer to 'skb'. */
+		skb_reserve(skb, sizeof(*skb_ptr));
+
+		/* Make sure we are aligned. */
+		skb_reserve(skb, -(long)skb->data & (align - 1));
+
+		/* This address is given to IPP. */
+		va = skb->data;
+
+		if (small)
+			break;
+
+		/* ISSUE: This has never been observed! */
+		/* Large buffers must not span a huge page. */
+		if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0)
+			break;
+		pr_err("Leaking unaligned linux buffer at %p.\n", va);
+	}
+
+	/* Skip two bytes to satisfy LIPP assumptions. */
+	/* Note that this aligns IP on a 16 byte boundary. */
+	/* ISSUE: Do this when the packet arrives? */
+	skb_reserve(skb, NET_IP_ALIGN);
+
+	/* Save a back-pointer to 'skb'. */
+	skb_ptr = va - sizeof(*skb_ptr);
+	*skb_ptr = skb;
+
+	/* Invalidate the packet buffer. */
+	if (!hash_default)
+		__inv_buffer(skb->data, buffer_size);
+
+	/* Make sure "skb_ptr" has been flushed. */
+	__insn_mf();
+
+#ifdef TILE_NET_PARANOIA
+#if CHIP_HAS_CBOX_HOME_MAP()
+	if (hash_default) {
+		HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va);
+		if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
+			panic("Non-coherent ingress buffer!");
+	}
+#endif
+#endif
+
+	/* Provide the new buffer. */
+	tile_net_provide_linux_buffer(info, va, small);
+
+	return true;
+}
+
+
+/*
+ * Provide linux buffers for LIPP.
+ */
+static void tile_net_provide_needed_buffers(struct tile_net_cpu *info)
+{
+	while (info->num_needed_small_buffers != 0) {
+		if (!tile_net_provide_needed_buffer(info, true))
+			goto oops;
+		info->num_needed_small_buffers--;
+	}
+
+	while (info->num_needed_large_buffers != 0) {
+		if (!tile_net_provide_needed_buffer(info, false))
+			goto oops;
+		info->num_needed_large_buffers--;
+	}
+
+	return;
+
+oops:
+
+	/* Add a description to the page allocation failure dump. */
+	pr_notice("Could not provide a linux buffer to LIPP.\n");
+}
+
+
+/*
+ * Grab some LEPP completions, and store them in "comps", of size
+ * "comps_size", and return the number of completions which were
+ * stored, so the caller can free them.
+ *
+ * If "pending" is not NULL, it will be set to true if there might
+ * still be some pending completions caused by this tile, else false.
+ */
+static unsigned int tile_net_lepp_grab_comps(struct net_device *dev,
+					     struct sk_buff *comps[],
+					     unsigned int comps_size,
+					     bool *pending)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+
+	lepp_queue_t *eq = priv->epp_queue;
+
+	unsigned int n = 0;
+
+	unsigned int comp_head;
+	unsigned int comp_busy;
+	unsigned int comp_tail;
+
+	spin_lock(&priv->comp_lock);
+
+	comp_head = eq->comp_head;
+	comp_busy = eq->comp_busy;
+	comp_tail = eq->comp_tail;
+
+	while (comp_head != comp_busy && n < comps_size) {
+		comps[n++] = eq->comps[comp_head];
+		LEPP_QINC(comp_head);
+	}
+
+	if (pending != NULL)
+		*pending = (comp_head != comp_tail);
+
+	eq->comp_head = comp_head;
+
+	spin_unlock(&priv->comp_lock);
+
+	return n;
+}
+
+
+/*
+ * Make sure the egress timer is scheduled.
+ *
+ * Note that we use "schedule if not scheduled" logic instead of the more
+ * obvious "reschedule" logic, because "reschedule" is fairly expensive.
+ */
+static void tile_net_schedule_egress_timer(struct tile_net_cpu *info)
+{
+	if (!info->egress_timer_scheduled) {
+		mod_timer_pinned(&info->egress_timer, jiffies + 1);
+		info->egress_timer_scheduled = true;
+	}
+}
+
+
+/*
+ * The "function" for "info->egress_timer".
+ *
+ * This timer will reschedule itself as long as there are any pending
+ * completions expected (on behalf of any tile).
+ *
+ * ISSUE: Realistically, will the timer ever stop scheduling itself?
+ *
+ * ISSUE: This timer is almost never actually needed, so just use a global
+ * timer that can run on any tile.
+ *
+ * ISSUE: Maybe instead track number of expected completions, and free
+ * only that many, resetting to zero if "pending" is ever false.
+ */
+static void tile_net_handle_egress_timer(unsigned long arg)
+{
+	struct tile_net_cpu *info = (struct tile_net_cpu *)arg;
+	struct net_device *dev = info->napi.dev;
+
+	struct sk_buff *olds[32];
+	unsigned int wanted = 32;
+	unsigned int i, nolds = 0;
+	bool pending;
+
+	/* The timer is no longer scheduled. */
+	info->egress_timer_scheduled = false;
+
+	nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending);
+
+	for (i = 0; i < nolds; i++)
+		kfree_skb(olds[i]);
+
+	/* Reschedule timer if needed. */
+	if (pending)
+		tile_net_schedule_egress_timer(info);
+}
+
+
+#ifdef IGNORE_DUP_ACKS
+
+/*
+ * Help detect "duplicate" ACKs.  These are sequential packets (for a
+ * given flow) which are exactly 66 bytes long, sharing everything but
+ * ID=2@0x12, Hsum=2@0x18, Ack=4@0x2a, WinSize=2@0x30, Csum=2@0x32,
+ * Tstamps=10@0x38.  The ID's are +1, the Hsum's are -1, the Ack's are
+ * +N, and the Tstamps are usually identical.
+ *
+ * NOTE: Apparently truly duplicate acks (with identical "ack" values),
+ * should not be collapsed, as they are used for some kind of flow control.
+ */
+static bool is_dup_ack(char *s1, char *s2, unsigned int len)
+{
+	int i;
+
+	unsigned long long ignorable = 0;
+
+	/* Identification. */
+	ignorable |= (1ULL << 0x12);
+	ignorable |= (1ULL << 0x13);
+
+	/* Header checksum. */
+	ignorable |= (1ULL << 0x18);
+	ignorable |= (1ULL << 0x19);
+
+	/* ACK. */
+	ignorable |= (1ULL << 0x2a);
+	ignorable |= (1ULL << 0x2b);
+	ignorable |= (1ULL << 0x2c);
+	ignorable |= (1ULL << 0x2d);
+
+	/* WinSize. */
+	ignorable |= (1ULL << 0x30);
+	ignorable |= (1ULL << 0x31);
+
+	/* Checksum. */
+	ignorable |= (1ULL << 0x32);
+	ignorable |= (1ULL << 0x33);
+
+	for (i = 0; i < len; i++, ignorable >>= 1) {
+
+		if ((ignorable & 1) || (s1[i] == s2[i]))
+			continue;
+
+#ifdef TILE_NET_DEBUG
+		/* HACK: Mention non-timestamp diffs. */
+		if (i < 0x38 && i != 0x2f &&
+		    net_ratelimit())
+			pr_info("Diff at 0x%x\n", i);
+#endif
+
+		return false;
+	}
+
+#ifdef TILE_NET_NO_SUPPRESS_DUP_ACKS
+	/* HACK: Do not suppress truly duplicate ACKs. */
+	/* ISSUE: Is this actually necessary or helpful? */
+	if (s1[0x2a] == s2[0x2a] &&
+	    s1[0x2b] == s2[0x2b] &&
+	    s1[0x2c] == s2[0x2c] &&
+	    s1[0x2d] == s2[0x2d]) {
+		return false;
+	}
+#endif
+
+	return true;
+}
+
+#endif
+
+
+
+/*
+ * Like "tile_net_handle_packets()", but just discard packets.
+ */
+static void tile_net_discard_packets(struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info = priv->cpu[my_cpu];
+	struct tile_netio_queue *queue = &info->queue;
+	netio_queue_impl_t *qsp = queue->__system_part;
+	netio_queue_user_impl_t *qup = &queue->__user_part;
+
+	while (qup->__packet_receive_read !=
+	       qsp->__packet_receive_queue.__packet_write) {
+
+		int index = qup->__packet_receive_read;
+
+		int index2_aux = index + sizeof(netio_pkt_t);
+		int index2 =
+			((index2_aux ==
+			  qsp->__packet_receive_queue.__last_packet_plus_one) ?
+			 0 : index2_aux);
+
+		netio_pkt_t *pkt = (netio_pkt_t *)
+			((unsigned long) &qsp[1] + index);
+
+		/* Extract the "linux_buffer_t". */
+		unsigned int buffer = pkt->__packet.word;
+
+		/* Convert "linux_buffer_t" to "va". */
+		void *va = __va((phys_addr_t)(buffer >> 1) << 7);
+
+		/* Acquire the associated "skb". */
+		struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
+		struct sk_buff *skb = *skb_ptr;
+
+		kfree_skb(skb);
+
+		/* Consume this packet. */
+		qup->__packet_receive_read = index2;
+	}
+}
+
+
+/*
+ * Handle the next packet.  Return true if "processed", false if "filtered".
+ */
+static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
+{
+	struct net_device *dev = info->napi.dev;
+
+	struct tile_netio_queue *queue = &info->queue;
+	netio_queue_impl_t *qsp = queue->__system_part;
+	netio_queue_user_impl_t *qup = &queue->__user_part;
+	struct tile_net_stats_t *stats = &info->stats;
+
+	int filter;
+
+	int index2_aux = index + sizeof(netio_pkt_t);
+	int index2 =
+		((index2_aux ==
+		  qsp->__packet_receive_queue.__last_packet_plus_one) ?
+		 0 : index2_aux);
+
+	netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index);
+
+	netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt);
+
+	/* Extract the packet size. */
+	unsigned long len =
+		(NETIO_PKT_CUSTOM_LENGTH(pkt) +
+		 NET_IP_ALIGN - NETIO_PACKET_PADDING);
+
+	/* Extract the "linux_buffer_t". */
+	unsigned int buffer = pkt->__packet.word;
+
+	/* Extract "small" (vs "large"). */
+	bool small = ((buffer & 1) != 0);
+
+	/* Convert "linux_buffer_t" to "va". */
+	void *va = __va((phys_addr_t)(buffer >> 1) << 7);
+
+	/* Extract the packet data pointer. */
+	/* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
+	unsigned char *buf = va + NET_IP_ALIGN;
+
+#ifdef IGNORE_DUP_ACKS
+
+	static int other;
+	static int final;
+	static int keep;
+	static int skip;
+
+#endif
+
+	/* Invalidate the packet buffer. */
+	if (!hash_default)
+		__inv_buffer(buf, len);
+
+	/* ISSUE: Is this needed? */
+	dev->last_rx = jiffies;
+
+#ifdef TILE_NET_DUMP_PACKETS
+	dump_packet(buf, len, "rx");
+#endif /* TILE_NET_DUMP_PACKETS */
+
+#ifdef TILE_NET_VERIFY_INGRESS
+	if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) &&
+	    NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) {
+		/*
+		 * FIXME: This complains about UDP packets
+		 * with a "zero" checksum (bug 6624).
+		 */
+#ifdef TILE_NET_PANIC_ON_BAD
+		dump_packet(buf, len, "rx");
+		panic("Bad L4 checksum.");
+#else
+		pr_warning("Bad L4 checksum on %d byte packet.\n", len);
+#endif
+	}
+	if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) &&
+	    NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) {
+		dump_packet(buf, len, "rx");
+		panic("Bad L3 checksum.");
+	}
+	switch (NETIO_PKT_STATUS_M(metadata, pkt)) {
+	case NETIO_PKT_STATUS_OVERSIZE:
+		if (len >= 64) {
+			dump_packet(buf, len, "rx");
+			panic("Unexpected OVERSIZE.");
+		}
+		break;
+	case NETIO_PKT_STATUS_BAD:
+#ifdef TILE_NET_PANIC_ON_BAD
+		dump_packet(buf, len, "rx");
+		panic("Unexpected BAD packet.");
+#else
+		pr_warning("Unexpected BAD %d byte packet.\n", len);
+#endif
+	}
+#endif
+
+	filter = 0;
+
+	if (!(dev->flags & IFF_UP)) {
+		/* Filter packets received before we're up. */
+		filter = 1;
+	} else if (!(dev->flags & IFF_PROMISC)) {
+		/*
+		 * FIXME: Implement HW multicast filter.
+		 */
+		if (!IS_MULTICAST(buf) && !IS_BROADCAST(buf)) {
+			/* Filter packets not for our address. */
+			const u8 *mine = dev->dev_addr;
+			filter = compare_ether_addr(mine, buf);
+		}
+	}
+
+#ifdef IGNORE_DUP_ACKS
+
+	if (len != 66) {
+		/* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */
+
+		other++;
+
+	} else if (index2 ==
+		   qsp->__packet_receive_queue.__packet_write) {
+
+		final++;
+
+	} else {
+
+		netio_pkt_t *pkt2 = (netio_pkt_t *)
+			((unsigned long) &qsp[1] + index2);
+
+		netio_pkt_metadata_t *metadata2 =
+			NETIO_PKT_METADATA(pkt2);
+
+		/* Extract the packet size. */
+		unsigned long len2 =
+			(NETIO_PKT_CUSTOM_LENGTH(pkt2) +
+			 NET_IP_ALIGN - NETIO_PACKET_PADDING);
+
+		if (len2 == 66 &&
+		    NETIO_PKT_FLOW_HASH_M(metadata, pkt) ==
+		    NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) {
+
+			/* Extract the "linux_buffer_t". */
+			unsigned int buffer2 = pkt2->__packet.word;
+
+			/* Convert "linux_buffer_t" to "va". */
+			void *va2 =
+				__va((phys_addr_t)(buffer2 >> 1) << 7);
+
+			/* Extract the packet data pointer. */
+			/* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
+			unsigned char *buf2 = va2 + NET_IP_ALIGN;
+
+			/* Invalidate the packet buffer. */
+			if (!hash_default)
+				__inv_buffer(buf2, len2);
+
+			if (is_dup_ack(buf, buf2, len)) {
+				skip++;
+				filter = 1;
+			} else {
+				keep++;
+			}
+		}
+	}
+
+	if (net_ratelimit())
+		pr_info("Other %d Final %d Keep %d Skip %d.\n",
+			other, final, keep, skip);
+
+#endif
+
+	if (filter) {
+
+		/* ISSUE: Update "drop" statistics? */
+
+		tile_net_provide_linux_buffer(info, va, small);
+
+	} else {
+
+		/* Acquire the associated "skb". */
+		struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
+		struct sk_buff *skb = *skb_ptr;
+
+		/* Paranoia. */
+		if (skb->data != buf)
+			panic("Corrupt linux buffer from LIPP! "
+			      "VA=%p, skb=%p, skb->data=%p\n",
+			      va, skb, skb->data);
+
+		/* Encode the actual packet length. */
+		skb_put(skb, len);
+
+		/* NOTE: This call also sets "skb->dev = dev". */
+		skb->protocol = eth_type_trans(skb, dev);
+
+		/* ISSUE: Discard corrupt packets? */
+		/* ISSUE: Discard packets with bad checksums? */
+
+		/* Avoid recomputing TCP/UDP checksums. */
+		if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt))
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		netif_receive_skb(skb);
+
+		stats->rx_packets++;
+		stats->rx_bytes += len;
+
+		if (small)
+			info->num_needed_small_buffers++;
+		else
+			info->num_needed_large_buffers++;
+	}
+
+	/* Return four credits after every fourth packet. */
+	if (--qup->__receive_credit_remaining == 0) {
+		u32 interval = qup->__receive_credit_interval;
+		qup->__receive_credit_remaining = interval;
+		__netio_fastio_return_credits(qup->__fastio_index, interval);
+	}
+
+	/* Consume this packet. */
+	qup->__packet_receive_read = index2;
+
+	return !filter;
+}
+
+
+/*
+ * Handle some packets for the given device on the current CPU.
+ *
+ * ISSUE: The "rotting packet" race condition occurs if a packet
+ * arrives after the queue appears to be empty, and before the
+ * hypervisor interrupt is re-enabled.
+ */
+static int tile_net_poll(struct napi_struct *napi, int budget)
+{
+	struct net_device *dev = napi->dev;
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info = priv->cpu[my_cpu];
+	struct tile_netio_queue *queue = &info->queue;
+	netio_queue_impl_t *qsp = queue->__system_part;
+	netio_queue_user_impl_t *qup = &queue->__user_part;
+
+	unsigned int work = 0;
+
+	while (1) {
+		int index = qup->__packet_receive_read;
+		if (index == qsp->__packet_receive_queue.__packet_write)
+			break;
+
+		if (tile_net_poll_aux(info, index)) {
+			if (++work >= budget)
+				goto done;
+		}
+	}
+
+	napi_complete(&info->napi);
+
+	/* Re-enable hypervisor interrupts. */
+	enable_percpu_irq(priv->intr_id);
+
+	/* HACK: Avoid the "rotting packet" problem. */
+	if (qup->__packet_receive_read !=
+	    qsp->__packet_receive_queue.__packet_write)
+		napi_schedule(&info->napi);
+
+	/* ISSUE: Handle completions? */
+
+done:
+
+	tile_net_provide_needed_buffers(info);
+
+	return work;
+}
+
+
+/*
+ * Handle an ingress interrupt for the given device on the current cpu.
+ */
+static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr)
+{
+	struct net_device *dev = (struct net_device *)dev_ptr;
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info = priv->cpu[my_cpu];
+
+	/* Disable hypervisor interrupt. */
+	disable_percpu_irq(priv->intr_id);
+
+	napi_schedule(&info->napi);
+
+	return IRQ_HANDLED;
+}
+
+
+/*
+ * One time initialization per interface.
+ */
+static int tile_net_open_aux(struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+
+	int ret;
+	int dummy;
+	unsigned int epp_lotar;
+
+	/*
+	 * Find out where EPP memory should be homed.
+	 */
+	ret = hv_dev_pread(priv->hv_devhdl, 0,
+			   (HV_VirtAddr)&epp_lotar, sizeof(epp_lotar),
+			   NETIO_EPP_SHM_OFF);
+	if (ret < 0) {
+		pr_err("could not read epp_shm_queue lotar.\n");
+		return -EIO;
+	}
+
+	/*
+	 * Home the page on the EPP.
+	 */
+	{
+		int epp_home = hv_lotar_to_cpu(epp_lotar);
+		struct page *page = virt_to_page(priv->epp_queue);
+		homecache_change_page_home(page, 0, epp_home);
+	}
+
+	/*
+	 * Register the EPP shared memory queue.
+	 */
+	{
+		netio_ipp_address_t ea = {
+			.va = 0,
+			.pa = __pa(priv->epp_queue),
+			.pte = hv_pte(0),
+			.size = PAGE_SIZE,
+		};
+		ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar);
+		ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3);
+		ret = hv_dev_pwrite(priv->hv_devhdl, 0,
+				    (HV_VirtAddr)&ea,
+				    sizeof(ea),
+				    NETIO_EPP_SHM_OFF);
+		if (ret < 0)
+			return -EIO;
+	}
+
+	/*
+	 * Start LIPP/LEPP.
+	 */
+	if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
+			  sizeof(dummy), NETIO_IPP_START_SHIM_OFF) < 0) {
+		pr_warning("Failed to start LIPP/LEPP.\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Register with hypervisor on each CPU.
+ *
+ * Strangely, this function does important things even if it "fails",
+ * which is especially common if the link is not up yet.  Hopefully
+ * these things are all "harmless" if done twice!
+ */
+static void tile_net_register(void *dev_ptr)
+{
+	struct net_device *dev = (struct net_device *)dev_ptr;
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info;
+
+	struct tile_netio_queue *queue;
+
+	/* Only network cpus can receive packets. */
+	int queue_id =
+		cpumask_test_cpu(my_cpu, &priv->network_cpus_map) ? 0 : 255;
+
+	netio_input_config_t config = {
+		.flags = 0,
+		.num_receive_packets = priv->network_cpus_credits,
+		.queue_id = queue_id
+	};
+
+	int ret = 0;
+	netio_queue_impl_t *queuep;
+
+	PDEBUG("tile_net_register(queue_id %d)\n", queue_id);
+
+	if (!strcmp(dev->name, "xgbe0"))
+		info = &__get_cpu_var(hv_xgbe0);
+	else if (!strcmp(dev->name, "xgbe1"))
+		info = &__get_cpu_var(hv_xgbe1);
+	else if (!strcmp(dev->name, "gbe0"))
+		info = &__get_cpu_var(hv_gbe0);
+	else if (!strcmp(dev->name, "gbe1"))
+		info = &__get_cpu_var(hv_gbe1);
+	else
+		BUG();
+
+	/* Initialize the egress timer. */
+	init_timer(&info->egress_timer);
+	info->egress_timer.data = (long)info;
+	info->egress_timer.function = tile_net_handle_egress_timer;
+
+	priv->cpu[my_cpu] = info;
+
+	/*
+	 * Register ourselves with the IPP.
+	 */
+	ret = hv_dev_pwrite(priv->hv_devhdl, 0,
+			    (HV_VirtAddr)&config,
+			    sizeof(netio_input_config_t),
+			    NETIO_IPP_INPUT_REGISTER_OFF);
+	PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
+	       ret);
+	if (ret < 0) {
+		printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF"
+		       " failure %d\n", ret);
+		info->link_down = (ret == NETIO_LINK_DOWN);
+		return;
+	}
+
+	/*
+	 * Get the pointer to our queue's system part.
+	 */
+
+	ret = hv_dev_pread(priv->hv_devhdl, 0,
+			   (HV_VirtAddr)&queuep,
+			   sizeof(netio_queue_impl_t *),
+			   NETIO_IPP_INPUT_REGISTER_OFF);
+	PDEBUG("hv_dev_pread(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
+	       ret);
+	PDEBUG("queuep %p\n", queuep);
+	if (ret <= 0) {
+		/* ISSUE: Shouldn't this be a fatal error? */
+		pr_err("hv_dev_pread NETIO_IPP_INPUT_REGISTER_OFF failure\n");
+		return;
+	}
+
+	queue = &info->queue;
+
+	queue->__system_part = queuep;
+
+	memset(&queue->__user_part, 0, sizeof(netio_queue_user_impl_t));
+
+	/* This is traditionally "config.num_receive_packets / 2". */
+	queue->__user_part.__receive_credit_interval = 4;
+	queue->__user_part.__receive_credit_remaining =
+		queue->__user_part.__receive_credit_interval;
+
+	/*
+	 * Get a fastio index from the hypervisor.
+	 * ISSUE: Shouldn't this check the result?
+	 */
+	ret = hv_dev_pread(priv->hv_devhdl, 0,
+			   (HV_VirtAddr)&queue->__user_part.__fastio_index,
+			   sizeof(queue->__user_part.__fastio_index),
+			   NETIO_IPP_GET_FASTIO_OFF);
+	PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret);
+
+	netif_napi_add(dev, &info->napi, tile_net_poll, 64);
+
+	/* Now we are registered. */
+	info->registered = true;
+}
+
+
+/*
+ * Unregister with hypervisor on each CPU.
+ */
+static void tile_net_unregister(void *dev_ptr)
+{
+	struct net_device *dev = (struct net_device *)dev_ptr;
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info = priv->cpu[my_cpu];
+
+	int ret = 0;
+	int dummy = 0;
+
+	/* Do nothing if never registered. */
+	if (info == NULL)
+		return;
+
+	/* Do nothing if already unregistered. */
+	if (!info->registered)
+		return;
+
+	/*
+	 * Unregister ourselves with LIPP.
+	 */
+	ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
+			    sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF);
+	PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n",
+	       ret);
+	if (ret < 0) {
+		/* FIXME: Just panic? */
+		pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF"
+		       " failure %d\n", ret);
+	}
+
+	/*
+	 * Discard all packets still in our NetIO queue.  Hopefully,
+	 * once the unregister call is complete, there will be no
+	 * packets still in flight on the IDN.
+	 */
+	tile_net_discard_packets(dev);
+
+	/* Reset state. */
+	info->num_needed_small_buffers = 0;
+	info->num_needed_large_buffers = 0;
+
+	/* Cancel egress timer. */
+	del_timer(&info->egress_timer);
+	info->egress_timer_scheduled = false;
+
+	netif_napi_del(&info->napi);
+
+	/* Now we are unregistered. */
+	info->registered = false;
+}
+
+
+/*
+ * Helper function for "tile_net_stop()".
+ *
+ * Also used to handle registration failure in "tile_net_open_inner()",
+ * when "fully_opened" is known to be false, and the various extra
+ * steps in "tile_net_stop()" are not necessary.  ISSUE: It might be
+ * simpler if we could just call "tile_net_stop()" anyway.
+ */
+static void tile_net_stop_aux(struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+
+	int dummy = 0;
+
+	/* Unregister all tiles, so LIPP will stop delivering packets. */
+	on_each_cpu(tile_net_unregister, (void *)dev, 1);
+
+	/* Stop LIPP/LEPP. */
+	if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
+			  sizeof(dummy), NETIO_IPP_STOP_SHIM_OFF) < 0)
+		panic("Failed to stop LIPP/LEPP!\n");
+
+	priv->partly_opened = 0;
+}
+
+
+/*
+ * Disable ingress interrupts for the given device on the current cpu.
+ */
+static void tile_net_disable_intr(void *dev_ptr)
+{
+	struct net_device *dev = (struct net_device *)dev_ptr;
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info = priv->cpu[my_cpu];
+
+	/* Disable hypervisor interrupt. */
+	disable_percpu_irq(priv->intr_id);
+
+	/* Disable NAPI if needed. */
+	if (info != NULL && info->napi_enabled) {
+		napi_disable(&info->napi);
+		info->napi_enabled = false;
+	}
+}
+
+
+/*
+ * Enable ingress interrupts for the given device on the current cpu.
+ */
+static void tile_net_enable_intr(void *dev_ptr)
+{
+	struct net_device *dev = (struct net_device *)dev_ptr;
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info = priv->cpu[my_cpu];
+
+	/* Enable hypervisor interrupt. */
+	enable_percpu_irq(priv->intr_id);
+
+	/* Enable NAPI. */
+	napi_enable(&info->napi);
+	info->napi_enabled = true;
+}
+
+
+/*
+ * tile_net_open_inner does most of the work of bringing up the interface.
+ * It's called from tile_net_open(), and also from tile_net_retry_open().
+ * The return value is 0 if the interface was brought up, < 0 if
+ * tile_net_open() should return the return value as an error, and > 0 if
+ * tile_net_open() should return success and schedule a work item to
+ * periodically retry the bringup.
+ */
+static int tile_net_open_inner(struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info;
+	struct tile_netio_queue *queue;
+	unsigned int irq;
+	int i;
+
+	/*
+	 * First try to register just on the local CPU, and handle any
+	 * semi-expected "link down" failure specially.  Note that we
+	 * do NOT call "tile_net_stop_aux()", unlike below.
+	 */
+	tile_net_register(dev);
+	info = priv->cpu[my_cpu];
+	if (!info->registered) {
+		if (info->link_down)
+			return 1;
+		return -EAGAIN;
+	}
+
+	/*
+	 * Now register everywhere else.  If any registration fails,
+	 * even for "link down" (which might not be possible), we
+	 * clean up using "tile_net_stop_aux()".
+	 */
+	smp_call_function(tile_net_register, (void *)dev, 1);
+	for_each_online_cpu(i) {
+		if (!priv->cpu[i]->registered) {
+			tile_net_stop_aux(dev);
+			return -EAGAIN;
+		}
+	}
+
+	queue = &info->queue;
+
+	/*
+	 * Set the device intr bit mask.
+	 * The tile_net_register above sets per tile __intr_id.
+	 */
+	priv->intr_id = queue->__system_part->__intr_id;
+	BUG_ON(!priv->intr_id);
+
+	/*
+	 * Register the device interrupt handler.
+	 * The __ffs() function returns the index into the interrupt handler
+	 * table from the interrupt bit mask which should have one bit
+	 * and one bit only set.
+	 */
+	irq = __ffs(priv->intr_id);
+	tile_irq_activate(irq, TILE_IRQ_PERCPU);
+	BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt,
+			   0, dev->name, (void *)dev) != 0);
+
+	/* ISSUE: How could "priv->fully_opened" ever be "true" here? */
+
+	if (!priv->fully_opened) {
+
+		int dummy = 0;
+
+		/* Allocate initial buffers. */
+
+		int max_buffers =
+			priv->network_cpus_count * priv->network_cpus_credits;
+
+		info->num_needed_small_buffers =
+			min(LIPP_SMALL_BUFFERS, max_buffers);
+
+		info->num_needed_large_buffers =
+			min(LIPP_LARGE_BUFFERS, max_buffers);
+
+		tile_net_provide_needed_buffers(info);
+
+		if (info->num_needed_small_buffers != 0 ||
+		    info->num_needed_large_buffers != 0)
+			panic("Insufficient memory for buffer stack!");
+
+		/* Start LIPP/LEPP and activate "ingress" at the shim. */
+		if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
+				  sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0)
+			panic("Failed to activate the LIPP Shim!\n");
+
+		priv->fully_opened = 1;
+	}
+
+	/* On each tile, enable the hypervisor to trigger interrupts. */
+	/* ISSUE: Do this before starting LIPP/LEPP? */
+	on_each_cpu(tile_net_enable_intr, (void *)dev, 1);
+
+	/* Start our transmit queue. */
+	netif_start_queue(dev);
+
+	return 0;
+}
+
+
+/*
+ * Called periodically to retry bringing up the NetIO interface,
+ * if it doesn't come up cleanly during tile_net_open().
+ */
+static void tile_net_open_retry(struct work_struct *w)
+{
+	struct delayed_work *dw =
+		container_of(w, struct delayed_work, work);
+
+	struct tile_net_priv *priv =
+		container_of(dw, struct tile_net_priv, retry_work);
+
+	/*
+	 * Try to bring the NetIO interface up.  If it fails, reschedule
+	 * ourselves to try again later; otherwise, tell Linux we now have
+	 * a working link.  ISSUE: What if the return value is negative?
+	 */
+	if (tile_net_open_inner(priv->dev))
+		schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
+					 TILE_NET_RETRY_INTERVAL);
+	else
+		netif_carrier_on(priv->dev);
+}
+
+
+/*
+ * Called when a network interface is made active.
+ *
+ * Returns 0 on success, negative value on failure.
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ *
+ * If the actual link is not available yet, then we tell Linux that
+ * we have no carrier, and we keep checking until the link comes up.
+ */
+static int tile_net_open(struct net_device *dev)
+{
+	int ret = 0;
+	struct tile_net_priv *priv = netdev_priv(dev);
+
+	/*
+	 * We rely on priv->partly_opened to tell us if this is the
+	 * first time this interface is being brought up. If it is
+	 * set, the IPP was already initialized and should not be
+	 * initialized again.
+	 */
+	if (!priv->partly_opened) {
+
+		int count;
+		int credits;
+
+		/* Initialize LIPP/LEPP, and start the Shim. */
+		ret = tile_net_open_aux(dev);
+		if (ret < 0) {
+			pr_err("tile_net_open_aux failed: %d\n", ret);
+			return ret;
+		}
+
+		/* Analyze the network cpus. */
+
+		if (network_cpus_used)
+			cpumask_copy(&priv->network_cpus_map,
+				     &network_cpus_map);
+		else
+			cpumask_copy(&priv->network_cpus_map, cpu_online_mask);
+
+
+		count = cpumask_weight(&priv->network_cpus_map);
+
+		/* Limit credits to available buffers, and apply min. */
+		credits = max(16, (LIPP_LARGE_BUFFERS / count) & ~1);
+
+		/* Apply "GBE" max limit. */
+		/* ISSUE: Use higher limit for XGBE? */
+		credits = min(NETIO_MAX_RECEIVE_PKTS, credits);
+
+		priv->network_cpus_count = count;
+		priv->network_cpus_credits = credits;
+
+#ifdef TILE_NET_DEBUG
+		pr_info("Using %d network cpus, with %d credits each\n",
+		       priv->network_cpus_count, priv->network_cpus_credits);
+#endif
+
+		priv->partly_opened = 1;
+	}
+
+	/*
+	 * Attempt to bring up the link.
+	 */
+	ret = tile_net_open_inner(dev);
+	if (ret <= 0) {
+		if (ret == 0)
+			netif_carrier_on(dev);
+		return ret;
+	}
+
+	/*
+	 * We were unable to bring up the NetIO interface, but we want to
+	 * try again in a little bit.  Tell Linux that we have no carrier
+	 * so it doesn't try to use the interface before the link comes up
+	 * and then remember to try again later.
+	 */
+	netif_carrier_off(dev);
+	schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
+				 TILE_NET_RETRY_INTERVAL);
+
+	return 0;
+}
+
+
+/*
+ * Disables a network interface.
+ *
+ * Returns 0, this is not allowed to fail.
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ *
+ * ISSUE: Can this can be called while "tile_net_poll()" is running?
+ */
+static int tile_net_stop(struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+
+	bool pending = true;
+
+	PDEBUG("tile_net_stop()\n");
+
+	/* ISSUE: Only needed if not yet fully open. */
+	cancel_delayed_work_sync(&priv->retry_work);
+
+	/* Can't transmit any more. */
+	netif_stop_queue(dev);
+
+	/*
+	 * Disable hypervisor interrupts on each tile.
+	 */
+	on_each_cpu(tile_net_disable_intr, (void *)dev, 1);
+
+	/*
+	 * Unregister the interrupt handler.
+	 * The __ffs() function returns the index into the interrupt handler
+	 * table from the interrupt bit mask which should have one bit
+	 * and one bit only set.
+	 */
+	if (priv->intr_id)
+		free_irq(__ffs(priv->intr_id), dev);
+
+	/*
+	 * Drain all the LIPP buffers.
+	 */
+
+	while (true) {
+		int buffer;
+
+		/* NOTE: This should never fail. */
+		if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&buffer,
+				 sizeof(buffer), NETIO_IPP_DRAIN_OFF) < 0)
+			break;
+
+		/* Stop when done. */
+		if (buffer == 0)
+			break;
+
+		{
+			/* Convert "linux_buffer_t" to "va". */
+			void *va = __va((phys_addr_t)(buffer >> 1) << 7);
+
+			/* Acquire the associated "skb". */
+			struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
+			struct sk_buff *skb = *skb_ptr;
+
+			kfree_skb(skb);
+		}
+	}
+
+	/* Stop LIPP/LEPP. */
+	tile_net_stop_aux(dev);
+
+
+	priv->fully_opened = 0;
+
+
+	/*
+	 * XXX: ISSUE: It appears that, in practice anyway, by the
+	 * time we get here, there are no pending completions.
+	 */
+	while (pending) {
+
+		struct sk_buff *olds[32];
+		unsigned int wanted = 32;
+		unsigned int i, nolds = 0;
+
+		nolds = tile_net_lepp_grab_comps(dev, olds,
+						 wanted, &pending);
+
+		/* ISSUE: We have never actually seen this debug spew. */
+		if (nolds != 0)
+			pr_info("During tile_net_stop(), grabbed %d comps.\n",
+			       nolds);
+
+		for (i = 0; i < nolds; i++)
+			kfree_skb(olds[i]);
+	}
+
+
+	/* Wipe the EPP queue. */
+	memset(priv->epp_queue, 0, sizeof(lepp_queue_t));
+
+	/* Evict the EPP queue. */
+	finv_buffer(priv->epp_queue, PAGE_SIZE);
+
+	return 0;
+}
+
+
+/*
+ * Prepare the "frags" info for the resulting LEPP command.
+ *
+ * If needed, flush the memory used by the frags.
+ */
+static unsigned int tile_net_tx_frags(lepp_frag_t *frags,
+				      struct sk_buff *skb,
+				      void *b_data, unsigned int b_len)
+{
+	unsigned int i, n = 0;
+
+	struct skb_shared_info *sh = skb_shinfo(skb);
+
+	phys_addr_t cpa;
+
+	if (b_len != 0) {
+
+		if (!hash_default)
+			finv_buffer_remote(b_data, b_len);
+
+		cpa = __pa(b_data);
+		frags[n].cpa_lo = cpa;
+		frags[n].cpa_hi = cpa >> 32;
+		frags[n].length = b_len;
+		frags[n].hash_for_home = hash_default;
+		n++;
+	}
+
+	for (i = 0; i < sh->nr_frags; i++) {
+
+		skb_frag_t *f = &sh->frags[i];
+		unsigned long pfn = page_to_pfn(f->page);
+
+		/* FIXME: Compute "hash_for_home" properly. */
+		/* ISSUE: The hypervisor checks CHIP_HAS_REV1_DMA_PACKETS(). */
+		int hash_for_home = hash_default;
+
+		/* FIXME: Hmmm. */
+		if (!hash_default) {
+			void *va = pfn_to_kaddr(pfn) + f->page_offset;
+			BUG_ON(PageHighMem(f->page));
+			finv_buffer_remote(va, f->size);
+		}
+
+		cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset;
+		frags[n].cpa_lo = cpa;
+		frags[n].cpa_hi = cpa >> 32;
+		frags[n].length = f->size;
+		frags[n].hash_for_home = hash_for_home;
+		n++;
+	}
+
+	return n;
+}
+
+
+/*
+ * This function takes "skb", consisting of a header template and a
+ * payload, and hands it to LEPP, to emit as one or more segments,
+ * each consisting of a possibly modified header, plus a piece of the
+ * payload, via a process known as "tcp segmentation offload".
+ *
+ * Usually, "data" will contain the header template, of size "sh_len",
+ * and "sh->frags" will contain "skb->data_len" bytes of payload, and
+ * there will be "sh->gso_segs" segments.
+ *
+ * Sometimes, if "sendfile()" requires copying, we will be called with
+ * "data" containing the header and payload, with "frags" being empty.
+ *
+ * In theory, "sh->nr_frags" could be 3, but in practice, it seems
+ * that this will never actually happen.
+ *
+ * See "emulate_large_send_offload()" for some reference code, which
+ * does not handle checksumming.
+ *
+ * ISSUE: How do we make sure that high memory DMA does not migrate?
+ */
+static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info = priv->cpu[my_cpu];
+	struct tile_net_stats_t *stats = &info->stats;
+
+	struct skb_shared_info *sh = skb_shinfo(skb);
+
+	unsigned char *data = skb->data;
+
+	/* The ip header follows the ethernet header. */
+	struct iphdr *ih = ip_hdr(skb);
+	unsigned int ih_len = ih->ihl * 4;
+
+	/* Note that "nh == ih", by definition. */
+	unsigned char *nh = skb_network_header(skb);
+	unsigned int eh_len = nh - data;
+
+	/* The tcp header follows the ip header. */
+	struct tcphdr *th = (struct tcphdr *)(nh + ih_len);
+	unsigned int th_len = th->doff * 4;
+
+	/* The total number of header bytes. */
+	/* NOTE: This may be less than skb_headlen(skb). */
+	unsigned int sh_len = eh_len + ih_len + th_len;
+
+	/* The number of payload bytes at "skb->data + sh_len". */
+	/* This is non-zero for sendfile() without HIGHDMA. */
+	unsigned int b_len = skb_headlen(skb) - sh_len;
+
+	/* The total number of payload bytes. */
+	unsigned int d_len = b_len + skb->data_len;
+
+	/* The maximum payload size. */
+	unsigned int p_len = sh->gso_size;
+
+	/* The total number of segments. */
+	unsigned int num_segs = sh->gso_segs;
+
+	/* The temporary copy of the command. */
+	u32 cmd_body[(LEPP_MAX_CMD_SIZE + 3) / 4];
+	lepp_tso_cmd_t *cmd = (lepp_tso_cmd_t *)cmd_body;
+
+	/* Analyze the "frags". */
+	unsigned int num_frags =
+		tile_net_tx_frags(cmd->frags, skb, data + sh_len, b_len);
+
+	/* The size of the command, including frags and header. */
+	size_t cmd_size = LEPP_TSO_CMD_SIZE(num_frags, sh_len);
+
+	/* The command header. */
+	lepp_tso_cmd_t cmd_init = {
+		.tso = true,
+		.header_size = sh_len,
+		.ip_offset = eh_len,
+		.tcp_offset = eh_len + ih_len,
+		.payload_size = p_len,
+		.num_frags = num_frags,
+	};
+
+	unsigned long irqflags;
+
+	lepp_queue_t *eq = priv->epp_queue;
+
+	struct sk_buff *olds[4];
+	unsigned int wanted = 4;
+	unsigned int i, nolds = 0;
+
+	unsigned int cmd_head, cmd_tail, cmd_next;
+	unsigned int comp_tail;
+
+	unsigned int free_slots;
+
+
+	/* Paranoia. */
+	BUG_ON(skb->protocol != htons(ETH_P_IP));
+	BUG_ON(ih->protocol != IPPROTO_TCP);
+	BUG_ON(skb->ip_summed != CHECKSUM_PARTIAL);
+	BUG_ON(num_frags > LEPP_MAX_FRAGS);
+	/*--BUG_ON(num_segs != (d_len + (p_len - 1)) / p_len); */
+	BUG_ON(num_segs <= 1);
+
+
+	/* Finish preparing the command. */
+
+	/* Copy the command header. */
+	*cmd = cmd_init;
+
+	/* Copy the "header". */
+	memcpy(&cmd->frags[num_frags], data, sh_len);
+
+
+	/* Prefetch and wait, to minimize time spent holding the spinlock. */
+	prefetch_L1(&eq->comp_tail);
+	prefetch_L1(&eq->cmd_tail);
+	mb();
+
+
+	/* Enqueue the command. */
+
+	spin_lock_irqsave(&priv->cmd_lock, irqflags);
+
+	/*
+	 * Handle completions if needed to make room.
+	 * HACK: Spin until there is sufficient room.
+	 */
+	free_slots = lepp_num_free_comp_slots(eq);
+	if (free_slots < 1) {
+spin:
+		nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
+						  wanted - nolds, NULL);
+		if (lepp_num_free_comp_slots(eq) < 1)
+			goto spin;
+	}
+
+	cmd_head = eq->cmd_head;
+	cmd_tail = eq->cmd_tail;
+
+	/* NOTE: The "gotos" below are untested. */
+
+	/* Prepare to advance, detecting full queue. */
+	cmd_next = cmd_tail + cmd_size;
+	if (cmd_tail < cmd_head && cmd_next >= cmd_head)
+		goto spin;
+	if (cmd_next > LEPP_CMD_LIMIT) {
+		cmd_next = 0;
+		if (cmd_next == cmd_head)
+			goto spin;
+	}
+
+	/* Copy the command. */
+	memcpy(&eq->cmds[cmd_tail], cmd, cmd_size);
+
+	/* Advance. */
+	cmd_tail = cmd_next;
+
+	/* Record "skb" for eventual freeing. */
+	comp_tail = eq->comp_tail;
+	eq->comps[comp_tail] = skb;
+	LEPP_QINC(comp_tail);
+	eq->comp_tail = comp_tail;
+
+	/* Flush before allowing LEPP to handle the command. */
+	__insn_mf();
+
+	eq->cmd_tail = cmd_tail;
+
+	spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
+
+	if (nolds == 0)
+		nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
+
+	/* Handle completions. */
+	for (i = 0; i < nolds; i++)
+		kfree_skb(olds[i]);
+
+	/* Update stats. */
+	stats->tx_packets += num_segs;
+	stats->tx_bytes += (num_segs * sh_len) + d_len;
+
+	/* Make sure the egress timer is scheduled. */
+	tile_net_schedule_egress_timer(info);
+
+	return NETDEV_TX_OK;
+}
+
+
+/*
+ * Transmit a packet (called by the kernel via "hard_start_xmit" hook).
+ */
+static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+	int my_cpu = smp_processor_id();
+	struct tile_net_cpu *info = priv->cpu[my_cpu];
+	struct tile_net_stats_t *stats = &info->stats;
+
+	unsigned long irqflags;
+
+	struct skb_shared_info *sh = skb_shinfo(skb);
+
+	unsigned int len = skb->len;
+	unsigned char *data = skb->data;
+
+	unsigned int csum_start = skb->csum_start - skb_headroom(skb);
+
+	lepp_frag_t frags[LEPP_MAX_FRAGS];
+
+	unsigned int num_frags;
+
+	lepp_queue_t *eq = priv->epp_queue;
+
+	struct sk_buff *olds[4];
+	unsigned int wanted = 4;
+	unsigned int i, nolds = 0;
+
+	unsigned int cmd_size = sizeof(lepp_cmd_t);
+
+	unsigned int cmd_head, cmd_tail, cmd_next;
+	unsigned int comp_tail;
+
+	lepp_cmd_t cmds[LEPP_MAX_FRAGS];
+
+	unsigned int free_slots;
+
+
+	/*
+	 * This is paranoia, since we think that if the link doesn't come
+	 * up, telling Linux we have no carrier will keep it from trying
+	 * to transmit.  If it does, though, we can't execute this routine,
+	 * since data structures we depend on aren't set up yet.
+	 */
+	if (!info->registered)
+		return NETDEV_TX_BUSY;
+
+
+	/* Save the timestamp. */
+	dev->trans_start = jiffies;
+
+
+#ifdef TILE_NET_PARANOIA
+#if CHIP_HAS_CBOX_HOME_MAP()
+	if (hash_default) {
+		HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data);
+		if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
+			panic("Non-coherent egress buffer!");
+	}
+#endif
+#endif
+
+
+#ifdef TILE_NET_DUMP_PACKETS
+	/* ISSUE: Does not dump the "frags". */
+	dump_packet(data, skb_headlen(skb), "tx");
+#endif /* TILE_NET_DUMP_PACKETS */
+
+
+	if (sh->gso_size != 0)
+		return tile_net_tx_tso(skb, dev);
+
+
+	/* Prepare the commands. */
+
+	num_frags = tile_net_tx_frags(frags, skb, data, skb_headlen(skb));
+
+	for (i = 0; i < num_frags; i++) {
+
+		bool final = (i == num_frags - 1);
+
+		lepp_cmd_t cmd = {
+			.cpa_lo = frags[i].cpa_lo,
+			.cpa_hi = frags[i].cpa_hi,
+			.length = frags[i].length,
+			.hash_for_home = frags[i].hash_for_home,
+			.send_completion = final,
+			.end_of_packet = final
+		};
+
+		if (i == 0 && skb->ip_summed == CHECKSUM_PARTIAL) {
+			cmd.compute_checksum = 1;
+			cmd.checksum_data.bits.start_byte = csum_start;
+			cmd.checksum_data.bits.count = len - csum_start;
+			cmd.checksum_data.bits.destination_byte =
+				csum_start + skb->csum_offset;
+		}
+
+		cmds[i] = cmd;
+	}
+
+
+	/* Prefetch and wait, to minimize time spent holding the spinlock. */
+	prefetch_L1(&eq->comp_tail);
+	prefetch_L1(&eq->cmd_tail);
+	mb();
+
+
+	/* Enqueue the commands. */
+
+	spin_lock_irqsave(&priv->cmd_lock, irqflags);
+
+	/*
+	 * Handle completions if needed to make room.
+	 * HACK: Spin until there is sufficient room.
+	 */
+	free_slots = lepp_num_free_comp_slots(eq);
+	if (free_slots < 1) {
+spin:
+		nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
+						  wanted - nolds, NULL);
+		if (lepp_num_free_comp_slots(eq) < 1)
+			goto spin;
+	}
+
+	cmd_head = eq->cmd_head;
+	cmd_tail = eq->cmd_tail;
+
+	/* NOTE: The "gotos" below are untested. */
+
+	/* Copy the commands, or fail. */
+	for (i = 0; i < num_frags; i++) {
+
+		/* Prepare to advance, detecting full queue. */
+		cmd_next = cmd_tail + cmd_size;
+		if (cmd_tail < cmd_head && cmd_next >= cmd_head)
+			goto spin;
+		if (cmd_next > LEPP_CMD_LIMIT) {
+			cmd_next = 0;
+			if (cmd_next == cmd_head)
+				goto spin;
+		}
+
+		/* Copy the command. */
+		*(lepp_cmd_t *)&eq->cmds[cmd_tail] = cmds[i];
+
+		/* Advance. */
+		cmd_tail = cmd_next;
+	}
+
+	/* Record "skb" for eventual freeing. */
+	comp_tail = eq->comp_tail;
+	eq->comps[comp_tail] = skb;
+	LEPP_QINC(comp_tail);
+	eq->comp_tail = comp_tail;
+
+	/* Flush before allowing LEPP to handle the command. */
+	__insn_mf();
+
+	eq->cmd_tail = cmd_tail;
+
+	spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
+
+	if (nolds == 0)
+		nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
+
+	/* Handle completions. */
+	for (i = 0; i < nolds; i++)
+		kfree_skb(olds[i]);
+
+	/* HACK: Track "expanded" size for short packets (e.g. 42 < 60). */
+	stats->tx_packets++;
+	stats->tx_bytes += ((len >= ETH_ZLEN) ? len : ETH_ZLEN);
+
+	/* Make sure the egress timer is scheduled. */
+	tile_net_schedule_egress_timer(info);
+
+	return NETDEV_TX_OK;
+}
+
+
+/*
+ * Deal with a transmit timeout.
+ */
+static void tile_net_tx_timeout(struct net_device *dev)
+{
+	PDEBUG("tile_net_tx_timeout()\n");
+	PDEBUG("Transmit timeout at %ld, latency %ld\n", jiffies,
+	       jiffies - dev->trans_start);
+
+	/* XXX: ISSUE: This doesn't seem useful for us. */
+	netif_wake_queue(dev);
+}
+
+
+/*
+ * Ioctl commands.
+ */
+static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	return -EOPNOTSUPP;
+}
+
+
+/*
+ * Get System Network Statistics.
+ *
+ * Returns the address of the device statistics structure.
+ */
+static struct net_device_stats *tile_net_get_stats(struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+	u32 rx_packets = 0;
+	u32 tx_packets = 0;
+	u32 rx_bytes = 0;
+	u32 tx_bytes = 0;
+	int i;
+
+	for_each_online_cpu(i) {
+		if (priv->cpu[i]) {
+			rx_packets += priv->cpu[i]->stats.rx_packets;
+			rx_bytes += priv->cpu[i]->stats.rx_bytes;
+			tx_packets += priv->cpu[i]->stats.tx_packets;
+			tx_bytes += priv->cpu[i]->stats.tx_bytes;
+		}
+	}
+
+	priv->stats.rx_packets = rx_packets;
+	priv->stats.rx_bytes = rx_bytes;
+	priv->stats.tx_packets = tx_packets;
+	priv->stats.tx_bytes = tx_bytes;
+
+	return &priv->stats;
+}
+
+
+/*
+ * Change the "mtu".
+ *
+ * The "change_mtu" method is usually not needed.
+ * If you need it, it must be like this.
+ */
+static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
+{
+	PDEBUG("tile_net_change_mtu()\n");
+
+	/* Check ranges. */
+	if ((new_mtu < 68) || (new_mtu > 1500))
+		return -EINVAL;
+
+	/* Accept the value. */
+	dev->mtu = new_mtu;
+
+	return 0;
+}
+
+
+/*
+ * Change the Ethernet Address of the NIC.
+ *
+ * The hypervisor driver does not support changing MAC address.  However,
+ * the IPP does not do anything with the MAC address, so the address which
+ * gets used on outgoing packets, and which is accepted on incoming packets,
+ * is completely up to the NetIO program or kernel driver which is actually
+ * handling them.
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int tile_net_set_mac_address(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EINVAL;
+
+	/* ISSUE: Note that "dev_addr" is now a pointer. */
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+
+	return 0;
+}
+
+
+/*
+ * Obtain the MAC address from the hypervisor.
+ * This must be done before opening the device.
+ */
+static int tile_net_get_mac(struct net_device *dev)
+{
+	struct tile_net_priv *priv = netdev_priv(dev);
+
+	char hv_dev_name[32];
+	int len;
+
+	__netio_getset_offset_t offset = { .word = NETIO_IPP_PARAM_OFF };
+
+	int ret;
+
+	/* For example, "xgbe0". */
+	strcpy(hv_dev_name, dev->name);
+	len = strlen(hv_dev_name);
+
+	/* For example, "xgbe/0". */
+	hv_dev_name[len] = hv_dev_name[len - 1];
+	hv_dev_name[len - 1] = '/';
+	len++;
+
+	/* For example, "xgbe/0/native_hash". */
+	strcpy(hv_dev_name + len, hash_default ? "/native_hash" : "/native");
+
+	/* Get the hypervisor handle for this device. */
+	priv->hv_devhdl = hv_dev_open((HV_VirtAddr)hv_dev_name, 0);
+	PDEBUG("hv_dev_open(%s) returned %d %p\n",
+	       hv_dev_name, priv->hv_devhdl, &priv->hv_devhdl);
+	if (priv->hv_devhdl < 0) {
+		if (priv->hv_devhdl == HV_ENODEV)
+			printk(KERN_DEBUG "Ignoring unconfigured device %s\n",
+				 hv_dev_name);
+		else
+			printk(KERN_DEBUG "hv_dev_open(%s) returned %d\n",
+				 hv_dev_name, priv->hv_devhdl);
+		return -1;
+	}
+
+	/*
+	 * Read the hardware address from the hypervisor.
+	 * ISSUE: Note that "dev_addr" is now a pointer.
+	 */
+	offset.bits.class = NETIO_PARAM;
+	offset.bits.addr = NETIO_PARAM_MAC;
+	ret = hv_dev_pread(priv->hv_devhdl, 0,
+			   (HV_VirtAddr)dev->dev_addr, dev->addr_len,
+			   offset.word);
+	PDEBUG("hv_dev_pread(NETIO_PARAM_MAC) returned %d\n", ret);
+	if (ret <= 0) {
+		printk(KERN_DEBUG "hv_dev_pread(NETIO_PARAM_MAC) %s failed\n",
+		       dev->name);
+		/*
+		 * Since the device is configured by the hypervisor but we
+		 * can't get its MAC address, we are most likely running
+		 * the simulator, so let's generate a random MAC address.
+		 */
+		random_ether_addr(dev->dev_addr);
+	}
+
+	return 0;
+}
+
+
+static struct net_device_ops tile_net_ops = {
+	.ndo_open = tile_net_open,
+	.ndo_stop = tile_net_stop,
+	.ndo_start_xmit = tile_net_tx,
+	.ndo_do_ioctl = tile_net_ioctl,
+	.ndo_get_stats = tile_net_get_stats,
+	.ndo_change_mtu = tile_net_change_mtu,
+	.ndo_tx_timeout = tile_net_tx_timeout,
+	.ndo_set_mac_address = tile_net_set_mac_address
+};
+
+
+/*
+ * The setup function.
+ *
+ * This uses ether_setup() to assign various fields in dev, including
+ * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields.
+ */
+static void tile_net_setup(struct net_device *dev)
+{
+	PDEBUG("tile_net_setup()\n");
+
+	ether_setup(dev);
+
+	dev->netdev_ops = &tile_net_ops;
+
+	dev->watchdog_timeo = TILE_NET_TIMEOUT;
+
+	/* We want lockless xmit. */
+	dev->features |= NETIF_F_LLTX;
+
+	/* We support hardware tx checksums. */
+	dev->features |= NETIF_F_HW_CSUM;
+
+	/* We support scatter/gather. */
+	dev->features |= NETIF_F_SG;
+
+	/* We support TSO. */
+	dev->features |= NETIF_F_TSO;
+
+#ifdef TILE_NET_GSO
+	/* We support GSO. */
+	dev->features |= NETIF_F_GSO;
+#endif
+
+	if (hash_default)
+		dev->features |= NETIF_F_HIGHDMA;
+
+	/* ISSUE: We should support NETIF_F_UFO. */
+
+	dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN;
+
+	dev->mtu = TILE_NET_MTU;
+}
+
+
+/*
+ * Allocate the device structure, register the device, and obtain the
+ * MAC address from the hypervisor.
+ */
+static struct net_device *tile_net_dev_init(const char *name)
+{
+	int ret;
+	struct net_device *dev;
+	struct tile_net_priv *priv;
+	struct page *page;
+
+	/*
+	 * Allocate the device structure.  This allocates "priv", calls
+	 * tile_net_setup(), and saves "name".  Normally, "name" is a
+	 * template, instantiated by register_netdev(), but not for us.
+	 */
+	dev = alloc_netdev(sizeof(*priv), name, tile_net_setup);
+	if (!dev) {
+		pr_err("alloc_netdev(%s) failed\n", name);
+		return NULL;
+	}
+
+	priv = netdev_priv(dev);
+
+	/* Initialize "priv". */
+
+	memset(priv, 0, sizeof(*priv));
+
+	/* Save "dev" for "tile_net_open_retry()". */
+	priv->dev = dev;
+
+	INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry);
+
+	spin_lock_init(&priv->cmd_lock);
+	spin_lock_init(&priv->comp_lock);
+
+	/* Allocate "epp_queue". */
+	BUG_ON(get_order(sizeof(lepp_queue_t)) != 0);
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
+	if (!page) {
+		free_netdev(dev);
+		return NULL;
+	}
+	priv->epp_queue = page_address(page);
+
+	/* Register the network device. */
+	ret = register_netdev(dev);
+	if (ret) {
+		pr_err("register_netdev %s failed %d\n", dev->name, ret);
+		free_page((unsigned long)priv->epp_queue);
+		free_netdev(dev);
+		return NULL;
+	}
+
+	/* Get the MAC address. */
+	ret = tile_net_get_mac(dev);
+	if (ret < 0) {
+		unregister_netdev(dev);
+		free_page((unsigned long)priv->epp_queue);
+		free_netdev(dev);
+		return NULL;
+	}
+
+	return dev;
+}
+
+
+/*
+ * Module cleanup.
+ */
+static void tile_net_cleanup(void)
+{
+	int i;
+
+	for (i = 0; i < TILE_NET_DEVS; i++) {
+		if (tile_net_devs[i]) {
+			struct net_device *dev = tile_net_devs[i];
+			struct tile_net_priv *priv = netdev_priv(dev);
+			unregister_netdev(dev);
+			finv_buffer(priv->epp_queue, PAGE_SIZE);
+			free_page((unsigned long)priv->epp_queue);
+			free_netdev(dev);
+		}
+	}
+}
+
+
+/*
+ * Module initialization.
+ */
+static int tile_net_init_module(void)
+{
+	pr_info("Tilera IPP Net Driver\n");
+
+	tile_net_devs[0] = tile_net_dev_init("xgbe0");
+	tile_net_devs[1] = tile_net_dev_init("xgbe1");
+	tile_net_devs[2] = tile_net_dev_init("gbe0");
+	tile_net_devs[3] = tile_net_dev_init("gbe1");
+
+	return 0;
+}
+
+
+#ifndef MODULE
+/*
+ * The "network_cpus" boot argument specifies the cpus that are dedicated
+ * to handle ingress packets.
+ *
+ * The parameter should be in the form "network_cpus=m-n[,x-y]", where
+ * m, n, x, y are integer numbers that represent the cpus that can be
+ * neither a dedicated cpu nor a dataplane cpu.
+ */
+static int __init network_cpus_setup(char *str)
+{
+	int rc = cpulist_parse_crop(str, &network_cpus_map);
+	if (rc != 0) {
+		pr_warning("network_cpus=%s: malformed cpu list\n",
+		       str);
+	} else {
+
+		/* Remove dedicated cpus. */
+		cpumask_and(&network_cpus_map, &network_cpus_map,
+			    cpu_possible_mask);
+
+
+		if (cpumask_empty(&network_cpus_map)) {
+			pr_warning("Ignoring network_cpus='%s'.\n",
+			       str);
+		} else {
+			char buf[1024];
+			cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map);
+			pr_info("Linux network CPUs: %s\n", buf);
+			network_cpus_used = true;
+		}
+	}
+
+	return 0;
+}
+__setup("network_cpus=", network_cpus_setup);
+#endif
+
+
+module_init(tile_net_init_module);
+module_exit(tile_net_cleanup);
diff --git a/drivers/net/wireless/ath/ath9k/eeprom_9287.c b/drivers/net/wireless/ath/ath9k/eeprom_9287.c
index 966b949..195406d 100644
--- a/drivers/net/wireless/ath/ath9k/eeprom_9287.c
+++ b/drivers/net/wireless/ath/ath9k/eeprom_9287.c
@@ -37,7 +37,7 @@
 	int addr, eep_start_loc;
 	eep_data = (u16 *)eep;
 
-	if (ah->hw_version.devid == 0x7015)
+	if (AR9287_HTC_DEVID(ah))
 		eep_start_loc = AR9287_HTC_EEP_START_LOC;
 	else
 		eep_start_loc = AR9287_EEP_START_LOC;
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index f7ec31b..dfb6560 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -36,8 +36,13 @@
 	{ USB_DEVICE(0x13D3, 0x3327) }, /* Azurewave */
 	{ USB_DEVICE(0x13D3, 0x3328) }, /* Azurewave */
 	{ USB_DEVICE(0x13D3, 0x3346) }, /* IMC Networks */
+	{ USB_DEVICE(0x13D3, 0x3348) }, /* Azurewave */
+	{ USB_DEVICE(0x13D3, 0x3349) }, /* Azurewave */
+	{ USB_DEVICE(0x13D3, 0x3350) }, /* Azurewave */
 	{ USB_DEVICE(0x04CA, 0x4605) }, /* Liteon */
 	{ USB_DEVICE(0x083A, 0xA704) }, /* SMC Networks */
+	{ USB_DEVICE(0x040D, 0x3801) }, /* VIA */
+	{ USB_DEVICE(0x1668, 0x1200) }, /* Verizon */
 	{ },
 };
 
@@ -806,6 +811,8 @@
 	case 0x7010:
 	case 0x7015:
 	case 0x9018:
+	case 0xA704:
+	case 0x1200:
 		firm_offset = AR7010_FIRMWARE_TEXT;
 		break;
 	default:
@@ -928,6 +935,8 @@
 	case 0x7010:
 	case 0x7015:
 	case 0x9018:
+	case 0xA704:
+	case 0x1200:
 		if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x0202)
 			hif_dev->fw_name = FIRMWARE_AR7010_1_1;
 		else
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index 3d7b97f..7c8a38d 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -249,6 +249,8 @@
 	case 0x7010:
 	case 0x7015:
 	case 0x9018:
+	case 0xA704:
+	case 0x1200:
 		priv->htc->credits = 45;
 		break;
 	default:
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index 3d19b5b..29d80ca 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -121,7 +121,7 @@
 			tx_hdr.data_type = ATH9K_HTC_NORMAL;
 		}
 
-		if (ieee80211_is_data(fc)) {
+		if (ieee80211_is_data_qos(fc)) {
 			qc = ieee80211_get_qos_ctl(hdr);
 			tx_hdr.tidno = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
 		}
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 6a0d99e..92bc5c5 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -817,8 +817,6 @@
 
 	ath9k_ps_wakeup(sc);
 
-	pm_qos_remove_request(&ath9k_pm_qos_req);
-
 	wiphy_rfkill_stop_polling(sc->hw->wiphy);
 	ath_deinit_leds(sc);
 
@@ -832,6 +830,7 @@
 	}
 
 	ieee80211_unregister_hw(hw);
+	pm_qos_remove_request(&ath9k_pm_qos_req);
 	ath_rx_cleanup(sc);
 	ath_tx_cleanup(sc);
 	ath9k_deinit_softc(sc);
diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h
index fa05b71..dddf579 100644
--- a/drivers/net/wireless/ath/ath9k/reg.h
+++ b/drivers/net/wireless/ath/ath9k/reg.h
@@ -866,7 +866,13 @@
 #define AR_DEVID_7010(_ah) \
 	(((_ah)->hw_version.devid == 0x7010) || \
 	 ((_ah)->hw_version.devid == 0x7015) || \
-	 ((_ah)->hw_version.devid == 0x9018))
+	 ((_ah)->hw_version.devid == 0x9018) || \
+	 ((_ah)->hw_version.devid == 0xA704) || \
+	 ((_ah)->hw_version.devid == 0x1200))
+
+#define AR9287_HTC_DEVID(_ah) \
+	(((_ah)->hw_version.devid == 0x7015) || \
+	 ((_ah)->hw_version.devid == 0x1200))
 
 #define AR_RADIO_SREV_MAJOR                   0xf0
 #define AR_RAD5133_SREV_MAJOR                 0xc0
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index 3317039..7504ed1 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -553,12 +553,12 @@
 		usb_free_urb(urb);
 	}
 
-	ret = usb_wait_anchor_empty_timeout(&ar->tx_cmd, HZ);
+	ret = usb_wait_anchor_empty_timeout(&ar->tx_cmd, 1000);
 	if (ret == 0)
 		err = -ETIMEDOUT;
 
 	/* lets wait a while until the tx - queues are dried out */
-	ret = usb_wait_anchor_empty_timeout(&ar->tx_anch, HZ);
+	ret = usb_wait_anchor_empty_timeout(&ar->tx_anch, 1000);
 	if (ret == 0)
 		err = -ETIMEDOUT;
 
diff --git a/drivers/net/wireless/orinoco/orinoco_usb.c b/drivers/net/wireless/orinoco/orinoco_usb.c
index a38a7bd2..b9aedf1 100644
--- a/drivers/net/wireless/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/orinoco/orinoco_usb.c
@@ -57,7 +57,6 @@
 #include <linux/fcntl.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/smp_lock.h>
 #include <linux/usb.h>
 #include <linux/timer.h>
 
diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c
index cce00ed..af212c6 100644
--- a/drivers/parisc/eisa_eeprom.c
+++ b/drivers/parisc/eisa_eeprom.c
@@ -24,7 +24,6 @@
 #include <linux/kernel.h>
 #include <linux/miscdevice.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/fs.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index f01e344..98e6fdf 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -49,6 +49,7 @@
 obj-$(CONFIG_X86_VISWS) += setup-irq.o
 obj-$(CONFIG_MN10300) += setup-bus.o
 obj-$(CONFIG_MICROBLAZE) += setup-bus.o
+obj-$(CONFIG_TILE) += setup-bus.o setup-irq.o
 
 #
 # ACPI Related PCI FW Functions
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 5624db8..003170e 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -64,17 +64,57 @@
 	}
 }
 
+static bool pci_bus_resource_better(struct resource *res1, bool pos1,
+				    struct resource *res2, bool pos2)
+{
+	/* If exactly one is positive decode, always prefer that one */
+	if (pos1 != pos2)
+		return pos1 ? true : false;
+
+	/* Prefer the one that contains the highest address */
+	if (res1->end != res2->end)
+		return (res1->end > res2->end) ? true : false;
+
+	/* Otherwise, prefer the one with highest "center of gravity" */
+	if (res1->start != res2->start)
+		return (res1->start > res2->start) ? true : false;
+
+	/* Otherwise, choose one arbitrarily (but consistently) */
+	return (res1 > res2) ? true : false;
+}
+
+static bool pci_bus_resource_positive(struct pci_bus *bus, struct resource *res)
+{
+	struct pci_bus_resource *bus_res;
+
+	/*
+	 * This relies on the fact that pci_bus.resource[] refers to P2P or
+	 * CardBus bridge base/limit registers, which are always positively
+	 * decoded.  The pci_bus.resources list contains host bridge or
+	 * subtractively decoded resources.
+	 */
+	list_for_each_entry(bus_res, &bus->resources, list) {
+		if (bus_res->res == res)
+			return (bus_res->flags & PCI_SUBTRACTIVE_DECODE) ?
+				false : true;
+	}
+	return true;
+}
+
 /*
- * Find the highest-address bus resource below the cursor "res".  If the
- * cursor is NULL, return the highest resource.
+ * Find the next-best bus resource after the cursor "res".  If the cursor is
+ * NULL, return the best resource.  "Best" means that we prefer positive
+ * decode regions over subtractive decode, then those at higher addresses.
  */
 static struct resource *pci_bus_find_resource_prev(struct pci_bus *bus,
 						   unsigned int type,
 						   struct resource *res)
 {
+	bool res_pos, r_pos, prev_pos = false;
 	struct resource *r, *prev = NULL;
 	int i;
 
+	res_pos = pci_bus_resource_positive(bus, res);
 	pci_bus_for_each_resource(bus, r, i) {
 		if (!r)
 			continue;
@@ -82,26 +122,14 @@
 		if ((r->flags & IORESOURCE_TYPE_BITS) != type)
 			continue;
 
-		/* If this resource is at or past the cursor, skip it */
-		if (res) {
-			if (r == res)
-				continue;
-			if (r->end > res->end)
-				continue;
-			if (r->end == res->end && r->start > res->start)
-				continue;
+		r_pos = pci_bus_resource_positive(bus, r);
+		if (!res || pci_bus_resource_better(res, res_pos, r, r_pos)) {
+			if (!prev || pci_bus_resource_better(r, r_pos,
+							     prev, prev_pos)) {
+				prev = r;
+				prev_pos = r_pos;
+			}
 		}
-
-		if (!prev)
-			prev = r;
-
-		/*
-		 * A small resource is higher than a large one that ends at
-		 * the same address.
-		 */
-		if (r->end > prev->end ||
-		    (r->end == prev->end && r->start > prev->start))
-			prev = r;
 	}
 
 	return prev;
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 5becbde..2850e64 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -276,6 +276,12 @@
 
 	for (;;) {
 		offset = next_offset;
+
+		/* Make sure what we read is still in the mapped section */
+		if (WARN(offset > (ebda_sz * 1024 - 4),
+			 "ibmphp_ebda: next read is beyond ebda_sz\n"))
+			break;
+
 		next_offset = readw (io_mem + offset);	/* offset of next blk */
 
 		offset += 2;
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index b5a7d9b..63d5042 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -705,17 +705,21 @@
 
 #ifdef HAVE_PCI_MMAP
 
-int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma)
+int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma,
+		  enum pci_mmap_api mmap_api)
 {
-	unsigned long nr, start, size;
+	unsigned long nr, start, size, pci_start;
 
+	if (pci_resource_len(pdev, resno) == 0)
+		return 0;
 	nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	start = vma->vm_pgoff;
 	size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1;
-	if (start < size && size - start >= nr)
+	pci_start = (mmap_api == PCI_MMAP_PROCFS) ?
+			pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0;
+	if (start >= pci_start && start < pci_start + size &&
+			start + nr <= pci_start + size)
 		return 1;
-	WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n",
-		current->comm, start, start+nr, pci_name(pdev), resno, size);
 	return 0;
 }
 
@@ -745,8 +749,15 @@
 	if (i >= PCI_ROM_RESOURCE)
 		return -ENODEV;
 
-	if (!pci_mmap_fits(pdev, i, vma))
+	if (!pci_mmap_fits(pdev, i, vma, PCI_MMAP_SYSFS)) {
+		WARN(1, "process \"%s\" tried to map 0x%08lx bytes "
+			"at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n",
+			current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff,
+			pci_name(pdev), i,
+			(u64)pci_resource_start(pdev, i),
+			(u64)pci_resource_len(pdev, i));
 		return -EINVAL;
+	}
 
 	/* pci_mmap_page_range() expects the same kind of entry as coming
 	 * from /proc/bus/pci/ which is a "user visible" value. If this is
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e98c810..710c8a2 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1007,6 +1007,18 @@
 	int err;
 	int i, bars = 0;
 
+	/*
+	 * Power state could be unknown at this point, either due to a fresh
+	 * boot or a device removal call.  So get the current power state
+	 * so that things like MSI message writing will behave as expected
+	 * (e.g. if the device really is in D0 at enable time).
+	 */
+	if (dev->pm_cap) {
+		u16 pmcsr;
+		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+		dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
+	}
+
 	if (atomic_add_return(1, &dev->enable_cnt) > 1)
 		return 0;		/* already enabled */
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f5c7c38..7d33f66 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -22,8 +22,13 @@
 #endif
 extern void pci_cleanup_rom(struct pci_dev *dev);
 #ifdef HAVE_PCI_MMAP
+enum pci_mmap_api {
+	PCI_MMAP_SYSFS,	/* mmap on /sys/bus/pci/devices/<BDF>/resource<N> */
+	PCI_MMAP_PROCFS	/* mmap on /proc/bus/pci/<BDF> */
+};
 extern int pci_mmap_fits(struct pci_dev *pdev, int resno,
-			 struct vm_area_struct *vma);
+			 struct vm_area_struct *vmai,
+			 enum pci_mmap_api mmap_api);
 #endif
 int pci_probe_reset_function(struct pci_dev *dev);
 
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 297b72c..27911b5 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -10,7 +10,6 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/capability.h>
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
@@ -257,7 +256,7 @@
 
 	/* Make sure the caller is mapping a real resource for this device */
 	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
-		if (pci_mmap_fits(dev, i, vma))
+		if (pci_mmap_fits(dev, i, vma,  PCI_MMAP_PROCFS))
 			break;
 	}
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f5c63fe..6f9350c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2136,6 +2136,24 @@
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB,
 			quirk_unhide_mch_dev6);
 
+#ifdef CONFIG_TILE
+/*
+ * The Tilera TILEmpower platform needs to set the link speed
+ * to 2.5GT(Giga-Transfers)/s (Gen 1). The default link speed
+ * setting is 5GT/s (Gen 2). 0x98 is the Link Control2 PCIe
+ * capability register of the PEX8624 PCIe switch. The switch
+ * supports link speed auto negotiation, but falsely sets
+ * the link speed to 5GT/s.
+ */
+static void __devinit quirk_tile_plx_gen1(struct pci_dev *dev)
+{
+	if (tile_plx_gen1) {
+		pci_write_config_dword(dev, 0x98, 0x1);
+		mdelay(50);
+	}
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1);
+#endif /* CONFIG_TILE */
 
 #ifdef CONFIG_PCI_MSI
 /* Some chipsets do not support MSI. We cannot easily rely on setting
diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c
index e73ebef..315b311 100644
--- a/drivers/pnp/isapnp/proc.c
+++ b/drivers/pnp/isapnp/proc.c
@@ -21,7 +21,6 @@
 #include <linux/isapnp.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
 extern struct pnp_protocol isapnp_protocol;
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 5efbd59..06e41ed 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -761,7 +761,7 @@
 	clk_put(rtc->clk);
 	iounmap(rtc->regbase);
 err_badmap:
-	release_resource(rtc->res);
+	release_mem_region(rtc->res->start, rtc->regsize);
 err_badres:
 	kfree(rtc);
 
@@ -786,7 +786,7 @@
 	}
 
 	iounmap(rtc->regbase);
-	release_resource(rtc->res);
+	release_mem_region(rtc->res->start, rtc->regsize);
 
 	clk_disable(rtc->clk);
 	clk_put(rtc->clk);
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index c71d89d..83b4615 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -17,7 +17,6 @@
 #include <linux/device.h>
 #include <linux/poll.h>
 #include <linux/mutex.h>
-#include <linux/smp_lock.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index eb28fb0..f6489eb 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -14,7 +14,6 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/smp_lock.h>
 
 #include <asm/compat.h>
 #include <asm/ccwdev.h>
diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c
index 883e2db..e090a30 100644
--- a/drivers/s390/char/tape_char.c
+++ b/drivers/s390/char/tape_char.c
@@ -17,7 +17,6 @@
 #include <linux/types.h>
 #include <linux/proc_fs.h>
 #include <linux/mtio.h>
-#include <linux/smp_lock.h>
 #include <linux/compat.h>
 
 #include <asm/uaccess.h>
diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c
index 6c40867..b3a3e8e 100644
--- a/drivers/s390/char/tape_core.c
+++ b/drivers/s390/char/tape_core.c
@@ -209,29 +209,79 @@
 	wake_up(&device->state_change_wq);
 }
 
+struct tape_med_state_work_data {
+	struct tape_device *device;
+	enum tape_medium_state state;
+	struct work_struct  work;
+};
+
+static void
+tape_med_state_work_handler(struct work_struct *work)
+{
+	static char env_state_loaded[] = "MEDIUM_STATE=LOADED";
+	static char env_state_unloaded[] = "MEDIUM_STATE=UNLOADED";
+	struct tape_med_state_work_data *p =
+		container_of(work, struct tape_med_state_work_data, work);
+	struct tape_device *device = p->device;
+	char *envp[] = { NULL, NULL };
+
+	switch (p->state) {
+	case MS_UNLOADED:
+		pr_info("%s: The tape cartridge has been successfully "
+			"unloaded\n", dev_name(&device->cdev->dev));
+		envp[0] = env_state_unloaded;
+		kobject_uevent_env(&device->cdev->dev.kobj, KOBJ_CHANGE, envp);
+		break;
+	case MS_LOADED:
+		pr_info("%s: A tape cartridge has been mounted\n",
+			dev_name(&device->cdev->dev));
+		envp[0] = env_state_loaded;
+		kobject_uevent_env(&device->cdev->dev.kobj, KOBJ_CHANGE, envp);
+		break;
+	default:
+		break;
+	}
+	tape_put_device(device);
+	kfree(p);
+}
+
+static void
+tape_med_state_work(struct tape_device *device, enum tape_medium_state state)
+{
+	struct tape_med_state_work_data *p;
+
+	p = kzalloc(sizeof(*p), GFP_ATOMIC);
+	if (p) {
+		INIT_WORK(&p->work, tape_med_state_work_handler);
+		p->device = tape_get_device(device);
+		p->state = state;
+		schedule_work(&p->work);
+	}
+}
+
 void
 tape_med_state_set(struct tape_device *device, enum tape_medium_state newstate)
 {
-	if (device->medium_state == newstate)
+	enum tape_medium_state oldstate;
+
+	oldstate = device->medium_state;
+	if (oldstate == newstate)
 		return;
+	device->medium_state = newstate;
 	switch(newstate){
 	case MS_UNLOADED:
 		device->tape_generic_status |= GMT_DR_OPEN(~0);
-		if (device->medium_state == MS_LOADED)
-			pr_info("%s: The tape cartridge has been successfully "
-				"unloaded\n", dev_name(&device->cdev->dev));
+		if (oldstate == MS_LOADED)
+			tape_med_state_work(device, MS_UNLOADED);
 		break;
 	case MS_LOADED:
 		device->tape_generic_status &= ~GMT_DR_OPEN(~0);
-		if (device->medium_state == MS_UNLOADED)
-			pr_info("%s: A tape cartridge has been mounted\n",
-				dev_name(&device->cdev->dev));
+		if (oldstate == MS_UNLOADED)
+			tape_med_state_work(device, MS_LOADED);
 		break;
 	default:
-		// print nothing
 		break;
 	}
-	device->medium_state = newstate;
 	wake_up(&device->state_change_wq);
 }
 
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index 9f66142..c837d74 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -30,7 +30,6 @@
 #include <linux/kmod.h>
 #include <linux/cdev.h>
 #include <linux/device.h>
-#include <linux/smp_lock.h>
 #include <linux/string.h>
 
 MODULE_AUTHOR
@@ -249,27 +248,25 @@
 	char cp_command[80];
 	char cp_response[160];
 	char *onoff, *qid_string;
+	int rc;
 
-	memset(cp_command, 0x00, sizeof(cp_command));
-	memset(cp_response, 0x00, sizeof(cp_response));
-
-        onoff = ((action == 1) ? "ON" : "OFF");
+	onoff = ((action == 1) ? "ON" : "OFF");
 	qid_string = ((recording_class_AB == 1) ? " QID * " : "");
 
-        /*
+	/*
 	 * The recording commands needs to be called with option QID
 	 * for guests that have previlege classes A or B.
 	 * Purging has to be done as separate step, because recording
 	 * can't be switched on as long as records are on the queue.
 	 * Doing both at the same time doesn't work.
 	 */
-
-	if (purge) {
+	if (purge && (action == 1)) {
+		memset(cp_command, 0x00, sizeof(cp_command));
+		memset(cp_response, 0x00, sizeof(cp_response));
 		snprintf(cp_command, sizeof(cp_command),
 			 "RECORDING %s PURGE %s",
 			 logptr->recording_name,
 			 qid_string);
-
 		cpcmd(cp_command, cp_response, sizeof(cp_response), NULL);
 	}
 
@@ -279,19 +276,33 @@
 		logptr->recording_name,
 		onoff,
 		qid_string);
-
 	cpcmd(cp_command, cp_response, sizeof(cp_response), NULL);
 	/* The recording command will usually answer with 'Command complete'
 	 * on success, but when the specific service was never connected
 	 * before then there might be an additional informational message
 	 * 'HCPCRC8072I Recording entry not found' before the
-         * 'Command complete'. So I use strstr rather then the strncmp.
+	 * 'Command complete'. So I use strstr rather then the strncmp.
 	 */
 	if (strstr(cp_response,"Command complete"))
-		return 0;
+		rc = 0;
 	else
-		return -EIO;
+		rc = -EIO;
+	/*
+	 * If we turn recording off, we have to purge any remaining records
+	 * afterwards, as a large number of queued records may impact z/VM
+	 * performance.
+	 */
+	if (purge && (action == 0)) {
+		memset(cp_command, 0x00, sizeof(cp_command));
+		memset(cp_response, 0x00, sizeof(cp_response));
+		snprintf(cp_command, sizeof(cp_command),
+			 "RECORDING %s PURGE %s",
+			 logptr->recording_name,
+			 qid_string);
+		cpcmd(cp_command, cp_response, sizeof(cp_response), NULL);
+	}
 
+	return rc;
 }
 
 
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 1de672f..f7e4ae6 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -13,7 +13,6 @@
 
 #include <linux/cdev.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 
 #include <asm/uaccess.h>
 #include <asm/cio.h>
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 2ff8a22..e8391b89 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1455,7 +1455,16 @@
 		break;
 	case IO_SCH_UNREG_ATTACH:
 	case IO_SCH_UNREG:
-		if (cdev)
+		if (!cdev)
+			break;
+		if (cdev->private->state == DEV_STATE_SENSE_ID) {
+			/*
+			 * Note: delayed work triggered by this event
+			 * and repeated calls to sch_event are synchronized
+			 * by the above check for work_pending(cdev).
+			 */
+			dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+		} else
 			ccw_device_set_notoper(cdev);
 		break;
 	case IO_SCH_NOP:
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index f522174..7fca9c1 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -35,7 +35,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/compat.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 50286d8..6bd2dbc 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -76,7 +76,7 @@
 	scpnt->scsi_done(scpnt);
 }
 
-static int zfcp_scsi_queuecommand(struct scsi_cmnd *scpnt,
+static int zfcp_scsi_queuecommand_lck(struct scsi_cmnd *scpnt,
 				  void (*done) (struct scsi_cmnd *))
 {
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(scpnt->device);
@@ -127,6 +127,8 @@
 	return ret;
 }
 
+static DEF_SCSI_QCMD(zfcp_scsi_queuecommand)
+
 static int zfcp_scsi_slave_alloc(struct scsi_device *sdev)
 {
 	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index fcf08b3..b7bd5b0 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1765,7 +1765,7 @@
 } /* End twa_scsi_eh_reset() */
 
 /* This is the main scsi queue function to handle scsi opcodes */
-static int twa_scsi_queue(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
 	int request_id, retval;
 	TW_Device_Extension *tw_dev = (TW_Device_Extension *)SCpnt->device->host->hostdata;
@@ -1812,6 +1812,8 @@
 	return retval;
 } /* End twa_scsi_queue() */
 
+static DEF_SCSI_QCMD(twa_scsi_queue)
+
 /* This function hands scsi cdb's to the firmware */
 static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg)
 {
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 6a95d11..13e39e1 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -1501,7 +1501,7 @@
 } /* End twl_scsi_eh_reset() */
 
 /* This is the main scsi queue function to handle scsi opcodes */
-static int twl_scsi_queue(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+static int twl_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
 	int request_id, retval;
 	TW_Device_Extension *tw_dev = (TW_Device_Extension *)SCpnt->device->host->hostdata;
@@ -1536,6 +1536,8 @@
 	return retval;
 } /* End twl_scsi_queue() */
 
+static DEF_SCSI_QCMD(twl_scsi_queue)
+
 /* This function tells the controller to shut down */
 static void __twl_shutdown(TW_Device_Extension *tw_dev)
 {
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index b112534..7fe96ff 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1947,7 +1947,7 @@
 } /* End tw_scsiop_test_unit_ready_complete() */
 
 /* This is the main scsi queue function to handle scsi opcodes */
-static int tw_scsi_queue(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) 
+static int tw_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
 	unsigned char *command = SCpnt->cmnd;
 	int request_id = 0;
@@ -2023,6 +2023,8 @@
 	return retval;
 } /* End tw_scsi_queue() */
 
+static DEF_SCSI_QCMD(tw_scsi_queue)
+
 /* This function is the interrupt service routine */
 static irqreturn_t tw_interrupt(int irq, void *dev_instance) 
 {
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 89fc1c8..f672491 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -167,7 +167,7 @@
 #include "53c700_d.h"
 
 
-STATIC int NCR_700_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *));
+STATIC int NCR_700_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *);
 STATIC int NCR_700_abort(struct scsi_cmnd * SCpnt);
 STATIC int NCR_700_bus_reset(struct scsi_cmnd * SCpnt);
 STATIC int NCR_700_host_reset(struct scsi_cmnd * SCpnt);
@@ -1749,8 +1749,8 @@
 	return IRQ_RETVAL(handled);
 }
 
-STATIC int
-NCR_700_queuecommand(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *))
+static int
+NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *))
 {
 	struct NCR_700_Host_Parameters *hostdata = 
 		(struct NCR_700_Host_Parameters *)SCp->device->host->hostdata[0];
@@ -1904,6 +1904,8 @@
 	return 0;
 }
 
+STATIC DEF_SCSI_QCMD(NCR_700_queuecommand)
+
 STATIC int
 NCR_700_abort(struct scsi_cmnd * SCp)
 {
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index fc0b4b8..f66c33b 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -2807,7 +2807,7 @@
   Outgoing Mailbox for execution by the associated Host Adapter.
 */
 
-static int BusLogic_QueueCommand(struct scsi_cmnd *Command, void (*CompletionRoutine) (struct scsi_cmnd *))
+static int BusLogic_QueueCommand_lck(struct scsi_cmnd *Command, void (*CompletionRoutine) (struct scsi_cmnd *))
 {
 	struct BusLogic_HostAdapter *HostAdapter = (struct BusLogic_HostAdapter *) Command->device->host->hostdata;
 	struct BusLogic_TargetFlags *TargetFlags = &HostAdapter->TargetFlags[Command->device->id];
@@ -2994,6 +2994,7 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(BusLogic_QueueCommand)
 
 #if 0
 /*
diff --git a/drivers/scsi/BusLogic.h b/drivers/scsi/BusLogic.h
index 73f237a1..649fcb3 100644
--- a/drivers/scsi/BusLogic.h
+++ b/drivers/scsi/BusLogic.h
@@ -1319,7 +1319,7 @@
 */
 
 static const char *BusLogic_DriverInfo(struct Scsi_Host *);
-static int BusLogic_QueueCommand(struct scsi_cmnd *, void (*CompletionRoutine) (struct scsi_cmnd *));
+static int BusLogic_QueueCommand(struct Scsi_Host *h, struct scsi_cmnd *);
 static int BusLogic_BIOSDiskParameters(struct scsi_device *, struct block_device *, sector_t, int *);
 static int BusLogic_ProcDirectoryInfo(struct Scsi_Host *, char *, char **, off_t, int, int);
 static int BusLogic_SlaveConfigure(struct scsi_device *);
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index 5d2f148..9a5629f 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -952,7 +952,7 @@
  *	Locks: host lock taken by caller
  */
 
-static int NCR5380_queue_command(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *)) 
+static int NCR5380_queue_command_lck(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
 {
 	struct Scsi_Host *instance = cmd->device->host;
 	struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
@@ -1021,6 +1021,7 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(NCR5380_queue_command)
 
 /**
  *	NCR5380_main	-	NCR state machines
diff --git a/drivers/scsi/NCR5380.h b/drivers/scsi/NCR5380.h
index bdc468c..fd40a32 100644
--- a/drivers/scsi/NCR5380.h
+++ b/drivers/scsi/NCR5380.h
@@ -313,7 +313,7 @@
 #endif
 static int NCR5380_abort(Scsi_Cmnd * cmd);
 static int NCR5380_bus_reset(Scsi_Cmnd * cmd);
-static int NCR5380_queue_command(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *));
+static int NCR5380_queue_command(struct Scsi_Host *, struct scsi_cmnd *);
 static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance,
 	char *buffer, char **start, off_t offset, int length, int inout);
 
diff --git a/drivers/scsi/NCR53c406a.c b/drivers/scsi/NCR53c406a.c
index 6961f78..c91888a 100644
--- a/drivers/scsi/NCR53c406a.c
+++ b/drivers/scsi/NCR53c406a.c
@@ -693,7 +693,7 @@
 }
 #endif
 
-static int NCR53c406a_queue(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
+static int NCR53c406a_queue_lck(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
 {
 	int i;
 
@@ -726,6 +726,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(NCR53c406a_queue)
+
 static int NCR53c406a_host_reset(Scsi_Cmnd * SCpnt)
 {
 	DEB(printk("NCR53c406a_reset called\n"));
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index dbbc601..dc5ac6e 100644
--- a/drivers/scsi/a100u2w.c
+++ b/drivers/scsi/a100u2w.c
@@ -911,7 +911,7 @@
  *	queue the command down to the controller
  */
 
-static int inia100_queue(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *))
+static int inia100_queue_lck(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *))
 {
 	struct orc_scb *scb;
 	struct orc_host *host;		/* Point to Host adapter control block */
@@ -930,6 +930,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(inia100_queue)
+
 /*****************************************************************************
  Function name  : inia100_abort
  Description    : Abort a queued command.
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 29c0ed1..2c93d94 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -248,7 +248,7 @@
  *	TODO: unify with aac_scsi_cmd().
  */
 
-static int aac_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+static int aac_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct aac_dev *dev = (struct aac_dev *)host->hostdata;
@@ -267,6 +267,8 @@
 	return (aac_scsi_cmd(cmd) ? FAILED : 0);
 }
 
+static DEF_SCSI_QCMD(aac_queuecommand)
+
 /**
  *	aac_info		-	Returns the host adapter name
  *	@shost:		Scsi host to report on
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index 0ec3da6..081c6de 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -9500,7 +9500,7 @@
  * in the 'scp' result field.
  */
 static int
-advansys_queuecommand(struct scsi_cmnd *scp, void (*done)(struct scsi_cmnd *))
+advansys_queuecommand_lck(struct scsi_cmnd *scp, void (*done)(struct scsi_cmnd *))
 {
 	struct Scsi_Host *shost = scp->device->host;
 	int asc_res, result = 0;
@@ -9525,6 +9525,8 @@
 	return result;
 }
 
+static DEF_SCSI_QCMD(advansys_queuecommand)
+
 static ushort __devinit AscGetEisaChipCfg(PortAddr iop_base)
 {
 	PortAddr eisa_cfg_iop = (PortAddr) ASC_GET_EISA_SLOT(iop_base) |
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index 8eab858..c5169f0 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -1056,7 +1056,7 @@
  *  queue a command
  *
  */
-static int aha152x_queue(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *))
+static int aha152x_queue_lck(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *))
 {
 #if 0
 	if(*SCpnt->cmnd == REQUEST_SENSE) {
@@ -1070,6 +1070,8 @@
 	return aha152x_internal_queue(SCpnt, NULL, 0, done);
 }
 
+static DEF_SCSI_QCMD(aha152x_queue)
+
 
 /*
  *  
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 4f785f2..195823a 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -558,7 +558,7 @@
 	};
 }
 
-static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
+static int aha1542_queuecommand_lck(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
 {
 	unchar ahacmd = CMD_START_SCSI;
 	unchar direction;
@@ -718,6 +718,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(aha1542_queuecommand)
+
 /* Initialize mailboxes */
 static void setup_mailboxes(int bse, struct Scsi_Host *shpnt)
 {
diff --git a/drivers/scsi/aha1542.h b/drivers/scsi/aha1542.h
index 1db5385..b871d2b 100644
--- a/drivers/scsi/aha1542.h
+++ b/drivers/scsi/aha1542.h
@@ -132,7 +132,7 @@
 };
 
 static int aha1542_detect(struct scsi_host_template *);
-static int aha1542_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+static int aha1542_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 static int aha1542_bus_reset(Scsi_Cmnd * SCpnt);
 static int aha1542_dev_reset(Scsi_Cmnd * SCpnt);
 static int aha1542_host_reset(Scsi_Cmnd * SCpnt);
diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c
index 0107a4c..d058f1a 100644
--- a/drivers/scsi/aha1740.c
+++ b/drivers/scsi/aha1740.c
@@ -331,7 +331,7 @@
 	return IRQ_RETVAL(handled);
 }
 
-static int aha1740_queuecommand(Scsi_Cmnd * SCpnt, void (*done)(Scsi_Cmnd *))
+static int aha1740_queuecommand_lck(Scsi_Cmnd * SCpnt, void (*done)(Scsi_Cmnd *))
 {
 	unchar direction;
 	unchar *cmd = (unchar *) SCpnt->cmnd;
@@ -503,6 +503,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(aha1740_queuecommand)
+
 /* Query the board for its irq_level and irq_type.  Nothing else matters
    in enhanced mode on an EISA bus. */
 
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 88ad848..25d0666 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -573,7 +573,7 @@
  * Queue an SCB to the controller.
  */
 static int
-ahd_linux_queue(struct scsi_cmnd * cmd, void (*scsi_done) (struct scsi_cmnd *))
+ahd_linux_queue_lck(struct scsi_cmnd * cmd, void (*scsi_done) (struct scsi_cmnd *))
 {
 	struct	 ahd_softc *ahd;
 	struct	 ahd_linux_device *dev = scsi_transport_device_data(cmd->device);
@@ -588,6 +588,8 @@
 	return rtn;
 }
 
+static DEF_SCSI_QCMD(ahd_linux_queue)
+
 static struct scsi_target **
 ahd_linux_target_in_softc(struct scsi_target *starget)
 {
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index aeea7a6..4a359bb 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -528,7 +528,7 @@
  * Queue an SCB to the controller.
  */
 static int
-ahc_linux_queue(struct scsi_cmnd * cmd, void (*scsi_done) (struct scsi_cmnd *))
+ahc_linux_queue_lck(struct scsi_cmnd * cmd, void (*scsi_done) (struct scsi_cmnd *))
 {
 	struct	 ahc_softc *ahc;
 	struct	 ahc_linux_device *dev = scsi_transport_device_data(cmd->device);
@@ -548,6 +548,8 @@
 	return rtn;
 }
 
+static DEF_SCSI_QCMD(ahc_linux_queue)
+
 static inline struct scsi_target **
 ahc_linux_target_in_softc(struct scsi_target *starget)
 {
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index aee73fa..4ff60a0 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -10234,7 +10234,7 @@
  * Description:
  *   Queue a SCB to the controller.
  *-F*************************************************************************/
-static int aic7xxx_queue(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
+static int aic7xxx_queue_lck(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
 {
   struct aic7xxx_host *p;
   struct aic7xxx_scb *scb;
@@ -10292,6 +10292,8 @@
   return (0);
 }
 
+static DEF_SCSI_QCMD(aic7xxx_queue)
+
 /*+F*************************************************************************
  * Function:
  *   aic7xxx_bus_device_reset
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index 05a78e5..17e3df4 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -85,8 +85,7 @@
 static int arcmsr_bus_reset(struct scsi_cmnd *);
 static int arcmsr_bios_param(struct scsi_device *sdev,
 		struct block_device *bdev, sector_t capacity, int *info);
-static int arcmsr_queue_command(struct scsi_cmnd *cmd,
-					void (*done) (struct scsi_cmnd *));
+static int arcmsr_queue_command(struct Scsi_Host *h, struct scsi_cmnd *cmd);
 static int arcmsr_probe(struct pci_dev *pdev,
 				const struct pci_device_id *id);
 static void arcmsr_remove(struct pci_dev *pdev);
@@ -2081,7 +2080,7 @@
 	}
 }
 
-static int arcmsr_queue_command(struct scsi_cmnd *cmd,
+static int arcmsr_queue_command_lck(struct scsi_cmnd *cmd,
 	void (* done)(struct scsi_cmnd *))
 {
 	struct Scsi_Host *host = cmd->device->host;
@@ -2124,6 +2123,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(arcmsr_queue_command)
+
 static bool arcmsr_get_hba_config(struct AdapterControlBlock *acb)
 {
 	struct MessageUnit_A __iomem *reg = acb->pmuA;
diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c
index 918ccf8..ec16672 100644
--- a/drivers/scsi/arm/acornscsi.c
+++ b/drivers/scsi/arm/acornscsi.c
@@ -2511,7 +2511,7 @@
  *	      done - function called on completion, with pointer to command descriptor
  * Returns  : 0, or < 0 on error.
  */
-int acornscsi_queuecmd(struct scsi_cmnd *SCpnt,
+static int acornscsi_queuecmd_lck(struct scsi_cmnd *SCpnt,
 		       void (*done)(struct scsi_cmnd *))
 {
     AS_Host *host = (AS_Host *)SCpnt->device->host->hostdata;
@@ -2561,6 +2561,8 @@
     return 0;
 }
 
+DEF_SCSI_QCMD(acornscsi_queuecmd)
+
 /*
  * Prototype: void acornscsi_reportstatus(struct scsi_cmnd **SCpntp1, struct scsi_cmnd **SCpntp2, int result)
  * Purpose  : pass a result to *SCpntp1, and check if *SCpntp1 = *SCpntp2
diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c
index 9e71ac6..2b2ce21 100644
--- a/drivers/scsi/arm/fas216.c
+++ b/drivers/scsi/arm/fas216.c
@@ -2198,7 +2198,7 @@
  * Returns: 0 on success, else error.
  * Notes: io_request_lock is held, interrupts are disabled.
  */
-int fas216_queue_command(struct scsi_cmnd *SCpnt,
+static int fas216_queue_command_lck(struct scsi_cmnd *SCpnt,
 			 void (*done)(struct scsi_cmnd *))
 {
 	FAS216_Info *info = (FAS216_Info *)SCpnt->device->host->hostdata;
@@ -2240,6 +2240,8 @@
 	return result;
 }
 
+DEF_SCSI_QCMD(fas216_queue_command)
+
 /**
  * fas216_internal_done - trigger restart of a waiting thread in fas216_noqueue_command
  * @SCpnt: Command to wake
@@ -2263,7 +2265,7 @@
  * Returns: scsi result code.
  * Notes: io_request_lock is held, interrupts are disabled.
  */
-int fas216_noqueue_command(struct scsi_cmnd *SCpnt,
+static int fas216_noqueue_command_lck(struct scsi_cmnd *SCpnt,
 			   void (*done)(struct scsi_cmnd *))
 {
 	FAS216_Info *info = (FAS216_Info *)SCpnt->device->host->hostdata;
@@ -2277,7 +2279,7 @@
 	BUG_ON(info->scsi.irq != NO_IRQ);
 
 	info->internal_done = 0;
-	fas216_queue_command(SCpnt, fas216_internal_done);
+	fas216_queue_command_lck(SCpnt, fas216_internal_done);
 
 	/*
 	 * This wastes time, since we can't return until the command is
@@ -2310,6 +2312,8 @@
 	return 0;
 }
 
+DEF_SCSI_QCMD(fas216_noqueue_command)
+
 /*
  * Error handler timeout function.  Indicate that we timed out,
  * and wake up any error handler process so it can continue.
diff --git a/drivers/scsi/arm/fas216.h b/drivers/scsi/arm/fas216.h
index b65f4cf..f30f8d6 100644
--- a/drivers/scsi/arm/fas216.h
+++ b/drivers/scsi/arm/fas216.h
@@ -331,23 +331,21 @@
  */
 extern int fas216_add (struct Scsi_Host *instance, struct device *dev);
 
-/* Function: int fas216_queue_command(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+/* Function: int fas216_queue_command(struct Scsi_Host *h, struct scsi_cmnd *SCpnt)
  * Purpose : queue a command for adapter to process.
- * Params  : SCpnt - Command to queue
- *	     done  - done function to call once command is complete
+ * Params  : h - host adapter
+ *	   : SCpnt - Command to queue
  * Returns : 0 - success, else error
  */
-extern int fas216_queue_command(struct scsi_cmnd *,
-				void (*done)(struct scsi_cmnd *));
+extern int fas216_queue_command(struct Scsi_Host *h, struct scsi_cmnd *SCpnt);
 
-/* Function: int fas216_noqueue_command(istruct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+/* Function: int fas216_noqueue_command(struct Scsi_Host *h, struct scsi_cmnd *SCpnt)
  * Purpose : queue a command for adapter to process, and process it to completion.
- * Params  : SCpnt - Command to queue
- *	     done  - done function to call once command is complete
+ * Params  : h - host adapter
+ *	   : SCpnt - Command to queue
  * Returns : 0 - success, else error
  */
-extern int fas216_noqueue_command(struct scsi_cmnd *,
-				  void (*done)(struct scsi_cmnd *));
+extern int fas216_noqueue_command(struct Scsi_Host *, struct scsi_cmnd *);
 
 /* Function: irqreturn_t fas216_intr (FAS216_Info *info)
  * Purpose : handle interrupts from the interface to progress a command
diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c
index 158ebc3..88b2928 100644
--- a/drivers/scsi/atari_NCR5380.c
+++ b/drivers/scsi/atari_NCR5380.c
@@ -910,7 +910,7 @@
  *
  */
 
-static int NCR5380_queue_command(Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
+static int NCR5380_queue_command_lck(Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
 {
 	SETUP_HOSTDATA(cmd->device->host);
 	Scsi_Cmnd *tmp;
@@ -1022,6 +1022,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(NCR5380_queue_command)
+
 /*
  * Function : NCR5380_main (void)
  *
diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c
index ad7a23a..3e8658e 100644
--- a/drivers/scsi/atari_scsi.c
+++ b/drivers/scsi/atari_scsi.c
@@ -572,23 +572,6 @@
 }
 
 
-/* This is the wrapper function for NCR5380_queue_command(). It just
- * tries to get the lock on the ST-DMA (see above) and then calls the
- * original function.
- */
-
-#if 0
-int atari_queue_command(Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
-{
-	/* falcon_get_lock();
-	 * ++guenther: moved to NCR5380_queue_command() to prevent
-	 * race condition, see there for an explanation.
-	 */
-	return NCR5380_queue_command(cmd, done);
-}
-#endif
-
-
 int __init atari_scsi_detect(struct scsi_host_template *host)
 {
 	static int called = 0;
diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c
index ab5bdda..76029d5 100644
--- a/drivers/scsi/atp870u.c
+++ b/drivers/scsi/atp870u.c
@@ -605,7 +605,7 @@
  *
  *	Queue a command to the ATP queue. Called with the host lock held.
  */
-static int atp870u_queuecommand(struct scsi_cmnd * req_p, 
+static int atp870u_queuecommand_lck(struct scsi_cmnd *req_p,
 			 void (*done) (struct scsi_cmnd *))
 {
 	unsigned char c;
@@ -694,6 +694,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(atp870u_queuecommand)
+
 /**
  *	send_s870	-	send a command to the controller
  *	@host: host
diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c
index 8daa716..8ca967d 100644
--- a/drivers/scsi/bfa/bfad_im.c
+++ b/drivers/scsi/bfa/bfad_im.c
@@ -30,8 +30,7 @@
 struct scsi_transport_template *bfad_im_scsi_transport_template;
 struct scsi_transport_template *bfad_im_scsi_vport_transport_template;
 static void bfad_im_itnim_work_handler(struct work_struct *work);
-static int bfad_im_queuecommand(struct scsi_cmnd *cmnd,
-				void (*done)(struct scsi_cmnd *));
+static int bfad_im_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *cmnd);
 static int bfad_im_slave_alloc(struct scsi_device *sdev);
 static void bfad_im_fc_rport_add(struct bfad_im_port_s  *im_port,
 				struct bfad_itnim_s *itnim);
@@ -1120,7 +1119,7 @@
  * Scsi_Host template entry, queue a SCSI command to the BFAD.
  */
 static int
-bfad_im_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
+bfad_im_queuecommand_lck(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 {
 	struct bfad_im_port_s *im_port =
 		(struct bfad_im_port_s *) cmnd->device->host->hostdata[0];
@@ -1187,6 +1186,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(bfad_im_queuecommand)
+
 void
 bfad_os_rport_online_wait(struct bfad_s *bfad)
 {
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index 54f50b0..8f1b5c8 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -1080,7 +1080,7 @@
  *        and is expected to be held on return.
  *
  **/
-static int dc395x_queue_command(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+static int dc395x_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	struct DeviceCtlBlk *dcb;
 	struct ScsiReqBlk *srb;
@@ -1154,6 +1154,7 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(dc395x_queue_command)
 
 /*
  * Return the disk geometry for the given SCSI device.
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index 23dec00..cffcb10 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -423,7 +423,7 @@
 	return 0;
 }
 
-static int adpt_queue(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *))
+static int adpt_queue_lck(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *))
 {
 	adpt_hba* pHba = NULL;
 	struct adpt_device* pDev = NULL;	/* dpt per device information */
@@ -491,6 +491,8 @@
 	return adpt_scsi_to_i2o(pHba, cmd, pDev);
 }
 
+static DEF_SCSI_QCMD(adpt_queue)
+
 static int adpt_bios_param(struct scsi_device *sdev, struct block_device *dev,
 		sector_t capacity, int geom[])
 {
diff --git a/drivers/scsi/dpti.h b/drivers/scsi/dpti.h
index 337746d..beded71 100644
--- a/drivers/scsi/dpti.h
+++ b/drivers/scsi/dpti.h
@@ -29,7 +29,7 @@
  */
 
 static int adpt_detect(struct scsi_host_template * sht);
-static int adpt_queue(struct scsi_cmnd * cmd, void (*cmdcomplete) (struct scsi_cmnd *));
+static int adpt_queue(struct Scsi_Host *h, struct scsi_cmnd * cmd);
 static int adpt_abort(struct scsi_cmnd * cmd);
 static int adpt_reset(struct scsi_cmnd* cmd);
 static int adpt_release(struct Scsi_Host *host);
diff --git a/drivers/scsi/dtc.h b/drivers/scsi/dtc.h
index 0b205f8..cdc6212 100644
--- a/drivers/scsi/dtc.h
+++ b/drivers/scsi/dtc.h
@@ -36,7 +36,7 @@
 static int dtc_biosparam(struct scsi_device *, struct block_device *,
 		         sector_t, int*);
 static int dtc_detect(struct scsi_host_template *);
-static int dtc_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+static int dtc_queue_command(struct Scsi_Host *, struct scsi_cmnd *);
 static int dtc_bus_reset(Scsi_Cmnd *);
 
 #ifndef CMD_PER_LUN
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index d1c3137..53925ac 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -505,8 +505,7 @@
 
 static int eata2x_detect(struct scsi_host_template *);
 static int eata2x_release(struct Scsi_Host *);
-static int eata2x_queuecommand(struct scsi_cmnd *,
-			       void (*done) (struct scsi_cmnd *));
+static int eata2x_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 static int eata2x_eh_abort(struct scsi_cmnd *);
 static int eata2x_eh_host_reset(struct scsi_cmnd *);
 static int eata2x_bios_param(struct scsi_device *, struct block_device *,
@@ -1758,7 +1757,7 @@
 
 }
 
-static int eata2x_queuecommand(struct scsi_cmnd *SCpnt,
+static int eata2x_queuecommand_lck(struct scsi_cmnd *SCpnt,
 			       void (*done) (struct scsi_cmnd *))
 {
 	struct Scsi_Host *shost = SCpnt->device->host;
@@ -1843,6 +1842,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(eata2x_queuecommand)
+
 static int eata2x_eh_abort(struct scsi_cmnd *SCarg)
 {
 	struct Scsi_Host *shost = SCarg->device->host;
diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c
index 60886c1..4a9641e 100644
--- a/drivers/scsi/eata_pio.c
+++ b/drivers/scsi/eata_pio.c
@@ -335,7 +335,7 @@
 	return 0;
 }
 
-static int eata_pio_queue(struct scsi_cmnd *cmd,
+static int eata_pio_queue_lck(struct scsi_cmnd *cmd,
 		void (*done)(struct scsi_cmnd *))
 {
 	unsigned int x, y;
@@ -438,6 +438,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(eata_pio_queue)
+
 static int eata_pio_abort(struct scsi_cmnd *cmd)
 {
 	unsigned int loop = 100;
diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c
index e2bc779..5755852 100644
--- a/drivers/scsi/esp_scsi.c
+++ b/drivers/scsi/esp_scsi.c
@@ -916,7 +916,7 @@
 	scsi_track_queue_full(dev, lp->num_tagged - 1);
 }
 
-static int esp_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+static int esp_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	struct scsi_device *dev = cmd->device;
 	struct esp *esp = shost_priv(dev->host);
@@ -941,6 +941,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(esp_queuecommand)
+
 static int esp_check_gross_error(struct esp *esp)
 {
 	if (esp->sreg & ESP_STAT_SPAM) {
diff --git a/drivers/scsi/fd_mcs.c b/drivers/scsi/fd_mcs.c
index 2ad95aa..a2c6135 100644
--- a/drivers/scsi/fd_mcs.c
+++ b/drivers/scsi/fd_mcs.c
@@ -1072,7 +1072,7 @@
 	return 0;
 }
 
-static int fd_mcs_queue(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
+static int fd_mcs_queue_lck(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
 {
 	struct Scsi_Host *shpnt = SCpnt->device->host;
 
@@ -1122,6 +1122,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(fd_mcs_queue)
+
 #if DEBUG_ABORT || DEBUG_RESET
 static void fd_mcs_print_info(Scsi_Cmnd * SCpnt)
 {
diff --git a/drivers/scsi/fdomain.c b/drivers/scsi/fdomain.c
index e296bcc5..69b7aa5 100644
--- a/drivers/scsi/fdomain.c
+++ b/drivers/scsi/fdomain.c
@@ -1419,7 +1419,7 @@
    return IRQ_HANDLED;
 }
 
-static int fdomain_16x0_queue(struct scsi_cmnd *SCpnt,
+static int fdomain_16x0_queue_lck(struct scsi_cmnd *SCpnt,
 		void (*done)(struct scsi_cmnd *))
 {
    if (in_command) {
@@ -1469,6 +1469,8 @@
    return 0;
 }
 
+static DEF_SCSI_QCMD(fdomain_16x0_queue)
+
 #if DEBUG_ABORT
 static void print_info(struct scsi_cmnd *SCpnt)
 {
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index cbb20b1..92f1850 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -246,7 +246,7 @@
 void fnic_update_mac(struct fc_lport *, u8 *new);
 void fnic_update_mac_locked(struct fnic *, u8 *new);
 
-int fnic_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *));
+int fnic_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 int fnic_abort_cmd(struct scsi_cmnd *);
 int fnic_device_reset(struct scsi_cmnd *);
 int fnic_host_reset(struct scsi_cmnd *);
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 198cbab..22d0240 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -349,7 +349,7 @@
  * Routine to send a scsi cdb
  * Called with host_lock held and interrupts disabled.
  */
-int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
+static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
 	struct fc_lport *lp;
 	struct fc_rport *rport;
@@ -457,6 +457,8 @@
 	return ret;
 }
 
+DEF_SCSI_QCMD(fnic_queuecommand)
+
 /*
  * fnic_fcpio_fw_reset_cmpl_handler
  * Routine to handle fw reset completion
diff --git a/drivers/scsi/g_NCR5380.h b/drivers/scsi/g_NCR5380.h
index 921764c..1bcdb7b 100644
--- a/drivers/scsi/g_NCR5380.h
+++ b/drivers/scsi/g_NCR5380.h
@@ -46,7 +46,7 @@
 static int generic_NCR5380_abort(Scsi_Cmnd *);
 static int generic_NCR5380_detect(struct scsi_host_template *);
 static int generic_NCR5380_release_resources(struct Scsi_Host *);
-static int generic_NCR5380_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+static int generic_NCR5380_queue_command(struct Scsi_Host *, struct scsi_cmnd *);
 static int generic_NCR5380_bus_reset(Scsi_Cmnd *);
 static const char* generic_NCR5380_info(struct Scsi_Host *);
 
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 8411018..7636570 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -185,7 +185,7 @@
 			        unsigned long arg);
 
 static void gdth_flush(gdth_ha_str *ha);
-static int gdth_queuecommand(Scsi_Cmnd *scp,void (*done)(Scsi_Cmnd *));
+static int gdth_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *cmd);
 static int __gdth_queuecommand(gdth_ha_str *ha, struct scsi_cmnd *scp,
 				struct gdth_cmndinfo *cmndinfo);
 static void gdth_scsi_done(struct scsi_cmnd *scp);
@@ -4004,7 +4004,7 @@
 }
 
 
-static int gdth_queuecommand(struct scsi_cmnd *scp,
+static int gdth_queuecommand_lck(struct scsi_cmnd *scp,
 				void (*done)(struct scsi_cmnd *))
 {
     gdth_ha_str *ha = shost_priv(scp->device->host);
@@ -4022,6 +4022,8 @@
     return __gdth_queuecommand(ha, scp, cmndinfo);
 }
 
+static DEF_SCSI_QCMD(gdth_queuecommand)
+
 static int __gdth_queuecommand(gdth_ha_str *ha, struct scsi_cmnd *scp,
 				struct gdth_cmndinfo *cmndinfo)
 {
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index c5d0606..b2fb2b2 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -31,7 +31,6 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
-#include <linux/smp_lock.h>
 #include <linux/compat.h>
 #include <linux/blktrace_api.h>
 #include <linux/uaccess.h>
@@ -143,8 +142,7 @@
 	void *buff, size_t size, u8 page_code, unsigned char *scsi3addr,
 	int cmd_type);
 
-static int hpsa_scsi_queue_command(struct scsi_cmnd *cmd,
-		void (*done)(struct scsi_cmnd *));
+static int hpsa_scsi_queue_command(struct Scsi_Host *h, struct scsi_cmnd *cmd);
 static void hpsa_scan_start(struct Scsi_Host *);
 static int hpsa_scan_finished(struct Scsi_Host *sh,
 	unsigned long elapsed_time);
@@ -1926,7 +1924,7 @@
 }
 
 
-static int hpsa_scsi_queue_command(struct scsi_cmnd *cmd,
+static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd,
 	void (*done)(struct scsi_cmnd *))
 {
 	struct ctlr_info *h;
@@ -2020,6 +2018,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(hpsa_scsi_queue_command)
+
 static void hpsa_scan_start(struct Scsi_Host *sh)
 {
 	struct ctlr_info *h = shost_to_hba(sh);
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index 0729f15..10b6555 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -751,7 +751,7 @@
 		MVIOP_MU_QUEUE_ADDR_HOST_BIT | size_bit, hba);
 }
 
-static int hptiop_queuecommand(struct scsi_cmnd *scp,
+static int hptiop_queuecommand_lck(struct scsi_cmnd *scp,
 				void (*done)(struct scsi_cmnd *))
 {
 	struct Scsi_Host *host = scp->device->host;
@@ -819,6 +819,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(hptiop_queuecommand)
+
 static const char *hptiop_info(struct Scsi_Host *host)
 {
 	return driver_name_long;
diff --git a/drivers/scsi/ibmmca.c b/drivers/scsi/ibmmca.c
index 9a4b69d..67fc8ff 100644
--- a/drivers/scsi/ibmmca.c
+++ b/drivers/scsi/ibmmca.c
@@ -39,7 +39,7 @@
 #include <scsi/scsi_host.h>
 
 /* Common forward declarations for all Linux-versions: */
-static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *));
+static int ibmmca_queuecommand (struct Scsi_Host *, struct scsi_cmnd *);
 static int ibmmca_abort (Scsi_Cmnd *);
 static int ibmmca_host_reset (Scsi_Cmnd *);
 static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *);
@@ -1691,7 +1691,7 @@
 }
 
 /* The following routine is the SCSI command queue for the midlevel driver */
-static int ibmmca_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
+static int ibmmca_queuecommand_lck(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
 {
 	unsigned int ldn;
 	unsigned int scsi_cmd;
@@ -1996,6 +1996,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(ibmmca_queuecommand)
+
 static int __ibmmca_abort(Scsi_Cmnd * cmd)
 {
 	/* Abort does not work, as the adapter never generates an interrupt on
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 00d08b2..57cad7e 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1606,7 +1606,7 @@
  * Returns:
  *	0 on success / other on failure
  **/
-static int ibmvfc_queuecommand(struct scsi_cmnd *cmnd,
+static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
 			       void (*done) (struct scsi_cmnd *))
 {
 	struct ibmvfc_host *vhost = shost_priv(cmnd->device->host);
@@ -1672,6 +1672,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(ibmvfc_queuecommand)
+
 /**
  * ibmvfc_sync_completion - Signal that a synchronous command has completed
  * @evt:	ibmvfc event struct
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 67f78a4..0419584 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -713,7 +713,7 @@
  * @cmd:	struct scsi_cmnd to be executed
  * @done:	Callback function to be called when cmd is completed
 */
-static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
+static int ibmvscsi_queuecommand_lck(struct scsi_cmnd *cmnd,
 				 void (*done) (struct scsi_cmnd *))
 {
 	struct srp_cmd *srp_cmd;
@@ -766,6 +766,8 @@
 	return ibmvscsi_send_srp_event(evt_struct, hostdata, 0);
 }
 
+static DEF_SCSI_QCMD(ibmvscsi_queuecommand)
+
 /* ------------------------------------------------------------
  * Routines for driver initialization
  */
diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c
index 4734ab0..99aa0e5 100644
--- a/drivers/scsi/imm.c
+++ b/drivers/scsi/imm.c
@@ -926,7 +926,7 @@
 	return 0;
 }
 
-static int imm_queuecommand(struct scsi_cmnd *cmd,
+static int imm_queuecommand_lck(struct scsi_cmnd *cmd,
 		void (*done)(struct scsi_cmnd *))
 {
 	imm_struct *dev = imm_dev(cmd->device->host);
@@ -949,6 +949,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(imm_queuecommand)
+
 /*
  * Apparently the disk->capacity attribute is off by 1 sector 
  * for all disk drives.  We add the one here, but it should really
diff --git a/drivers/scsi/in2000.c b/drivers/scsi/in2000.c
index 52bdc6d..6568aab 100644
--- a/drivers/scsi/in2000.c
+++ b/drivers/scsi/in2000.c
@@ -334,7 +334,7 @@
 
 static void in2000_execute(struct Scsi_Host *instance);
 
-static int in2000_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
+static int in2000_queuecommand_lck(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
 {
 	struct Scsi_Host *instance;
 	struct IN2000_hostdata *hostdata;
@@ -431,6 +431,8 @@
 	    return 0;
 }
 
+static DEF_SCSI_QCMD(in2000_queuecommand)
+
 
 
 /*
diff --git a/drivers/scsi/in2000.h b/drivers/scsi/in2000.h
index 0fb8b06..5821e1f 100644
--- a/drivers/scsi/in2000.h
+++ b/drivers/scsi/in2000.h
@@ -396,7 +396,7 @@
 							   flags)
 
 static int in2000_detect(struct scsi_host_template *) in2000__INIT;
-static int in2000_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+static int in2000_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 static int in2000_abort(Scsi_Cmnd *);
 static void in2000_setup(char *, int *) in2000__INIT;
 static int in2000_biosparam(struct scsi_device *, struct block_device *,
diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c
index 1087977..9627d06 100644
--- a/drivers/scsi/initio.c
+++ b/drivers/scsi/initio.c
@@ -2639,7 +2639,7 @@
  *	will cause the mid layer to call us again later with the command)
  */
 
-static int i91u_queuecommand(struct scsi_cmnd *cmd,
+static int i91u_queuecommand_lck(struct scsi_cmnd *cmd,
 		void (*done)(struct scsi_cmnd *))
 {
 	struct initio_host *host = (struct initio_host *) cmd->device->host->hostdata;
@@ -2656,6 +2656,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(i91u_queuecommand)
+
 /**
  *	i91u_bus_reset		-	reset the SCSI bus
  *	@cmnd: Command block we want to trigger the reset for
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index fa60d7d..5bbaee5 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -5709,7 +5709,7 @@
  *	SCSI_MLQUEUE_DEVICE_BUSY if device is busy
  *	SCSI_MLQUEUE_HOST_BUSY if host is busy
  **/
-static int ipr_queuecommand(struct scsi_cmnd *scsi_cmd,
+static int ipr_queuecommand_lck(struct scsi_cmnd *scsi_cmd,
 			    void (*done) (struct scsi_cmnd *))
 {
 	struct ipr_ioa_cfg *ioa_cfg;
@@ -5792,6 +5792,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(ipr_queuecommand)
+
 /**
  * ipr_ioctl - IOCTL handler
  * @sdev:	scsi device struct
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index f83a116..b2511ac 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -232,7 +232,7 @@
 static int ips_release(struct Scsi_Host *);
 static int ips_eh_abort(struct scsi_cmnd *);
 static int ips_eh_reset(struct scsi_cmnd *);
-static int ips_queue(struct scsi_cmnd *, void (*)(struct scsi_cmnd *));
+static int ips_queue(struct Scsi_Host *, struct scsi_cmnd *);
 static const char *ips_info(struct Scsi_Host *);
 static irqreturn_t do_ipsintr(int, void *);
 static int ips_hainit(ips_ha_t *);
@@ -1046,7 +1046,7 @@
 /*    Linux obtains io_request_lock before calling this function            */
 /*                                                                          */
 /****************************************************************************/
-static int ips_queue(struct scsi_cmnd *SC, void (*done) (struct scsi_cmnd *))
+static int ips_queue_lck(struct scsi_cmnd *SC, void (*done) (struct scsi_cmnd *))
 {
 	ips_ha_t *ha;
 	ips_passthru_t *pt;
@@ -1137,6 +1137,8 @@
 	return (0);
 }
 
+static DEF_SCSI_QCMD(ips_queue)
+
 /****************************************************************************/
 /*                                                                          */
 /* Routine Name: ips_biosparam                                              */
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index e340373..2924363 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -1753,7 +1753,7 @@
  * This is the i/o strategy routine, called by the SCSI layer. This routine
  * is called with the host_lock held.
  */
-int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *))
+static int fc_queuecommand_lck(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *))
 {
 	struct fc_lport *lport;
 	struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device));
@@ -1851,6 +1851,8 @@
 	spin_lock_irq(lport->host->host_lock);
 	return rc;
 }
+
+DEF_SCSI_QCMD(fc_queuecommand)
 EXPORT_SYMBOL(fc_queuecommand);
 
 /**
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 633e090..c15fde8 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1599,7 +1599,7 @@
 	FAILURE_SESSION_NOT_READY,
 };
 
-int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
+static int iscsi_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
 	struct iscsi_cls_session *cls_session;
 	struct Scsi_Host *host;
@@ -1736,6 +1736,8 @@
 	spin_lock(host->host_lock);
 	return 0;
 }
+
+DEF_SCSI_QCMD(iscsi_queuecommand)
 EXPORT_SYMBOL_GPL(iscsi_queuecommand);
 
 int iscsi_change_queue_depth(struct scsi_device *sdev, int depth, int reason)
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 55f09e9..29251fa 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -189,7 +189,7 @@
  * Note: XXX: Remove the host unlock/lock pair when SCSI Core can
  * call us without holding an IRQ spinlock...
  */
-int sas_queuecommand(struct scsi_cmnd *cmd,
+static int sas_queuecommand_lck(struct scsi_cmnd *cmd,
 		     void (*scsi_done)(struct scsi_cmnd *))
 	__releases(host->host_lock)
 	__acquires(dev->sata_dev.ap->lock)
@@ -254,6 +254,8 @@
 	return res;
 }
 
+DEF_SCSI_QCMD(sas_queuecommand)
+
 static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 {
 	struct sas_task *task = TO_SAS_TASK(cmd);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index f64b65a..581837b 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -2899,7 +2899,7 @@
  *   SCSI_MLQUEUE_HOST_BUSY - Block all devices served by this host temporarily.
  **/
 static int
-lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
+lpfc_queuecommand_lck(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 {
 	struct Scsi_Host  *shost = cmnd->device->host;
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
@@ -3060,6 +3060,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(lpfc_queuecommand)
+
 /**
  * lpfc_abort_handler - scsi_host_template eh_abort_handler entry point
  * @cmnd: Pointer to scsi_cmnd data structure.
diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c
index 3ddb4dc..6c42dff 100644
--- a/drivers/scsi/mac53c94.c
+++ b/drivers/scsi/mac53c94.c
@@ -66,7 +66,7 @@
 static void set_dma_cmds(struct fsc_state *, struct scsi_cmnd *);
 
 
-static int mac53c94_queue(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+static int mac53c94_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	struct fsc_state *state;
 
@@ -99,6 +99,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(mac53c94_queue)
+
 static int mac53c94_host_reset(struct scsi_cmnd *cmd)
 {
 	struct fsc_state *state = (struct fsc_state *) cmd->device->host->hostdata;
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 7ceb5cf..9aa0485 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -366,7 +366,7 @@
  * The command queuing entry point for the mid-layer.
  */
 static int
-megaraid_queue(Scsi_Cmnd *scmd, void (*done)(Scsi_Cmnd *))
+megaraid_queue_lck(Scsi_Cmnd *scmd, void (*done)(Scsi_Cmnd *))
 {
 	adapter_t	*adapter;
 	scb_t	*scb;
@@ -409,6 +409,8 @@
 	return busy;
 }
 
+static DEF_SCSI_QCMD(megaraid_queue)
+
 /**
  * mega_allocate_scb()
  * @adapter - pointer to our soft state
@@ -4456,7 +4458,7 @@
 
 	scb->idx = CMDID_INT_CMDS;
 
-	megaraid_queue(scmd, mega_internal_done);
+	megaraid_queue_lck(scmd, mega_internal_done);
 
 	wait_for_completion(&adapter->int_waitq);
 
diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h
index 2b4a048..f564474 100644
--- a/drivers/scsi/megaraid.h
+++ b/drivers/scsi/megaraid.h
@@ -987,7 +987,7 @@
 static int issue_scb(adapter_t *, scb_t *);
 static int mega_setup_mailbox(adapter_t *);
 
-static int megaraid_queue (Scsi_Cmnd *, void (*)(Scsi_Cmnd *));
+static int megaraid_queue (struct Scsi_Host *, struct scsi_cmnd *);
 static scb_t * mega_build_cmd(adapter_t *, Scsi_Cmnd *, int *);
 static void __mega_runpendq(adapter_t *);
 static int issue_scb_block(adapter_t *, u_char *);
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index a7810a1..5708cb2 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -113,8 +113,7 @@
 static void megaraid_mbox_display_scb(adapter_t *, scb_t *);
 static void megaraid_mbox_setup_device_map(adapter_t *);
 
-static int megaraid_queue_command(struct scsi_cmnd *,
-		void (*)(struct scsi_cmnd *));
+static int megaraid_queue_command(struct Scsi_Host *, struct scsi_cmnd *);
 static scb_t *megaraid_mbox_build_cmd(adapter_t *, struct scsi_cmnd *, int *);
 static void megaraid_mbox_runpendq(adapter_t *, scb_t *);
 static void megaraid_mbox_prepare_pthru(adapter_t *, scb_t *,
@@ -1484,7 +1483,7 @@
  * Queue entry point for mailbox based controllers.
  */
 static int
-megaraid_queue_command(struct scsi_cmnd *scp, void (*done)(struct scsi_cmnd *))
+megaraid_queue_command_lck(struct scsi_cmnd *scp, void (*done)(struct scsi_cmnd *))
 {
 	adapter_t	*adapter;
 	scb_t		*scb;
@@ -1513,6 +1512,8 @@
 	return if_busy;
 }
 
+static DEF_SCSI_QCMD(megaraid_queue_command)
+
 /**
  * megaraid_mbox_build_cmd - transform the mid-layer scsi commands
  * @adapter	: controller's soft state
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index eb29d50..7451bc0 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -1334,7 +1334,7 @@
  * @done:			Callback entry point
  */
 static int
-megasas_queue_command(struct scsi_cmnd *scmd, void (*done) (struct scsi_cmnd *))
+megasas_queue_command_lck(struct scsi_cmnd *scmd, void (*done) (struct scsi_cmnd *))
 {
 	u32 frame_count;
 	struct megasas_cmd *cmd;
@@ -1417,6 +1417,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(megasas_queue_command)
+
 static struct megasas_instance *megasas_lookup_instance(u16 host_no)
 {
 	int i;
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index 1f784fd..197aa1b 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1627,7 +1627,7 @@
  * Called by midlayer with host locked to queue a new
  * request
  */
-static int mesh_queue(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+static int mesh_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	struct mesh_state *ms;
 
@@ -1648,6 +1648,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(mesh_queue)
+
 /*
  * Called to handle interrupts, either call by the interrupt
  * handler (do_mesh_interrupt) or by other functions in
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 16e99b6..1a96a00 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -3315,7 +3315,7 @@
  * SCSI_MLQUEUE_HOST_BUSY if the entire host queue is full
  */
 static int
-_scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
+_scsih_qcmd_lck(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
@@ -3441,6 +3441,8 @@
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
 
+static DEF_SCSI_QCMD(_scsih_qcmd)
+
 /**
  * _scsih_normalize_sense - normalize descriptor and fixed format sense data
  * @sense_buffer: sense data returned by target
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index d013a2a..46cc382 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -8029,7 +8029,7 @@
 	return 0;
 }
 
-static int ncr53c8xx_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
+static int ncr53c8xx_queue_command_lck (struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
      struct ncb *np = ((struct host_data *) cmd->device->host->hostdata)->ncb;
      unsigned long flags;
@@ -8068,6 +8068,8 @@
      return sts;
 }
 
+static DEF_SCSI_QCMD(ncr53c8xx_queue_command)
+
 irqreturn_t ncr53c8xx_intr(int irq, void *dev_id)
 {
      unsigned long flags;
diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index 4c1e545..6b8b021 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -196,8 +196,7 @@
 static int         nsp32_proc_info   (struct Scsi_Host *, char *, char **, off_t, int, int);
 
 static int         nsp32_detect      (struct pci_dev *pdev);
-static int         nsp32_queuecommand(struct scsi_cmnd *,
-		void (*done)(struct scsi_cmnd *));
+static int         nsp32_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 static const char *nsp32_info        (struct Scsi_Host *);
 static int         nsp32_release     (struct Scsi_Host *);
 
@@ -909,7 +908,7 @@
 	return TRUE;
 }
 
-static int nsp32_queuecommand(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+static int nsp32_queuecommand_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
 	nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata;
 	nsp32_target *target;
@@ -1050,6 +1049,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(nsp32_queuecommand)
+
 /* initialize asic */
 static int nsp32hw_init(nsp32_hw_data *data)
 {
diff --git a/drivers/scsi/pas16.h b/drivers/scsi/pas16.h
index 8dc5b1a..a04281c 100644
--- a/drivers/scsi/pas16.h
+++ b/drivers/scsi/pas16.h
@@ -118,7 +118,7 @@
 static int pas16_biosparam(struct scsi_device *, struct block_device *,
 			   sector_t, int*);
 static int pas16_detect(struct scsi_host_template *);
-static int pas16_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+static int pas16_queue_command(struct Scsi_Host *, struct scsi_cmnd *);
 static int pas16_bus_reset(Scsi_Cmnd *);
 
 #ifndef CMD_PER_LUN
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index 9326c2c..be3f33d 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -184,7 +184,7 @@
 	SCpnt->scsi_done(SCpnt);
 }
 
-static int nsp_queuecommand(struct scsi_cmnd *SCpnt,
+static int nsp_queuecommand_lck(struct scsi_cmnd *SCpnt,
 			    void (*done)(struct scsi_cmnd *))
 {
 #ifdef NSP_DEBUG
@@ -264,6 +264,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(nsp_queuecommand)
+
 /*
  * setup PIO FIFO transfer mode and enable/disable to data out
  */
diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h
index d68c9f2..7fc9a9d 100644
--- a/drivers/scsi/pcmcia/nsp_cs.h
+++ b/drivers/scsi/pcmcia/nsp_cs.h
@@ -299,8 +299,7 @@
 					 off_t   offset,
 					 int     length,
 					 int     inout);
-static int nsp_queuecommand(struct scsi_cmnd *SCpnt,
-			    void (* done)(struct scsi_cmnd *SCpnt));
+static int nsp_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *SCpnt);
 
 /* Error handler */
 /*static int nsp_eh_abort       (struct scsi_cmnd *SCpnt);*/
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index 0ae27cb..8552296 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -547,7 +547,7 @@
 }
 
 static int 
-SYM53C500_queue(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
+SYM53C500_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
 	int i;
 	int port_base = SCpnt->device->host->io_port;
@@ -583,6 +583,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(SYM53C500_queue)
+
 static int 
 SYM53C500_host_reset(struct scsi_cmnd *SCpnt)
 {
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index 8e38ca8..7f064f9 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -50,7 +50,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
-#include <linux/smp_lock.h>
 #include <scsi/libsas.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/sas_ata.h>
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index cf89091..5e76a62 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3478,7 +3478,7 @@
  *	  SCSI_MLQUEUE_DEVICE_BUSY if device is busy
  *	  SCSI_MLQUEUE_HOST_BUSY if host is busy
  */
-static int pmcraid_queuecommand(
+static int pmcraid_queuecommand_lck(
 	struct scsi_cmnd *scsi_cmd,
 	void (*done) (struct scsi_cmnd *)
 )
@@ -3584,6 +3584,8 @@
 	return rc;
 }
 
+static DEF_SCSI_QCMD(pmcraid_queuecommand)
+
 /**
  * pmcraid_open -char node "open" entry, allowed only users with admin access
  */
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index 7bc2d79..d164c96 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -798,7 +798,7 @@
 	return 0;
 }
 
-static int ppa_queuecommand(struct scsi_cmnd *cmd,
+static int ppa_queuecommand_lck(struct scsi_cmnd *cmd,
 		void (*done) (struct scsi_cmnd *))
 {
 	ppa_struct *dev = ppa_dev(cmd->device->host);
@@ -821,6 +821,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(ppa_queuecommand)
+
 /*
  * Apparently the disk->capacity attribute is off by 1 sector 
  * for all disk drives.  We add the one here, but it should really
diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c
index 92ffbb5..cd178b9 100644
--- a/drivers/scsi/ps3rom.c
+++ b/drivers/scsi/ps3rom.c
@@ -211,7 +211,7 @@
 	return 0;
 }
 
-static int ps3rom_queuecommand(struct scsi_cmnd *cmd,
+static int ps3rom_queuecommand_lck(struct scsi_cmnd *cmd,
 			       void (*done)(struct scsi_cmnd *))
 {
 	struct ps3rom_private *priv = shost_priv(cmd->device->host);
@@ -260,6 +260,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(ps3rom_queuecommand)
+
 static int decode_lv1_status(u64 status, unsigned char *sense_key,
 			     unsigned char *asc, unsigned char *ascq)
 {
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index b8166ec..5dec684 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -727,7 +727,7 @@
  * context which is a big NO! NO!.
  **************************************************************************/
 static int
-qla1280_queuecommand(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
+qla1280_queuecommand_lck(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct scsi_qla_host *ha = (struct scsi_qla_host *)host->hostdata;
@@ -756,6 +756,8 @@
 	return status;
 }
 
+static DEF_SCSI_QCMD(qla1280_queuecommand)
+
 enum action {
 	ABORT_COMMAND,
 	DEVICE_RESET,
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 1830e6e..1644eab 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -179,8 +179,7 @@
 static int qla2xxx_scan_finished(struct Scsi_Host *, unsigned long time);
 static void qla2xxx_scan_start(struct Scsi_Host *);
 static void qla2xxx_slave_destroy(struct scsi_device *);
-static int qla2xxx_queuecommand(struct scsi_cmnd *cmd,
-		void (*fn)(struct scsi_cmnd *));
+static int qla2xxx_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *cmd);
 static int qla2xxx_eh_abort(struct scsi_cmnd *);
 static int qla2xxx_eh_device_reset(struct scsi_cmnd *);
 static int qla2xxx_eh_target_reset(struct scsi_cmnd *);
@@ -535,7 +534,7 @@
 }
 
 static int
-qla2xxx_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+qla2xxx_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
@@ -609,6 +608,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(qla2xxx_queuecommand)
+
 
 /*
  * qla2x00_eh_wait_on_command
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index f4cd846..0d48fb4 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -79,8 +79,7 @@
 /*
  * SCSI host template entry points
  */
-static int qla4xxx_queuecommand(struct scsi_cmnd *cmd,
-				void (*done) (struct scsi_cmnd *));
+static int qla4xxx_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *cmd);
 static int qla4xxx_eh_abort(struct scsi_cmnd *cmd);
 static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd);
 static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd);
@@ -464,7 +463,7 @@
  * completion handling).   Unfortunely, it sometimes calls the scheduler
  * in interrupt context which is a big NO! NO!.
  **/
-static int qla4xxx_queuecommand(struct scsi_cmnd *cmd,
+static int qla4xxx_queuecommand_lck(struct scsi_cmnd *cmd,
 				void (*done)(struct scsi_cmnd *))
 {
 	struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
@@ -538,6 +537,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(qla4xxx_queuecommand)
+
 /**
  * qla4xxx_mem_free - frees memory allocated to adapter
  * @ha: Pointer to host adapter structure.
diff --git a/drivers/scsi/qlogicfas408.c b/drivers/scsi/qlogicfas408.c
index 1ad5155..c3a9151 100644
--- a/drivers/scsi/qlogicfas408.c
+++ b/drivers/scsi/qlogicfas408.c
@@ -439,7 +439,7 @@
  *	Queued command
  */
 
-int qlogicfas408_queuecommand(struct scsi_cmnd *cmd,
+static int qlogicfas408_queuecommand_lck(struct scsi_cmnd *cmd,
 			      void (*done) (struct scsi_cmnd *))
 {
 	struct qlogicfas408_priv *priv = get_priv_by_cmd(cmd);
@@ -459,6 +459,8 @@
 	return 0;
 }
 
+DEF_SCSI_QCMD(qlogicfas408_queuecommand)
+
 /* 
  *	Return bios parameters 
  */
diff --git a/drivers/scsi/qlogicfas408.h b/drivers/scsi/qlogicfas408.h
index 2606264..2f6c0a1 100644
--- a/drivers/scsi/qlogicfas408.h
+++ b/drivers/scsi/qlogicfas408.h
@@ -103,8 +103,7 @@
 #define get_priv_by_host(x) (struct qlogicfas408_priv *)&((x)->hostdata[0])
 
 irqreturn_t qlogicfas408_ihandl(int irq, void *dev_id);
-int qlogicfas408_queuecommand(struct scsi_cmnd * cmd,
-			      void (*done) (struct scsi_cmnd *));
+int qlogicfas408_queuecommand(struct Scsi_Host *h, struct scsi_cmnd * cmd);
 int qlogicfas408_biosparam(struct scsi_device * disk,
 			   struct block_device *dev,
 			   sector_t capacity, int ip[]);
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index f8c561c..664c957 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -1003,7 +1003,7 @@
  *
  * "This code must fly." -davem
  */
-static int qlogicpti_queuecommand(struct scsi_cmnd *Cmnd, void (*done)(struct scsi_cmnd *))
+static int qlogicpti_queuecommand_lck(struct scsi_cmnd *Cmnd, void (*done)(struct scsi_cmnd *))
 {
 	struct Scsi_Host *host = Cmnd->device->host;
 	struct qlogicpti *qpti = (struct qlogicpti *) host->hostdata;
@@ -1052,6 +1052,8 @@
 	return 1;
 }
 
+static DEF_SCSI_QCMD(qlogicpti_queuecommand)
+
 static int qlogicpti_return_status(struct Status_Entry *sts, int id)
 {
 	int host_status = DID_ERROR;
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 348fba0..2aeb2e9 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -634,12 +634,13 @@
  * Description: a serial number identifies a request for error recovery
  * and debugging purposes.  Protected by the Host_Lock of host.
  */
-static inline void scsi_cmd_get_serial(struct Scsi_Host *host, struct scsi_cmnd *cmd)
+void scsi_cmd_get_serial(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 {
 	cmd->serial_number = host->cmd_serial_number++;
 	if (cmd->serial_number == 0) 
 		cmd->serial_number = host->cmd_serial_number++;
 }
+EXPORT_SYMBOL(scsi_cmd_get_serial);
 
 /**
  * scsi_dispatch_command - Dispatch a command to the low-level driver.
@@ -651,7 +652,6 @@
 int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host = cmd->device->host;
-	unsigned long flags = 0;
 	unsigned long timeout;
 	int rtn = 0;
 
@@ -737,23 +737,15 @@
 		goto out;
 	}
 
-	spin_lock_irqsave(host->host_lock, flags);
-	/*
-	 * AK: unlikely race here: for some reason the timer could
-	 * expire before the serial number is set up below.
-	 *
-	 * TODO: kill serial or move to blk layer
-	 */
-	scsi_cmd_get_serial(host, cmd); 
-
 	if (unlikely(host->shost_state == SHOST_DEL)) {
 		cmd->result = (DID_NO_CONNECT << 16);
 		scsi_done(cmd);
 	} else {
 		trace_scsi_dispatch_cmd_start(cmd);
-		rtn = host->hostt->queuecommand(cmd, scsi_done);
+		cmd->scsi_done = scsi_done;
+		rtn = host->hostt->queuecommand(host, cmd);
 	}
-	spin_unlock_irqrestore(host->host_lock, flags);
+
 	if (rtn) {
 		trace_scsi_dispatch_cmd_error(cmd, rtn);
 		if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 2c36bae..2f1f9b0 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -3538,7 +3538,7 @@
 }
 
 static
-int scsi_debug_queuecommand(struct scsi_cmnd *SCpnt, done_funct_t done)
+int scsi_debug_queuecommand_lck(struct scsi_cmnd *SCpnt, done_funct_t done)
 {
 	unsigned char *cmd = (unsigned char *) SCpnt->cmnd;
 	int len, k;
@@ -3884,6 +3884,8 @@
 			     (delay_override ? 0 : scsi_debug_delay));
 }
 
+static DEF_SCSI_QCMD(scsi_debug_queuecommand)
+
 static struct scsi_host_template sdebug_driver_template = {
 	.proc_info =		scsi_debug_proc_info,
 	.proc_name =		sdebug_proc_name,
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f3cf924..824b8fc 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -773,17 +773,15 @@
 	struct Scsi_Host *shost = sdev->host;
 	DECLARE_COMPLETION_ONSTACK(done);
 	unsigned long timeleft;
-	unsigned long flags;
 	struct scsi_eh_save ses;
 	int rtn;
 
 	scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes);
 	shost->eh_action = &done;
 
-	spin_lock_irqsave(shost->host_lock, flags);
 	scsi_log_send(scmd);
-	shost->hostt->queuecommand(scmd, scsi_eh_done);
-	spin_unlock_irqrestore(shost->host_lock, flags);
+	scmd->scsi_done = scsi_eh_done;
+	shost->hostt->queuecommand(shost, scmd);
 
 	timeleft = wait_for_completion_timeout(&done, timeout);
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b9ab3a5..9564961 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -46,7 +46,6 @@
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/delay.h>
-#include <linux/smp_lock.h>
 #include <linux/mutex.h>
 #include <linux/string_helpers.h>
 #include <linux/async.h>
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index 9c73dbd..606215e 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -572,7 +572,7 @@
 }
 
 static int
-stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
+stex_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	struct st_hba *hba;
 	struct Scsi_Host *host;
@@ -698,6 +698,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(stex_queuecommand)
+
 static void stex_scsi_done(struct st_ccb *ccb)
 {
 	struct scsi_cmnd *cmd = ccb->cmd;
diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c
index 713620e..4f0e548 100644
--- a/drivers/scsi/sun3_NCR5380.c
+++ b/drivers/scsi/sun3_NCR5380.c
@@ -908,7 +908,7 @@
  */
 
 /* Only make static if a wrapper function is used */
-static int NCR5380_queue_command(struct scsi_cmnd *cmd,
+static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd,
 				 void (*done)(struct scsi_cmnd *))
 {
     SETUP_HOSTDATA(cmd->device->host);
@@ -1019,6 +1019,8 @@
     return 0;
 }
 
+static DEF_SCSI_QCMD(NCR5380_queue_command)
+
 /*
  * Function : NCR5380_main (void) 
  *
diff --git a/drivers/scsi/sun3_scsi.h b/drivers/scsi/sun3_scsi.h
index b29a9d6..bcefd84 100644
--- a/drivers/scsi/sun3_scsi.h
+++ b/drivers/scsi/sun3_scsi.h
@@ -51,8 +51,7 @@
 static int sun3scsi_detect (struct scsi_host_template *);
 static const char *sun3scsi_info (struct Scsi_Host *);
 static int sun3scsi_bus_reset(struct scsi_cmnd *);
-static int sun3scsi_queue_command(struct scsi_cmnd *,
-				  void (*done)(struct scsi_cmnd *));
+static int sun3scsi_queue_command(struct Scsi_Host *, struct scsi_cmnd *);
 static int sun3scsi_release (struct Scsi_Host *);
 
 #ifndef CMD_PER_LUN
diff --git a/drivers/scsi/sym53c416.c b/drivers/scsi/sym53c416.c
index e5c369b..190107a 100644
--- a/drivers/scsi/sym53c416.c
+++ b/drivers/scsi/sym53c416.c
@@ -734,7 +734,7 @@
 	return info;
 }
 
-int sym53c416_queuecommand(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *))
+static int sym53c416_queuecommand_lck(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *))
 {
 	int base;
 	unsigned long flags = 0;
@@ -761,6 +761,8 @@
 	return 0;
 }
 
+DEF_SCSI_QCMD(sym53c416_queuecommand)
+
 static int sym53c416_host_reset(Scsi_Cmnd *SCpnt)
 {
 	int base;
diff --git a/drivers/scsi/sym53c416.h b/drivers/scsi/sym53c416.h
index 77860d0..387de5d 100644
--- a/drivers/scsi/sym53c416.h
+++ b/drivers/scsi/sym53c416.h
@@ -25,7 +25,7 @@
 static int sym53c416_detect(struct scsi_host_template *);
 static const char *sym53c416_info(struct Scsi_Host *);
 static int sym53c416_release(struct Scsi_Host *);
-static int sym53c416_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+static int sym53c416_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 static int sym53c416_host_reset(Scsi_Cmnd *);
 static int sym53c416_bios_param(struct scsi_device *, struct block_device *,
 		sector_t, int *);
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 8b955b5..6b97ded 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -505,7 +505,7 @@
  * queuecommand method.  Entered with the host adapter lock held and
  * interrupts disabled.
  */
-static int sym53c8xx_queue_command(struct scsi_cmnd *cmd,
+static int sym53c8xx_queue_command_lck(struct scsi_cmnd *cmd,
 					void (*done)(struct scsi_cmnd *))
 {
 	struct sym_hcb *np = SYM_SOFTC_PTR(cmd);
@@ -536,6 +536,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(sym53c8xx_queue_command)
+
 /*
  *  Linux entry point of the interrupt handler.
  */
diff --git a/drivers/scsi/t128.h b/drivers/scsi/t128.h
index 76a069b..ada1115 100644
--- a/drivers/scsi/t128.h
+++ b/drivers/scsi/t128.h
@@ -96,8 +96,7 @@
 static int t128_biosparam(struct scsi_device *, struct block_device *,
 			  sector_t, int*);
 static int t128_detect(struct scsi_host_template *);
-static int t128_queue_command(struct scsi_cmnd *,
-			      void (*done)(struct scsi_cmnd *));
+static int t128_queue_command(struct Scsi_Host *, struct scsi_cmnd *);
 static int t128_bus_reset(struct scsi_cmnd *);
 
 #ifndef CMD_PER_LUN
diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
index 27866b0..a124a28f 100644
--- a/drivers/scsi/tmscsim.c
+++ b/drivers/scsi/tmscsim.c
@@ -1883,7 +1883,7 @@
     return;
 }
 
-static int DC390_queuecommand(struct scsi_cmnd *cmd,
+static int DC390_queuecommand_lck(struct scsi_cmnd *cmd,
 		void (*done)(struct scsi_cmnd *))
 {
 	struct scsi_device *sdev = cmd->device;
@@ -1944,6 +1944,8 @@
 	return SCSI_MLQUEUE_DEVICE_BUSY;
 }
 
+static DEF_SCSI_QCMD(DC390_queuecommand)
+
 static void dc390_dumpinfo (struct dc390_acb* pACB, struct dc390_dcb* pDCB, struct dc390_srb* pSRB)
 {
     struct pci_dev *pdev;
diff --git a/drivers/scsi/u14-34f.c b/drivers/scsi/u14-34f.c
index 5d9fdee..edfc5da 100644
--- a/drivers/scsi/u14-34f.c
+++ b/drivers/scsi/u14-34f.c
@@ -433,7 +433,7 @@
 
 static int u14_34f_detect(struct scsi_host_template *);
 static int u14_34f_release(struct Scsi_Host *);
-static int u14_34f_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *));
+static int u14_34f_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 static int u14_34f_eh_abort(struct scsi_cmnd *);
 static int u14_34f_eh_host_reset(struct scsi_cmnd *);
 static int u14_34f_bios_param(struct scsi_device *, struct block_device *,
@@ -1248,7 +1248,7 @@
 
 }
 
-static int u14_34f_queuecommand(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) {
+static int u14_34f_queuecommand_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) {
    unsigned int i, j, k;
    struct mscp *cpp;
 
@@ -1329,6 +1329,8 @@
    return 0;
 }
 
+static DEF_SCSI_QCMD(u14_34f_queuecommand)
+
 static int u14_34f_eh_abort(struct scsi_cmnd *SCarg) {
    unsigned int i, j;
 
diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c
index 27aa40f..0571ef96 100644
--- a/drivers/scsi/ultrastor.c
+++ b/drivers/scsi/ultrastor.c
@@ -700,7 +700,7 @@
 	mscp->transfer_data_length = transfer_length;
 }
 
-static int ultrastor_queuecommand(struct scsi_cmnd *SCpnt,
+static int ultrastor_queuecommand_lck(struct scsi_cmnd *SCpnt,
 				void (*done) (struct scsi_cmnd *))
 {
     struct mscp *my_mscp;
@@ -825,6 +825,8 @@
     return 0;
 }
 
+static DEF_SCSI_QCMD(ultrastor_queuecommand)
+
 /* This code must deal with 2 cases:
 
    1. The command has not been written to the OGM.  In this case, set
diff --git a/drivers/scsi/ultrastor.h b/drivers/scsi/ultrastor.h
index a692905..165c18b 100644
--- a/drivers/scsi/ultrastor.h
+++ b/drivers/scsi/ultrastor.h
@@ -15,8 +15,7 @@
 
 static int ultrastor_detect(struct scsi_host_template *);
 static const char *ultrastor_info(struct Scsi_Host *shpnt);
-static int ultrastor_queuecommand(struct scsi_cmnd *,
-				void (*done)(struct scsi_cmnd *));
+static int ultrastor_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 static int ultrastor_abort(struct scsi_cmnd *);
 static int ultrastor_host_reset(struct scsi_cmnd *);
 static int ultrastor_biosparam(struct scsi_device *, struct block_device *,
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 2689445..a18996d 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -690,7 +690,7 @@
 	return 0;
 }
 
-static int pvscsi_queue(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct pvscsi_adapter *adapter = shost_priv(host);
@@ -719,6 +719,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(pvscsi_queue)
+
 static int pvscsi_abort(struct scsi_cmnd *cmd)
 {
 	struct pvscsi_adapter *adapter = shost_priv(cmd->device->host);
diff --git a/drivers/scsi/wd33c93.c b/drivers/scsi/wd33c93.c
index b701bf2..5f697e0 100644
--- a/drivers/scsi/wd33c93.c
+++ b/drivers/scsi/wd33c93.c
@@ -371,8 +371,8 @@
 	msg[1] = offset;
 }
 
-int
-wd33c93_queuecommand(struct scsi_cmnd *cmd,
+static int
+wd33c93_queuecommand_lck(struct scsi_cmnd *cmd,
 		void (*done)(struct scsi_cmnd *))
 {
 	struct WD33C93_hostdata *hostdata;
@@ -468,6 +468,8 @@
 	return 0;
 }
 
+DEF_SCSI_QCMD(wd33c93_queuecommand)
+
 /*
  * This routine attempts to start a scsi command. If the host_card is
  * already connected, we give up immediately. Otherwise, look through
diff --git a/drivers/scsi/wd33c93.h b/drivers/scsi/wd33c93.h
index 1ed5f3b..3b463d7 100644
--- a/drivers/scsi/wd33c93.h
+++ b/drivers/scsi/wd33c93.h
@@ -343,8 +343,7 @@
 void wd33c93_init (struct Scsi_Host *instance, const wd33c93_regs regs,
          dma_setup_t setup, dma_stop_t stop, int clock_freq);
 int wd33c93_abort (struct scsi_cmnd *cmd);
-int wd33c93_queuecommand (struct scsi_cmnd *cmd,
-		void (*done)(struct scsi_cmnd *));
+int wd33c93_queuecommand (struct Scsi_Host *h, struct scsi_cmnd *cmd);
 void wd33c93_intr (struct Scsi_Host *instance);
 int wd33c93_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int);
 int wd33c93_host_reset (struct scsi_cmnd *);
diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c
index 333580b..db451ae 100644
--- a/drivers/scsi/wd7000.c
+++ b/drivers/scsi/wd7000.c
@@ -1082,7 +1082,7 @@
 	return IRQ_HANDLED;
 }
 
-static int wd7000_queuecommand(struct scsi_cmnd *SCpnt,
+static int wd7000_queuecommand_lck(struct scsi_cmnd *SCpnt,
 		void (*done)(struct scsi_cmnd *))
 {
 	Scb *scb;
@@ -1139,6 +1139,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(wd7000_queuecommand)
+
 static int wd7000_diagnostics(Adapter * host, int code)
 {
 	static IcbDiag icb = { ICB_OP_DIAGNOSTICS };
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index fa62578..bcc31f2 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -18,7 +18,6 @@
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/major.h>
-#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index c4ea146..9ffa5be 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -29,7 +29,6 @@
 #include <linux/console.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/device.h>
 #include <linux/serial.h> /* for serial_state and serial_icounter_struct */
 #include <linux/serial_core.h>
diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c
index 09615b5..3f5e387 100644
--- a/drivers/sh/clk/core.c
+++ b/drivers/sh/clk/core.c
@@ -418,8 +418,11 @@
 		list_add(&clk->sibling, &root_clks);
 
 	list_add(&clk->node, &clock_list);
+
+#ifdef CONFIG_SH_CLK_CPG_LEGACY
 	if (clk->ops && clk->ops->init)
 		clk->ops->init(clk);
+#endif
 
 out_unlock:
 	mutex_unlock(&clock_list_sem);
@@ -455,19 +458,13 @@
 
 int clk_set_rate(struct clk *clk, unsigned long rate)
 {
-	return clk_set_rate_ex(clk, rate, 0);
-}
-EXPORT_SYMBOL_GPL(clk_set_rate);
-
-int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id)
-{
 	int ret = -EOPNOTSUPP;
 	unsigned long flags;
 
 	spin_lock_irqsave(&clock_lock, flags);
 
 	if (likely(clk->ops && clk->ops->set_rate)) {
-		ret = clk->ops->set_rate(clk, rate, algo_id);
+		ret = clk->ops->set_rate(clk, rate);
 		if (ret != 0)
 			goto out_unlock;
 	} else {
@@ -485,7 +482,7 @@
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(clk_set_rate_ex);
+EXPORT_SYMBOL_GPL(clk_set_rate);
 
 int clk_set_parent(struct clk *clk, struct clk *parent)
 {
@@ -571,7 +568,7 @@
 					*best_freq = freq_max;
 			}
 
-			pr_debug("too low freq %lu, error %lu\n", freq->frequency,
+			pr_debug("too low freq %u, error %lu\n", freq->frequency,
 				 target - freq_max);
 
 			if (!error)
@@ -591,7 +588,7 @@
 					*best_freq = freq_min;
 			}
 
-			pr_debug("too high freq %lu, error %lu\n", freq->frequency,
+			pr_debug("too high freq %u, error %lu\n", freq->frequency,
 				 freq_min - target);
 
 			if (!error)
@@ -653,8 +650,7 @@
 					clkp->ops->set_parent(clkp,
 						clkp->parent);
 				if (likely(clkp->ops->set_rate))
-					clkp->ops->set_rate(clkp,
-						rate, NO_CHANGE);
+					clkp->ops->set_rate(clkp, rate);
 				else if (likely(clkp->ops->recalc))
 					clkp->rate = clkp->ops->recalc(clkp);
 			}
diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c
index 3aea5f0..6172335 100644
--- a/drivers/sh/clk/cpg.c
+++ b/drivers/sh/clk/cpg.c
@@ -110,8 +110,7 @@
 	return 0;
 }
 
-static int sh_clk_div6_set_rate(struct clk *clk,
-				unsigned long rate, int algo_id)
+static int sh_clk_div6_set_rate(struct clk *clk, unsigned long rate)
 {
 	unsigned long value;
 	int idx;
@@ -132,7 +131,7 @@
 	unsigned long value;
 	int ret;
 
-	ret = sh_clk_div6_set_rate(clk, clk->rate, 0);
+	ret = sh_clk_div6_set_rate(clk, clk->rate);
 	if (ret == 0) {
 		value = __raw_readl(clk->enable_reg);
 		value &= ~0x100; /* clear stop bit to enable clock */
@@ -253,7 +252,7 @@
 	return 0;
 }
 
-static int sh_clk_div4_set_rate(struct clk *clk, unsigned long rate, int algo_id)
+static int sh_clk_div4_set_rate(struct clk *clk, unsigned long rate)
 {
 	struct clk_div4_table *d4t = clk->priv;
 	unsigned long value;
diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
index e5bf5d3..4e0ff71 100644
--- a/drivers/sh/intc/virq.c
+++ b/drivers/sh/intc/virq.c
@@ -215,7 +215,7 @@
 		entry = radix_tree_deref_slot((void **)entries[i]);
 		if (unlikely(!entry))
 			continue;
-		if (unlikely(entry == RADIX_TREE_RETRY))
+		if (radix_tree_deref_retry(entry))
 			goto restart;
 
 		irq = create_irq();
diff --git a/drivers/staging/easycap/easycap.h b/drivers/staging/easycap/easycap.h
index f3c827e..25961c2 100644
--- a/drivers/staging/easycap/easycap.h
+++ b/drivers/staging/easycap/easycap.h
@@ -77,7 +77,6 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kref.h>
-#include <linux/smp_lock.h>
 #include <linux/usb.h>
 #include <linux/uaccess.h>
 
diff --git a/drivers/staging/hv/storvsc_drv.c b/drivers/staging/hv/storvsc_drv.c
index 41d9acf..6f8d67d 100644
--- a/drivers/staging/hv/storvsc_drv.c
+++ b/drivers/staging/hv/storvsc_drv.c
@@ -72,8 +72,7 @@
 
 /* Static decl */
 static int storvsc_probe(struct device *dev);
-static int storvsc_queuecommand(struct scsi_cmnd *scmnd,
-				void (*done)(struct scsi_cmnd *));
+static int storvsc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd);
 static int storvsc_device_alloc(struct scsi_device *);
 static int storvsc_device_configure(struct scsi_device *);
 static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd);
@@ -595,7 +594,7 @@
 /*
  * storvsc_queuecommand - Initiate command processing
  */
-static int storvsc_queuecommand(struct scsi_cmnd *scmnd,
+static int storvsc_queuecommand_lck(struct scsi_cmnd *scmnd,
 				void (*done)(struct scsi_cmnd *))
 {
 	int ret;
@@ -783,6 +782,8 @@
 	return ret;
 }
 
+static DEF_SCSI_QCMD(storvsc_queuecommand)
+
 static int storvsc_merge_bvec(struct request_queue *q,
 			      struct bvec_merge_data *bmd, struct bio_vec *bvec)
 {
diff --git a/drivers/staging/intel_sst/intel_sst_app_interface.c b/drivers/staging/intel_sst/intel_sst_app_interface.c
index 9618c79..9914400 100644
--- a/drivers/staging/intel_sst/intel_sst_app_interface.c
+++ b/drivers/staging/intel_sst/intel_sst_app_interface.c
@@ -34,7 +34,6 @@
 #include <linux/uaccess.h>
 #include <linux/firmware.h>
 #include <linux/ioctl.h>
-#include <linux/smp_lock.h>
 #ifdef CONFIG_MRST_RAR_HANDLER
 #include <linux/rar_register.h>
 #include "../../../drivers/staging/memrar/memrar.h"
diff --git a/drivers/staging/keucr/scsiglue.c b/drivers/staging/keucr/scsiglue.c
index a267140..da4f42a 100644
--- a/drivers/staging/keucr/scsiglue.c
+++ b/drivers/staging/keucr/scsiglue.c
@@ -87,7 +87,7 @@
 
 /* This is always called with scsi_lock(host) held */
 //----- queuecommand() ---------------------
-static int queuecommand(struct scsi_cmnd *srb, void (*done)(struct scsi_cmnd *))
+static int queuecommand_lck(struct scsi_cmnd *srb, void (*done)(struct scsi_cmnd *))
 {
 	struct us_data *us = host_to_us(srb->device->host);
 
@@ -117,6 +117,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(queuecommand)
+
 /***********************************************************************
  * Error handling functions
  ***********************************************************************/
diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c
index 75aa7a36..4ca45ec 100644
--- a/drivers/staging/olpc_dcon/olpc_dcon.c
+++ b/drivers/staging/olpc_dcon/olpc_dcon.c
@@ -17,7 +17,6 @@
 #include <linux/console.h>
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
-#include <linux/i2c-id.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 #include <linux/interrupt.h>
@@ -733,7 +732,6 @@
  edev:
 	platform_device_unregister(dcon_device);
 	dcon_device = NULL;
-	i2c_set_clientdata(client, NULL);
  eirq:
 	free_irq(DCON_IRQ, &dcon_driver);
  einit:
@@ -757,8 +755,6 @@
 		platform_device_unregister(dcon_device);
 	cancel_work_sync(&dcon_work);
 
-	i2c_set_clientdata(client, NULL);
-
 	return 0;
 }
 
diff --git a/drivers/staging/rtl8712/osdep_service.h b/drivers/staging/rtl8712/osdep_service.h
index d1674cd..831d81e 100644
--- a/drivers/staging/rtl8712/osdep_service.h
+++ b/drivers/staging/rtl8712/osdep_service.h
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kref.h>
-#include <linux/smp_lock.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/usb.h>
diff --git a/drivers/staging/speakup/buffers.c b/drivers/staging/speakup/buffers.c
index b7b60d5..a2db956 100644
--- a/drivers/staging/speakup/buffers.c
+++ b/drivers/staging/speakup/buffers.c
@@ -1,5 +1,4 @@
 #include <linux/console.h>
-#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/wait.h>
 
diff --git a/drivers/staging/stradis/Kconfig b/drivers/staging/stradis/Kconfig
index 92e8911..02f0fc5 100644
--- a/drivers/staging/stradis/Kconfig
+++ b/drivers/staging/stradis/Kconfig
@@ -1,6 +1,6 @@
 config VIDEO_STRADIS
         tristate "Stradis 4:2:2 MPEG-2 video driver (DEPRECATED)"
-        depends on EXPERIMENTAL && PCI && VIDEO_V4L1 && VIRT_TO_BUS
+        depends on EXPERIMENTAL && PCI && VIDEO_V4L1 && VIRT_TO_BUS && BKL
         help
           Say Y here to enable support for the Stradis 4:2:2 MPEG-2 video
           driver for PCI.  There is a product page at
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index eaa5d3e..c556ed9 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -554,7 +554,7 @@
 #ifdef CONFIG_INPUT
 
 /* Simple translation table for the SysRq keys */
-static const unsigned char sysrq_xlate[KEY_MAX + 1] =
+static const unsigned char sysrq_xlate[KEY_CNT] =
         "\000\0331234567890-=\177\t"                    /* 0x00 - 0x0f */
         "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */
         "dfghjkl;'`\000\\zxcv"                          /* 0x20 - 0x2f */
@@ -563,53 +563,129 @@
         "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */
         "\r\000/";                                      /* 0x60 - 0x6f */
 
-static bool sysrq_down;
-static int sysrq_alt_use;
-static int sysrq_alt;
-static DEFINE_SPINLOCK(sysrq_event_lock);
+struct sysrq_state {
+	struct input_handle handle;
+	struct work_struct reinject_work;
+	unsigned long key_down[BITS_TO_LONGS(KEY_CNT)];
+	unsigned int alt;
+	unsigned int alt_use;
+	bool active;
+	bool need_reinject;
+};
 
-static bool sysrq_filter(struct input_handle *handle, unsigned int type,
-		         unsigned int code, int value)
+static void sysrq_reinject_alt_sysrq(struct work_struct *work)
 {
+	struct sysrq_state *sysrq =
+			container_of(work, struct sysrq_state, reinject_work);
+	struct input_handle *handle = &sysrq->handle;
+	unsigned int alt_code = sysrq->alt_use;
+
+	if (sysrq->need_reinject) {
+		/* Simulate press and release of Alt + SysRq */
+		input_inject_event(handle, EV_KEY, alt_code, 1);
+		input_inject_event(handle, EV_KEY, KEY_SYSRQ, 1);
+		input_inject_event(handle, EV_SYN, SYN_REPORT, 1);
+
+		input_inject_event(handle, EV_KEY, KEY_SYSRQ, 0);
+		input_inject_event(handle, EV_KEY, alt_code, 0);
+		input_inject_event(handle, EV_SYN, SYN_REPORT, 1);
+	}
+}
+
+static bool sysrq_filter(struct input_handle *handle,
+			 unsigned int type, unsigned int code, int value)
+{
+	struct sysrq_state *sysrq = handle->private;
+	bool was_active = sysrq->active;
 	bool suppress;
 
-	/* We are called with interrupts disabled, just take the lock */
-	spin_lock(&sysrq_event_lock);
+	switch (type) {
 
-	if (type != EV_KEY)
-		goto out;
-
-	switch (code) {
-
-	case KEY_LEFTALT:
-	case KEY_RIGHTALT:
-		if (value)
-			sysrq_alt = code;
-		else {
-			if (sysrq_down && code == sysrq_alt_use)
-				sysrq_down = false;
-
-			sysrq_alt = 0;
-		}
+	case EV_SYN:
+		suppress = false;
 		break;
 
-	case KEY_SYSRQ:
-		if (value == 1 && sysrq_alt) {
-			sysrq_down = true;
-			sysrq_alt_use = sysrq_alt;
+	case EV_KEY:
+		switch (code) {
+
+		case KEY_LEFTALT:
+		case KEY_RIGHTALT:
+			if (!value) {
+				/* One of ALTs is being released */
+				if (sysrq->active && code == sysrq->alt_use)
+					sysrq->active = false;
+
+				sysrq->alt = KEY_RESERVED;
+
+			} else if (value != 2) {
+				sysrq->alt = code;
+				sysrq->need_reinject = false;
+			}
+			break;
+
+		case KEY_SYSRQ:
+			if (value == 1 && sysrq->alt != KEY_RESERVED) {
+				sysrq->active = true;
+				sysrq->alt_use = sysrq->alt;
+				/*
+				 * If nothing else will be pressed we'll need
+				 * to * re-inject Alt-SysRq keysroke.
+				 */
+				sysrq->need_reinject = true;
+			}
+
+			/*
+			 * Pretend that sysrq was never pressed at all. This
+			 * is needed to properly handle KGDB which will try
+			 * to release all keys after exiting debugger. If we
+			 * do not clear key bit it KGDB will end up sending
+			 * release events for Alt and SysRq, potentially
+			 * triggering print screen function.
+			 */
+			if (sysrq->active)
+				clear_bit(KEY_SYSRQ, handle->dev->key);
+
+			break;
+
+		default:
+			if (sysrq->active && value && value != 2) {
+				sysrq->need_reinject = false;
+				__handle_sysrq(sysrq_xlate[code], true);
+			}
+			break;
+		}
+
+		suppress = sysrq->active;
+
+		if (!sysrq->active) {
+			/*
+			 * If we are not suppressing key presses keep track of
+			 * keyboard state so we can release keys that have been
+			 * pressed before entering SysRq mode.
+			 */
+			if (value)
+				set_bit(code, sysrq->key_down);
+			else
+				clear_bit(code, sysrq->key_down);
+
+			if (was_active)
+				schedule_work(&sysrq->reinject_work);
+
+		} else if (value == 0 &&
+			   test_and_clear_bit(code, sysrq->key_down)) {
+			/*
+			 * Pass on release events for keys that was pressed before
+			 * entering SysRq mode.
+			 */
+			suppress = false;
 		}
 		break;
 
 	default:
-		if (sysrq_down && value && value != 2)
-			__handle_sysrq(sysrq_xlate[code], true);
+		suppress = sysrq->active;
 		break;
 	}
 
-out:
-	suppress = sysrq_down;
-	spin_unlock(&sysrq_event_lock);
-
 	return suppress;
 }
 
@@ -617,28 +693,28 @@
 			 struct input_dev *dev,
 			 const struct input_device_id *id)
 {
-	struct input_handle *handle;
+	struct sysrq_state *sysrq;
 	int error;
 
-	sysrq_down = false;
-	sysrq_alt = 0;
-
-	handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
-	if (!handle)
+	sysrq = kzalloc(sizeof(struct sysrq_state), GFP_KERNEL);
+	if (!sysrq)
 		return -ENOMEM;
 
-	handle->dev = dev;
-	handle->handler = handler;
-	handle->name = "sysrq";
+	INIT_WORK(&sysrq->reinject_work, sysrq_reinject_alt_sysrq);
 
-	error = input_register_handle(handle);
+	sysrq->handle.dev = dev;
+	sysrq->handle.handler = handler;
+	sysrq->handle.name = "sysrq";
+	sysrq->handle.private = sysrq;
+
+	error = input_register_handle(&sysrq->handle);
 	if (error) {
 		pr_err("Failed to register input sysrq handler, error %d\n",
 			error);
 		goto err_free;
 	}
 
-	error = input_open_device(handle);
+	error = input_open_device(&sysrq->handle);
 	if (error) {
 		pr_err("Failed to open input device, error %d\n", error);
 		goto err_unregister;
@@ -647,17 +723,20 @@
 	return 0;
 
  err_unregister:
-	input_unregister_handle(handle);
+	input_unregister_handle(&sysrq->handle);
  err_free:
-	kfree(handle);
+	kfree(sysrq);
 	return error;
 }
 
 static void sysrq_disconnect(struct input_handle *handle)
 {
+	struct sysrq_state *sysrq = handle->private;
+
 	input_close_device(handle);
+	cancel_work_sync(&sysrq->reinject_work);
 	input_unregister_handle(handle);
-	kfree(handle);
+	kfree(sysrq);
 }
 
 /*
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index ea071a5..44447f5 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -2301,7 +2301,7 @@
 	return ret;
 }
 
-static DEVICE_ATTR(stat_status, S_IWUGO | S_IRUGO, read_status, reboot);
+static DEVICE_ATTR(stat_status, S_IWUSR | S_IRUGO, read_status, reboot);
 
 static ssize_t read_human_status(struct device *dev,
 			struct device_attribute *attr, char *buf)
@@ -2364,8 +2364,7 @@
 	return ret;
 }
 
-static DEVICE_ATTR(stat_human_status, S_IWUGO | S_IRUGO,
-				read_human_status, NULL);
+static DEVICE_ATTR(stat_human_status, S_IRUGO, read_human_status, NULL);
 
 static ssize_t read_delin(struct device *dev, struct device_attribute *attr,
 		char *buf)
@@ -2397,7 +2396,7 @@
 	return ret;
 }
 
-static DEVICE_ATTR(stat_delin, S_IWUGO | S_IRUGO, read_delin, NULL);
+static DEVICE_ATTR(stat_delin, S_IRUGO, read_delin, NULL);
 
 #define UEA_ATTR(name, reset)					\
 								\
diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c
index ddb4dc9..a3d2e23 100644
--- a/drivers/usb/core/devices.c
+++ b/drivers/usb/core/devices.c
@@ -54,7 +54,6 @@
 #include <linux/gfp.h>
 #include <linux/poll.h>
 #include <linux/usb.h>
-#include <linux/smp_lock.h>
 #include <linux/usbdevice_fs.h>
 #include <linux/usb/hcd.h>
 #include <linux/mutex.h>
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 045bb4b..a7131ad 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -37,7 +37,6 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/signal.h>
 #include <linux/poll.h>
 #include <linux/module.h>
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 9fe34fb..cf6a542 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -19,7 +19,6 @@
 #include <linux/errno.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/usb.h>
 
 #include "usb.h"
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 9819a4c..b690aa3 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -39,7 +39,6 @@
 #include <linux/parser.h>
 #include <linux/notifier.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/usb/hcd.h>
 #include <asm/byteorder.h>
 #include "usb.h"
diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c
index b5e20e8..717ff65 100644
--- a/drivers/usb/gadget/atmel_usba_udc.c
+++ b/drivers/usb/gadget/atmel_usba_udc.c
@@ -2017,7 +2017,7 @@
 			}
 		} else {
 			/* gpio_request fail so use -EINVAL for gpio_is_valid */
-			ubc->vbus_pin = -EINVAL;
+			udc->vbus_pin = -EINVAL;
 		}
 	}
 
diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index 4a830df..484c5ba 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -30,7 +30,6 @@
 #include <linux/blkdev.h>
 #include <linux/pagemap.h>
 #include <asm/unaligned.h>
-#include <linux/smp_lock.h>
 
 #include <linux/usb/composite.h>
 #include <linux/usb/functionfs.h>
diff --git a/drivers/usb/gadget/f_hid.c b/drivers/usb/gadget/f_hid.c
index 4f891ed..598e7e2 100644
--- a/drivers/usb/gadget/f_hid.c
+++ b/drivers/usb/gadget/f_hid.c
@@ -25,7 +25,6 @@
 #include <linux/cdev.h>
 #include <linux/mutex.h>
 #include <linux/poll.h>
-#include <linux/smp_lock.h>
 #include <linux/uaccess.h>
 #include <linux/wait.h>
 #include <linux/usb/g_hid.h>
diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index 86afdc7..6e25996 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -1067,7 +1067,7 @@
 						    &debug_registers_fops))
 		goto file_error;
 
-	if (!debugfs_create_file("lpm", S_IRUGO|S_IWUGO, ehci->debug_dir, bus,
+	if (!debugfs_create_file("lpm", S_IRUGO|S_IWUSR, ehci->debug_dir, bus,
 						    &debug_lpm_fops))
 		goto file_error;
 
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 502a7e6..e906280 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1063,10 +1063,11 @@
 				tmp && tmp != qh;
 				tmp = tmp->qh_next.qh)
 			continue;
-		/* periodic qh self-unlinks on empty */
-		if (!tmp)
-			goto nogood;
-		unlink_async (ehci, qh);
+		/* periodic qh self-unlinks on empty, and a COMPLETING qh
+		 * may already be unlinked.
+		 */
+		if (tmp)
+			unlink_async(ehci, qh);
 		/* FALL THROUGH */
 	case QH_STATE_UNLINK:		/* wait for hw to finish? */
 	case QH_STATE_UNLINK_WAIT:
@@ -1083,7 +1084,6 @@
 		}
 		/* else FALL THROUGH */
 	default:
-nogood:
 		/* caller was supposed to have unlinked any requests;
 		 * that's not our job.  just leak this memory.
 		 */
diff --git a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c
index d36e4e7..12f70c3 100644
--- a/drivers/usb/host/ehci-mem.c
+++ b/drivers/usb/host/ehci-mem.c
@@ -141,6 +141,10 @@
 		qh_put (ehci->async);
 	ehci->async = NULL;
 
+	if (ehci->dummy)
+		qh_put(ehci->dummy);
+	ehci->dummy = NULL;
+
 	/* DMA consistent memory and pools */
 	if (ehci->qtd_pool)
 		dma_pool_destroy (ehci->qtd_pool);
@@ -227,8 +231,26 @@
 	if (ehci->periodic == NULL) {
 		goto fail;
 	}
-	for (i = 0; i < ehci->periodic_size; i++)
-		ehci->periodic [i] = EHCI_LIST_END(ehci);
+
+	if (ehci->use_dummy_qh) {
+		struct ehci_qh_hw	*hw;
+		ehci->dummy = ehci_qh_alloc(ehci, flags);
+		if (!ehci->dummy)
+			goto fail;
+
+		hw = ehci->dummy->hw;
+		hw->hw_next = EHCI_LIST_END(ehci);
+		hw->hw_qtd_next = EHCI_LIST_END(ehci);
+		hw->hw_alt_next = EHCI_LIST_END(ehci);
+		hw->hw_token &= ~QTD_STS_ACTIVE;
+		ehci->dummy->hw = hw;
+
+		for (i = 0; i < ehci->periodic_size; i++)
+			ehci->periodic[i] = ehci->dummy->qh_dma;
+	} else {
+		for (i = 0; i < ehci->periodic_size; i++)
+			ehci->periodic[i] = EHCI_LIST_END(ehci);
+	}
 
 	/* software shadow of hardware table */
 	ehci->pshadow = kcalloc(ehci->periodic_size, sizeof(void *), flags);
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index a1e8d27..01bb72b 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -103,6 +103,19 @@
 	if (retval)
 		return retval;
 
+	if ((pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x7808) ||
+	    (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x4396)) {
+		/* EHCI controller on AMD SB700/SB800/Hudson-2/3 platforms may
+		 * read/write memory space which does not belong to it when
+		 * there is NULL pointer with T-bit set to 1 in the frame list
+		 * table. To avoid the issue, the frame list link pointer
+		 * should always contain a valid pointer to a inactive qh.
+		 */
+		ehci->use_dummy_qh = 1;
+		ehci_info(ehci, "applying AMD SB700/SB800/Hudson-2/3 EHCI "
+				"dummy qh workaround\n");
+	}
+
 	/* data structure init */
 	retval = ehci_init(hcd);
 	if (retval)
diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index a92526d..d9f78eb 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -98,7 +98,14 @@
 	 */
 	*prev_p = *periodic_next_shadow(ehci, &here,
 			Q_NEXT_TYPE(ehci, *hw_p));
-	*hw_p = *shadow_next_periodic(ehci, &here, Q_NEXT_TYPE(ehci, *hw_p));
+
+	if (!ehci->use_dummy_qh ||
+	    *shadow_next_periodic(ehci, &here, Q_NEXT_TYPE(ehci, *hw_p))
+			!= EHCI_LIST_END(ehci))
+		*hw_p = *shadow_next_periodic(ehci, &here,
+				Q_NEXT_TYPE(ehci, *hw_p));
+	else
+		*hw_p = ehci->dummy->qh_dma;
 }
 
 /* how many of the uframe's 125 usecs are allocated? */
@@ -2335,7 +2342,11 @@
 				 * pointer for much longer, if at all.
 				 */
 				*q_p = q.itd->itd_next;
-				*hw_p = q.itd->hw_next;
+				if (!ehci->use_dummy_qh ||
+				    q.itd->hw_next != EHCI_LIST_END(ehci))
+					*hw_p = q.itd->hw_next;
+				else
+					*hw_p = ehci->dummy->qh_dma;
 				type = Q_NEXT_TYPE(ehci, q.itd->hw_next);
 				wmb();
 				modified = itd_complete (ehci, q.itd);
@@ -2368,7 +2379,11 @@
 				 * URB completion.
 				 */
 				*q_p = q.sitd->sitd_next;
-				*hw_p = q.sitd->hw_next;
+				if (!ehci->use_dummy_qh ||
+				    q.sitd->hw_next != EHCI_LIST_END(ehci))
+					*hw_p = q.sitd->hw_next;
+				else
+					*hw_p = ehci->dummy->qh_dma;
 				type = Q_NEXT_TYPE(ehci, q.sitd->hw_next);
 				wmb();
 				modified = sitd_complete (ehci, q.sitd);
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index bde823f..ba8eab3 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -73,6 +73,7 @@
 
 	/* async schedule support */
 	struct ehci_qh		*async;
+	struct ehci_qh		*dummy;		/* For AMD quirk use */
 	struct ehci_qh		*reclaim;
 	unsigned		scanning : 1;
 
@@ -131,6 +132,7 @@
 	unsigned		need_io_watchdog:1;
 	unsigned		broken_periodic:1;
 	unsigned		fs_i_thresh:1;	/* Intel iso scheduling */
+	unsigned		use_dummy_qh:1;	/* AMD Frame List table quirk*/
 
 	/* required for usb32 quirk */
 	#define OHCI_CTRL_HCFS          (3 << 6)
diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
index 8196fa1..43a39eb 100644
--- a/drivers/usb/host/isp1362-hcd.c
+++ b/drivers/usb/host/isp1362-hcd.c
@@ -70,7 +70,6 @@
 #include <linux/ioport.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -2684,7 +2683,7 @@
 	return 0;
 }
 
-static int __init isp1362_probe(struct platform_device *pdev)
+static int __devinit isp1362_probe(struct platform_device *pdev)
 {
 	struct usb_hcd *hcd;
 	struct isp1362_hcd *isp1362_hcd;
diff --git a/drivers/usb/host/uhci-debug.c b/drivers/usb/host/uhci-debug.c
index 6e7fb5f..ee60cd3 100644
--- a/drivers/usb/host/uhci-debug.c
+++ b/drivers/usb/host/uhci-debug.c
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/debugfs.h>
-#include <linux/smp_lock.h>
 #include <asm/io.h>
 
 #include "uhci-hcd.h"
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 2027706..d178761 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1045,7 +1045,7 @@
 	if (udev->speed == USB_SPEED_SUPER)
 		return ep->ss_ep_comp.wBytesPerInterval;
 
-	max_packet = ep->desc.wMaxPacketSize & 0x3ff;
+	max_packet = GET_MAX_PACKET(ep->desc.wMaxPacketSize);
 	max_burst = (ep->desc.wMaxPacketSize & 0x1800) >> 11;
 	/* A 0 in max burst means 1 transfer per ESIT */
 	return max_packet * (max_burst + 1);
@@ -1135,7 +1135,7 @@
 		/* Fall through */
 	case USB_SPEED_FULL:
 	case USB_SPEED_LOW:
-		max_packet = ep->desc.wMaxPacketSize & 0x3ff;
+		max_packet = GET_MAX_PACKET(ep->desc.wMaxPacketSize);
 		ep_ctx->ep_info2 |= MAX_PACKET(max_packet);
 		break;
 	default:
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 9f3115e..df558f6 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2104,7 +2104,6 @@
 
 	if (!(status & STS_EINT)) {
 		spin_unlock(&xhci->lock);
-		xhci_warn(xhci, "Spurious interrupt.\n");
 		return IRQ_NONE;
 	}
 	xhci_dbg(xhci, "op reg status = %08x\n", status);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 5d7d4e9..06fca08 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -577,6 +577,65 @@
 	xhci_write_64(xhci, xhci->s3.erst_base, &xhci->ir_set->erst_base);
 }
 
+static void xhci_set_cmd_ring_deq(struct xhci_hcd *xhci)
+{
+	u64	val_64;
+
+	/* step 2: initialize command ring buffer */
+	val_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
+	val_64 = (val_64 & (u64) CMD_RING_RSVD_BITS) |
+		(xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg,
+				      xhci->cmd_ring->dequeue) &
+		 (u64) ~CMD_RING_RSVD_BITS) |
+		xhci->cmd_ring->cycle_state;
+	xhci_dbg(xhci, "// Setting command ring address to 0x%llx\n",
+			(long unsigned long) val_64);
+	xhci_write_64(xhci, val_64, &xhci->op_regs->cmd_ring);
+}
+
+/*
+ * The whole command ring must be cleared to zero when we suspend the host.
+ *
+ * The host doesn't save the command ring pointer in the suspend well, so we
+ * need to re-program it on resume.  Unfortunately, the pointer must be 64-byte
+ * aligned, because of the reserved bits in the command ring dequeue pointer
+ * register.  Therefore, we can't just set the dequeue pointer back in the
+ * middle of the ring (TRBs are 16-byte aligned).
+ */
+static void xhci_clear_command_ring(struct xhci_hcd *xhci)
+{
+	struct xhci_ring *ring;
+	struct xhci_segment *seg;
+
+	ring = xhci->cmd_ring;
+	seg = ring->deq_seg;
+	do {
+		memset(seg->trbs, 0, SEGMENT_SIZE);
+		seg = seg->next;
+	} while (seg != ring->deq_seg);
+
+	/* Reset the software enqueue and dequeue pointers */
+	ring->deq_seg = ring->first_seg;
+	ring->dequeue = ring->first_seg->trbs;
+	ring->enq_seg = ring->deq_seg;
+	ring->enqueue = ring->dequeue;
+
+	/*
+	 * Ring is now zeroed, so the HW should look for change of ownership
+	 * when the cycle bit is set to 1.
+	 */
+	ring->cycle_state = 1;
+
+	/*
+	 * Reset the hardware dequeue pointer.
+	 * Yes, this will need to be re-written after resume, but we're paranoid
+	 * and want to make sure the hardware doesn't access bogus memory
+	 * because, say, the BIOS or an SMI started the host without changing
+	 * the command ring pointers.
+	 */
+	xhci_set_cmd_ring_deq(xhci);
+}
+
 /*
  * Stop HC (not bus-specific)
  *
@@ -604,6 +663,7 @@
 		spin_unlock_irq(&xhci->lock);
 		return -ETIMEDOUT;
 	}
+	xhci_clear_command_ring(xhci);
 
 	/* step 3: save registers */
 	xhci_save_registers(xhci);
@@ -635,7 +695,6 @@
 	u32			command, temp = 0;
 	struct usb_hcd		*hcd = xhci_to_hcd(xhci);
 	struct pci_dev		*pdev = to_pci_dev(hcd->self.controller);
-	u64	val_64;
 	int	old_state, retval;
 
 	old_state = hcd->state;
@@ -648,15 +707,7 @@
 		/* step 1: restore register */
 		xhci_restore_registers(xhci);
 		/* step 2: initialize command ring buffer */
-		val_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
-		val_64 = (val_64 & (u64) CMD_RING_RSVD_BITS) |
-			 (xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg,
-					       xhci->cmd_ring->dequeue) &
-			 (u64) ~CMD_RING_RSVD_BITS) |
-			 xhci->cmd_ring->cycle_state;
-		xhci_dbg(xhci, "// Setting command ring address to 0x%llx\n",
-				(long unsigned long) val_64);
-		xhci_write_64(xhci, val_64, &xhci->op_regs->cmd_ring);
+		xhci_set_cmd_ring_deq(xhci);
 		/* step 3: restore state and start state*/
 		/* step 3: set CRS flag */
 		command = xhci_readl(xhci, &xhci->op_regs->command);
@@ -714,6 +765,7 @@
 		return retval;
 	}
 
+	spin_unlock_irq(&xhci->lock);
 	/* Re-setup MSI-X */
 	if (hcd->irq)
 		free_irq(hcd->irq, hcd);
@@ -736,6 +788,7 @@
 		hcd->irq = pdev->irq;
 	}
 
+	spin_lock_irq(&xhci->lock);
 	/* step 4: set Run/Stop bit */
 	command = xhci_readl(xhci, &xhci->op_regs->command);
 	command |= CMD_RUN;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 93d3bf4..85e6564 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -621,6 +621,11 @@
 #define MAX_PACKET_MASK		(0xffff << 16)
 #define MAX_PACKET_DECODED(p)	(((p) >> 16) & 0xffff)
 
+/* Get max packet size from ep desc. Bit 10..0 specify the max packet size.
+ * USB2.0 spec 9.6.6.
+ */
+#define GET_MAX_PACKET(p)	((p) & 0x7ff)
+
 /* tx_info bitmasks */
 #define AVG_TRB_LENGTH_FOR_EP(p)	((p) & 0xffff)
 #define MAX_ESIT_PAYLOAD_FOR_EP(p)	(((p) & 0xffff) << 16)
diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index 5a47805..c90c89d 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -364,7 +364,7 @@
 }
 
 static int
-mts_scsi_queuecommand(struct scsi_cmnd *srb, mts_scsi_cmnd_callback callback);
+mts_scsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *srb);
 
 static void mts_transfer_cleanup( struct urb *transfer );
 static void mts_do_sg(struct urb * transfer);
@@ -573,7 +573,7 @@
 
 
 static int
-mts_scsi_queuecommand(struct scsi_cmnd *srb, mts_scsi_cmnd_callback callback)
+mts_scsi_queuecommand_lck(struct scsi_cmnd *srb, mts_scsi_cmnd_callback callback)
 {
 	struct mts_desc* desc = (struct mts_desc*)(srb->device->host->hostdata[0]);
 	int err = 0;
@@ -626,6 +626,8 @@
 	return err;
 }
 
+static DEF_SCSI_QCMD(mts_scsi_queuecommand)
+
 static struct scsi_host_template mts_scsi_host_template = {
 	.module			= THIS_MODULE,
 	.name			= "microtekX6",
diff --git a/drivers/usb/misc/cypress_cy7c63.c b/drivers/usb/misc/cypress_cy7c63.c
index 2f43c57..9251773 100644
--- a/drivers/usb/misc/cypress_cy7c63.c
+++ b/drivers/usb/misc/cypress_cy7c63.c
@@ -196,11 +196,9 @@
 	return read_port(dev, attr, buf, 1, CYPRESS_READ_PORT_ID1);
 }
 
-static DEVICE_ATTR(port0, S_IWUGO | S_IRUGO,
-		   get_port0_handler, set_port0_handler);
+static DEVICE_ATTR(port0, S_IRUGO | S_IWUSR, get_port0_handler, set_port0_handler);
 
-static DEVICE_ATTR(port1, S_IWUGO | S_IRUGO,
-		   get_port1_handler, set_port1_handler);
+static DEVICE_ATTR(port1, S_IRUGO | S_IWUSR, get_port1_handler, set_port1_handler);
 
 
 static int cypress_probe(struct usb_interface *interface,
diff --git a/drivers/usb/misc/trancevibrator.c b/drivers/usb/misc/trancevibrator.c
index d77aba4..f63776a 100644
--- a/drivers/usb/misc/trancevibrator.c
+++ b/drivers/usb/misc/trancevibrator.c
@@ -86,7 +86,7 @@
 	return count;
 }
 
-static DEVICE_ATTR(speed, S_IWUGO | S_IRUGO, show_speed, set_speed);
+static DEVICE_ATTR(speed, S_IRUGO | S_IWUSR, show_speed, set_speed);
 
 static int tv_probe(struct usb_interface *interface,
 		    const struct usb_device_id *id)
diff --git a/drivers/usb/misc/usbled.c b/drivers/usb/misc/usbled.c
index 63da2c3..c96f51d 100644
--- a/drivers/usb/misc/usbled.c
+++ b/drivers/usb/misc/usbled.c
@@ -94,7 +94,7 @@
 	change_color(led);						\
 	return count;							\
 }									\
-static DEVICE_ATTR(value, S_IWUGO | S_IRUGO, show_##value, set_##value);
+static DEVICE_ATTR(value, S_IRUGO | S_IWUSR, show_##value, set_##value);
 show_set(blue);
 show_set(red);
 show_set(green);
diff --git a/drivers/usb/misc/usbsevseg.c b/drivers/usb/misc/usbsevseg.c
index de8ef94..417b8f2 100644
--- a/drivers/usb/misc/usbsevseg.c
+++ b/drivers/usb/misc/usbsevseg.c
@@ -192,7 +192,7 @@
 								\
 	return count;						\
 }								\
-static DEVICE_ATTR(name, S_IWUGO | S_IRUGO, show_attr_##name, set_attr_##name);
+static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_attr_##name, set_attr_##name);
 
 static ssize_t show_attr_text(struct device *dev,
 	struct device_attribute *attr, char *buf)
@@ -223,7 +223,7 @@
 	return count;
 }
 
-static DEVICE_ATTR(text, S_IWUGO | S_IRUGO, show_attr_text, set_attr_text);
+static DEVICE_ATTR(text, S_IRUGO | S_IWUSR, show_attr_text, set_attr_text);
 
 static ssize_t show_attr_decimals(struct device *dev,
 	struct device_attribute *attr, char *buf)
@@ -272,8 +272,7 @@
 	return count;
 }
 
-static DEVICE_ATTR(decimals, S_IWUGO | S_IRUGO,
-	show_attr_decimals, set_attr_decimals);
+static DEVICE_ATTR(decimals, S_IRUGO | S_IWUSR, show_attr_decimals, set_attr_decimals);
 
 static ssize_t show_attr_textmode(struct device *dev,
 	struct device_attribute *attr, char *buf)
@@ -319,8 +318,7 @@
 	return -EINVAL;
 }
 
-static DEVICE_ATTR(textmode, S_IWUGO | S_IRUGO,
-	show_attr_textmode, set_attr_textmode);
+static DEVICE_ATTR(textmode, S_IRUGO | S_IWUSR, show_attr_textmode, set_attr_textmode);
 
 
 MYDEV_ATTR_SIMPLE_UNSIGNED(powered, update_display_powered);
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 44cb37b..c436e1e 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -15,7 +15,6 @@
 #include <linux/poll.h>
 #include <linux/compat.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 
diff --git a/drivers/usb/mon/mon_stat.c b/drivers/usb/mon/mon_stat.c
index 8ec94f1..e5ce42b 100644
--- a/drivers/usb/mon/mon_stat.c
+++ b/drivers/usb/mon/mon_stat.c
@@ -11,7 +11,6 @@
 #include <linux/slab.h>
 #include <linux/usb.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
 #include "usb_mon.h"
diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c
index bdc3ea6..9fea482 100644
--- a/drivers/usb/otg/langwell_otg.c
+++ b/drivers/usb/otg/langwell_otg.c
@@ -1896,7 +1896,7 @@
 	}
 	return count;
 }
-static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUGO, get_a_bus_req, set_a_bus_req);
+static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUSR, get_a_bus_req, set_a_bus_req);
 
 static ssize_t
 get_a_bus_drop(struct device *dev, struct device_attribute *attr, char *buf)
@@ -1942,8 +1942,7 @@
 	}
 	return count;
 }
-static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUGO,
-	get_a_bus_drop, set_a_bus_drop);
+static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUSR, get_a_bus_drop, set_a_bus_drop);
 
 static ssize_t
 get_b_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
@@ -1988,7 +1987,7 @@
 	}
 	return count;
 }
-static DEVICE_ATTR(b_bus_req, S_IRUGO | S_IWUGO, get_b_bus_req, set_b_bus_req);
+static DEVICE_ATTR(b_bus_req, S_IRUGO | S_IWUSR, get_b_bus_req, set_b_bus_req);
 
 static ssize_t
 set_a_clr_err(struct device *dev, struct device_attribute *attr,
@@ -2012,7 +2011,7 @@
 	}
 	return count;
 }
-static DEVICE_ATTR(a_clr_err, S_IWUGO, NULL, set_a_clr_err);
+static DEVICE_ATTR(a_clr_err, S_IWUSR, NULL, set_a_clr_err);
 
 static struct attribute *inputs_attrs[] = {
 	&dev_attr_a_bus_req.attr,
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index e64da74..861223f 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -21,7 +21,6 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/tty.h>
 #include <linux/tty_driver.h>
 #include <linux/tty_flip.h>
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index a688b1e..689ee1f 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -285,7 +285,7 @@
 
 /* queue a command */
 /* This is always called with scsi_lock(host) held */
-static int queuecommand(struct scsi_cmnd *srb,
+static int queuecommand_lck(struct scsi_cmnd *srb,
 			void (*done)(struct scsi_cmnd *))
 {
 	struct us_data *us = host_to_us(srb->device->host);
@@ -315,6 +315,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(queuecommand)
+
 /***********************************************************************
  * Error handling functions
  ***********************************************************************/
diff --git a/drivers/usb/storage/sierra_ms.c b/drivers/usb/storage/sierra_ms.c
index 57fc2f5..ceba512 100644
--- a/drivers/usb/storage/sierra_ms.c
+++ b/drivers/usb/storage/sierra_ms.c
@@ -121,7 +121,7 @@
 	}
 	return result;
 }
-static DEVICE_ATTR(truinst, S_IWUGO | S_IRUGO, show_truinst, NULL);
+static DEVICE_ATTR(truinst, S_IRUGO, show_truinst, NULL);
 
 int sierra_ms_init(struct us_data *us)
 {
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index d126819..339fac3 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -430,7 +430,7 @@
 	return 0;
 }
 
-static int uas_queuecommand(struct scsi_cmnd *cmnd,
+static int uas_queuecommand_lck(struct scsi_cmnd *cmnd,
 					void (*done)(struct scsi_cmnd *))
 {
 	struct scsi_device *sdev = cmnd->device;
@@ -488,6 +488,8 @@
 	return 0;
 }
 
+static DEF_SCSI_QCMD(uas_queuecommand)
+
 static int uas_eh_abort_handler(struct scsi_cmnd *cmnd)
 {
 	struct scsi_device *sdev = cmnd->device;
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index e207810..0870329 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -197,12 +197,12 @@
 {
 	struct backlight_device *bd = to_backlight_device(dev);
 
-	if (bd->ops->options & BL_CORE_SUSPENDRESUME) {
-		mutex_lock(&bd->ops_lock);
+	mutex_lock(&bd->ops_lock);
+	if (bd->ops && bd->ops->options & BL_CORE_SUSPENDRESUME) {
 		bd->props.state |= BL_CORE_SUSPENDED;
 		backlight_update_status(bd);
-		mutex_unlock(&bd->ops_lock);
 	}
+	mutex_unlock(&bd->ops_lock);
 
 	return 0;
 }
@@ -211,12 +211,12 @@
 {
 	struct backlight_device *bd = to_backlight_device(dev);
 
-	if (bd->ops->options & BL_CORE_SUSPENDRESUME) {
-		mutex_lock(&bd->ops_lock);
+	mutex_lock(&bd->ops_lock);
+	if (bd->ops && bd->ops->options & BL_CORE_SUSPENDRESUME) {
 		bd->props.state &= ~BL_CORE_SUSPENDED;
 		backlight_update_status(bd);
-		mutex_unlock(&bd->ops_lock);
 	}
+	mutex_unlock(&bd->ops_lock);
 
 	return 0;
 }
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 54e32c5..915448e 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -47,7 +47,6 @@
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/screen_info.h>
-#include <linux/smp_lock.h>
 #include <video/vga.h>
 #include <asm/io.h>
 
diff --git a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c
index f53b9f1..affdf3e 100644
--- a/drivers/video/fbcmap.c
+++ b/drivers/video/fbcmap.c
@@ -88,34 +88,48 @@
  *
  */
 
-int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp)
+int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags)
 {
-    int size = len*sizeof(u16);
+	int size = len * sizeof(u16);
+	int ret = -ENOMEM;
 
-    if (cmap->len != len) {
-	fb_dealloc_cmap(cmap);
-	if (!len)
-	    return 0;
-	if (!(cmap->red = kmalloc(size, GFP_ATOMIC)))
-	    goto fail;
-	if (!(cmap->green = kmalloc(size, GFP_ATOMIC)))
-	    goto fail;
-	if (!(cmap->blue = kmalloc(size, GFP_ATOMIC)))
-	    goto fail;
-	if (transp) {
-	    if (!(cmap->transp = kmalloc(size, GFP_ATOMIC)))
+	if (cmap->len != len) {
+		fb_dealloc_cmap(cmap);
+		if (!len)
+			return 0;
+
+		cmap->red = kmalloc(size, flags);
+		if (!cmap->red)
+			goto fail;
+		cmap->green = kmalloc(size, flags);
+		if (!cmap->green)
+			goto fail;
+		cmap->blue = kmalloc(size, flags);
+		if (!cmap->blue)
+			goto fail;
+		if (transp) {
+			cmap->transp = kmalloc(size, flags);
+			if (!cmap->transp)
+				goto fail;
+		} else {
+			cmap->transp = NULL;
+		}
+	}
+	cmap->start = 0;
+	cmap->len = len;
+	ret = fb_copy_cmap(fb_default_cmap(len), cmap);
+	if (ret)
 		goto fail;
-	} else
-	    cmap->transp = NULL;
-    }
-    cmap->start = 0;
-    cmap->len = len;
-    fb_copy_cmap(fb_default_cmap(len), cmap);
-    return 0;
+	return 0;
 
 fail:
-    fb_dealloc_cmap(cmap);
-    return -ENOMEM;
+	fb_dealloc_cmap(cmap);
+	return ret;
+}
+
+int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp)
+{
+	return fb_alloc_cmap_gfp(cmap, len, transp, GFP_ATOMIC);
 }
 
 /**
@@ -250,8 +264,12 @@
 	int rc, size = cmap->len * sizeof(u16);
 	struct fb_cmap umap;
 
+	if (size < 0 || size < cmap->len)
+		return -E2BIG;
+
 	memset(&umap, 0, sizeof(struct fb_cmap));
-	rc = fb_alloc_cmap(&umap, cmap->len, cmap->transp != NULL);
+	rc = fb_alloc_cmap_gfp(&umap, cmap->len, cmap->transp != NULL,
+				GFP_KERNEL);
 	if (rc)
 		return rc;
 	if (copy_from_user(umap.red, cmap->red, size) ||
diff --git a/drivers/video/geode/lxfb_ops.c b/drivers/video/geode/lxfb_ops.c
index bc35a95..85ec7f6 100644
--- a/drivers/video/geode/lxfb_ops.c
+++ b/drivers/video/geode/lxfb_ops.c
@@ -276,10 +276,10 @@
 		write_fp(par, FP_PT1, 0);
 		temp = FP_PT2_SCRC;
 
-		if (info->var.sync & FB_SYNC_HOR_HIGH_ACT)
+		if (!(info->var.sync & FB_SYNC_HOR_HIGH_ACT))
 			temp |= FP_PT2_HSP;
 
-		if (info->var.sync & FB_SYNC_VERT_HIGH_ACT)
+		if (!(info->var.sync & FB_SYNC_VERT_HIGH_ACT))
 			temp |= FP_PT2_VSP;
 
 		write_fp(par, FP_PT2, temp);
diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c
index 7cfc170..ca0f6be 100644
--- a/drivers/video/mx3fb.c
+++ b/drivers/video/mx3fb.c
@@ -27,6 +27,7 @@
 #include <linux/clk.h>
 #include <linux/mutex.h>
 
+#include <mach/dma.h>
 #include <mach/hardware.h>
 #include <mach/ipu.h>
 #include <mach/mx3fb.h>
@@ -1420,6 +1421,9 @@
 	struct device *dev;
 	struct mx3fb_platform_data *mx3fb_pdata;
 
+	if (!imx_dma_is_ipu(chan))
+		return false;
+
 	if (!rq)
 		return false;
 
diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c
index fed2a72..2fd7e52 100644
--- a/drivers/video/omap2/vram.c
+++ b/drivers/video/omap2/vram.c
@@ -554,9 +554,15 @@
 	size = PAGE_ALIGN(size);
 
 	if (paddr) {
-		if ((paddr & ~PAGE_MASK) ||
-		    !memblock_is_region_memory(paddr, size)) {
-			pr_err("Illegal SDRAM region for VRAM\n");
+		if (paddr & ~PAGE_MASK) {
+			pr_err("VRAM start address 0x%08x not page aligned\n",
+					paddr);
+			return;
+		}
+
+		if (!memblock_is_region_memory(paddr, size)) {
+			pr_err("Illegal SDRAM region 0x%08x..0x%08x for VRAM\n",
+					paddr, paddr + size - 1);
 			return;
 		}
 
@@ -570,9 +576,12 @@
 			return;
 		}
 	} else {
-		paddr = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_REAL_LIMIT);
+		paddr = memblock_alloc(size, PAGE_SIZE);
 	}
 
+	memblock_free(paddr, size);
+	memblock_remove(paddr, size);
+
 	omap_vram_add_region(paddr, size);
 
 	pr_info("Reserving %u bytes SDRAM for VRAM\n", size);
diff --git a/drivers/video/riva/rivafb-i2c.c b/drivers/video/riva/rivafb-i2c.c
index a0e22ac..167400e 100644
--- a/drivers/video/riva/rivafb-i2c.c
+++ b/drivers/video/riva/rivafb-i2c.c
@@ -94,7 +94,6 @@
 
 	strcpy(chan->adapter.name, name);
 	chan->adapter.owner		= THIS_MODULE;
-	chan->adapter.id		= I2C_HW_B_RIVA;
 	chan->adapter.class		= i2c_class;
 	chan->adapter.algo_data		= &chan->algo;
 	chan->adapter.dev.parent	= &chan->par->pdev->dev;
diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c
index 55b3077..d7df103 100644
--- a/drivers/video/sh_mobile_hdmi.c
+++ b/drivers/video/sh_mobile_hdmi.c
@@ -1071,6 +1071,10 @@
 		if (!hdmi->info)
 			goto out;
 
+		hdmi->monspec.modedb_len = 0;
+		fb_destroy_modedb(hdmi->monspec.modedb);
+		hdmi->monspec.modedb = NULL;
+
 		acquire_console_sem();
 
 		/* HDMI disconnect */
@@ -1078,7 +1082,6 @@
 
 		release_console_sem();
 		pm_runtime_put(hdmi->dev);
-		fb_destroy_modedb(hdmi->monspec.modedb);
 	}
 
 out:
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index 5096373..b02d97a 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -115,15 +115,16 @@
 	.xres = 1280,
 	.yres = 720,
 
-	.left_margin = 200,
-	.right_margin = 88,
-	.hsync_len = 48,
+	.left_margin = 220,
+	.right_margin = 110,
+	.hsync_len = 40,
 
 	.upper_margin = 20,
 	.lower_margin = 5,
 	.vsync_len = 5,
 
 	.pixclock = 13468,
+	.refresh = 60,
 	.sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT,
 };
 
@@ -859,7 +860,7 @@
 		/* Couldn't reconfigure, hopefully, can continue as before */
 		return;
 
-	info->fix.line_length = mode2.xres * (ch->cfg.bpp / 8);
+	info->fix.line_length = mode1.xres * (ch->cfg.bpp / 8);
 
 	/*
 	 * fb_set_var() calls the notifier change internally, only if
@@ -867,7 +868,7 @@
 	 * user event, we have to call the chain ourselves.
 	 */
 	event.info = info;
-	event.data = &mode2;
+	event.data = &mode1;
 	fb_notifier_call_chain(evnt, &event);
 }
 
@@ -1197,6 +1198,7 @@
 		const struct fb_videomode *mode = cfg->lcd_cfg;
 		unsigned long max_size = 0;
 		int k;
+		int num_cfg;
 
 		ch->info = framebuffer_alloc(0, &pdev->dev);
 		if (!ch->info) {
@@ -1232,8 +1234,14 @@
 		info->fix = sh_mobile_lcdc_fix;
 		info->fix.smem_len = max_size * (cfg->bpp / 8) * 2;
 
-		if (!mode)
+		if (!mode) {
 			mode = &default_720p;
+			num_cfg = 1;
+		} else {
+			num_cfg = ch->cfg.num_cfg;
+		}
+
+		fb_videomode_to_modelist(mode, num_cfg, &info->modelist);
 
 		fb_videomode_to_var(var, mode);
 		/* Default Y virtual resolution is 2x panel size */
@@ -1281,10 +1289,6 @@
 
 	for (i = 0; i < j; i++) {
 		struct sh_mobile_lcdc_chan *ch = priv->ch + i;
-		const struct fb_videomode *mode = ch->cfg.lcd_cfg;
-
-		if (!mode)
-			mode = &default_720p;
 
 		info = ch->info;
 
@@ -1297,7 +1301,6 @@
 			}
 		}
 
-		fb_videomode_to_modelist(mode, ch->cfg.num_cfg, &info->modelist);
 		error = register_framebuffer(info);
 		if (error < 0)
 			goto err1;
diff --git a/drivers/video/sis/init.c b/drivers/video/sis/init.c
index c311ad3..31137ad 100644
--- a/drivers/video/sis/init.c
+++ b/drivers/video/sis/init.c
@@ -62,11 +62,11 @@
 
 #include "init.h"
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 #include "300vtbl.h"
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 #include "310vtbl.h"
 #endif
 
@@ -78,7 +78,7 @@
 /*         POINTER INITIALIZATION            */
 /*********************************************/
 
-#if defined(SIS300) || defined(SIS315H)
+#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315)
 static void
 InitCommonPointer(struct SiS_Private *SiS_Pr)
 {
@@ -160,7 +160,7 @@
 }
 #endif
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static void
 InitTo300Pointer(struct SiS_Private *SiS_Pr)
 {
@@ -237,7 +237,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static void
 InitTo310Pointer(struct SiS_Private *SiS_Pr)
 {
@@ -321,13 +321,13 @@
 SiSInitPtr(struct SiS_Private *SiS_Pr)
 {
    if(SiS_Pr->ChipType < SIS_315H) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
       InitTo300Pointer(SiS_Pr);
 #else
       return false;
 #endif
    } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       InitTo310Pointer(SiS_Pr);
 #else
       return false;
@@ -340,9 +340,7 @@
 /*            HELPER: Get ModeID             */
 /*********************************************/
 
-#ifndef SIS_XORG_XF86
 static
-#endif
 unsigned short
 SiS_GetModeID(int VGAEngine, unsigned int VBFlags, int HDisplay, int VDisplay,
 		int Depth, bool FSTN, int LCDwidth, int LCDheight)
@@ -884,51 +882,51 @@
 void
 SiS_SetReg(SISIOADDRESS port, unsigned short index, unsigned short data)
 {
-   OutPortByte(port, index);
-   OutPortByte(port + 1, data);
+	outb((u8)index, port);
+	outb((u8)data, port + 1);
 }
 
 void
 SiS_SetRegByte(SISIOADDRESS port, unsigned short data)
 {
-   OutPortByte(port, data);
+	outb((u8)data, port);
 }
 
 void
 SiS_SetRegShort(SISIOADDRESS port, unsigned short data)
 {
-   OutPortWord(port, data);
+	outw((u16)data, port);
 }
 
 void
 SiS_SetRegLong(SISIOADDRESS port, unsigned int data)
 {
-   OutPortLong(port, data);
+	outl((u32)data, port);
 }
 
 unsigned char
 SiS_GetReg(SISIOADDRESS port, unsigned short index)
 {
-   OutPortByte(port, index);
-   return(InPortByte(port + 1));
+	outb((u8)index, port);
+	return inb(port + 1);
 }
 
 unsigned char
 SiS_GetRegByte(SISIOADDRESS port)
 {
-   return(InPortByte(port));
+	return inb(port);
 }
 
 unsigned short
 SiS_GetRegShort(SISIOADDRESS port)
 {
-   return(InPortWord(port));
+	return inw(port);
 }
 
 unsigned int
 SiS_GetRegLong(SISIOADDRESS port)
 {
-   return(InPortLong(port));
+	return inl(port);
 }
 
 void
@@ -1089,7 +1087,7 @@
 SiSInitPCIetc(struct SiS_Private *SiS_Pr)
 {
    switch(SiS_Pr->ChipType) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
    case SIS_300:
    case SIS_540:
    case SIS_630:
@@ -1108,7 +1106,7 @@
       SiS_SetRegOR(SiS_Pr->SiS_P3c4,0x1E,0x5A);
       break;
 #endif
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    case SIS_315H:
    case SIS_315:
    case SIS_315PRO:
@@ -1152,9 +1150,7 @@
 /*             HELPER: SetLVDSetc            */
 /*********************************************/
 
-#ifdef SIS_LINUX_KERNEL
 static
-#endif
 void
 SiSSetLVDSetc(struct SiS_Private *SiS_Pr)
 {
@@ -1174,7 +1170,7 @@
    if((temp == 1) || (temp == 2)) return;
 
    switch(SiS_Pr->ChipType) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
    case SIS_540:
    case SIS_630:
    case SIS_730:
@@ -1188,7 +1184,7 @@
 	}
 	break;
 #endif
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    case SIS_550:
    case SIS_650:
    case SIS_740:
@@ -1420,9 +1416,7 @@
 /*             HELPER: GetVBType             */
 /*********************************************/
 
-#ifdef SIS_LINUX_KERNEL
 static
-#endif
 void
 SiS_GetVBType(struct SiS_Private *SiS_Pr)
 {
@@ -1487,7 +1481,6 @@
 /*           HELPER: Check RAM size          */
 /*********************************************/
 
-#ifdef SIS_LINUX_KERNEL
 static bool
 SiS_CheckMemorySize(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
 		unsigned short ModeIdIndex)
@@ -1501,13 +1494,12 @@
    if(AdapterMemSize < memorysize) return false;
    return true;
 }
-#endif
 
 /*********************************************/
 /*           HELPER: Get DRAM type           */
 /*********************************************/
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static unsigned char
 SiS_Get310DRAMType(struct SiS_Private *SiS_Pr)
 {
@@ -1574,7 +1566,6 @@
 /*           HELPER: ClearBuffer             */
 /*********************************************/
 
-#ifdef SIS_LINUX_KERNEL
 static void
 SiS_ClearBuffer(struct SiS_Private *SiS_Pr, unsigned short ModeNo)
 {
@@ -1587,7 +1578,7 @@
 
    if(SiS_Pr->SiS_ModeType >= ModeEGA) {
       if(ModeNo > 0x13) {
-	 SiS_SetMemory(memaddr, memsize, 0);
+	 memset_io(memaddr, 0, memsize);
       } else {
 	 pBuffer = (unsigned short SISIOMEMTYPE *)memaddr;
 	 for(i = 0; i < 0x4000; i++) writew(0x0000, &pBuffer[i]);
@@ -1596,10 +1587,9 @@
       pBuffer = (unsigned short SISIOMEMTYPE *)memaddr;
       for(i = 0; i < 0x4000; i++) writew(0x0720, &pBuffer[i]);
    } else {
-      SiS_SetMemory(memaddr, 0x8000, 0);
+      memset_io(memaddr, 0, 0x8000);
    }
 }
-#endif
 
 /*********************************************/
 /*           HELPER: SearchModeID            */
@@ -2132,7 +2122,7 @@
       SiS_SetReg(SiS_Pr->SiS_P3d4,0x14,0x4F);
    }
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if(SiS_Pr->ChipType == XGI_20) {
       SiS_SetReg(SiS_Pr->SiS_P3d4,0x04,crt1data[4] - 1);
       if(!(temp = crt1data[5] & 0x1f)) {
@@ -2215,7 +2205,7 @@
    SiS_SetReg(SiS_Pr->SiS_P3c4,0x2c,clkb);
 
    if(SiS_Pr->ChipType >= SIS_315H) {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       SiS_SetReg(SiS_Pr->SiS_P3c4,0x2D,0x01);
       if(SiS_Pr->ChipType == XGI_20) {
          unsigned short mf = SiS_GetModeFlag(SiS_Pr, ModeNo, ModeIdIndex);
@@ -2236,7 +2226,7 @@
 /*                  FIFO                     */
 /*********************************************/
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 void
 SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *idx1,
 		unsigned short *idx2)
@@ -2506,11 +2496,7 @@
    SiS_SetRegANDOR(SiS_Pr->SiS_P3c4,0x09,0x80,data);
 
   /* Write foreground and background queue */
-#ifdef SIS_LINUX_KERNEL
    templ = sisfb_read_nbridge_pci_dword(SiS_Pr, 0x50);
-#else
-   templ = pciReadLong(0x00000000, 0x50);
-#endif
 
    if(SiS_Pr->ChipType == SIS_730) {
 
@@ -2530,13 +2516,8 @@
 
    }
 
-#ifdef SIS_LINUX_KERNEL
    sisfb_write_nbridge_pci_dword(SiS_Pr, 0x50, templ);
    templ = sisfb_read_nbridge_pci_dword(SiS_Pr, 0xA0);
-#else
-   pciWriteLong(0x00000000, 0x50, templ);
-   templ = pciReadLong(0x00000000, 0xA0);
-#endif
 
    /* GUI grant timer (PCI config 0xA3) */
    if(SiS_Pr->ChipType == SIS_730) {
@@ -2552,15 +2533,11 @@
 
    }
 
-#ifdef SIS_LINUX_KERNEL
    sisfb_write_nbridge_pci_dword(SiS_Pr, 0xA0, templ);
-#else
-   pciWriteLong(0x00000000, 0xA0, templ);
-#endif
 }
-#endif /* SIS300 */
+#endif /* CONFIG_FB_SIS_300 */
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static void
 SiS_SetCRT1FIFO_310(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex)
 {
@@ -2612,7 +2589,7 @@
    }
 
    if(SiS_Pr->ChipType < SIS_315H) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
       if(VCLK > 150) data |= 0x80;
       SiS_SetRegANDOR(SiS_Pr->SiS_P3c4,0x07,0x7B,data);
 
@@ -2621,7 +2598,7 @@
       SiS_SetRegANDOR(SiS_Pr->SiS_P3c4,0x32,0xF7,data);
 #endif
    } else if(SiS_Pr->ChipType < XGI_20) {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       if(VCLK >= 166) data |= 0x0c;
       SiS_SetRegANDOR(SiS_Pr->SiS_P3c4,0x32,0xf3,data);
 
@@ -2630,7 +2607,7 @@
       }
 #endif
    } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       if(VCLK >= 200) data |= 0x0c;
       if(SiS_Pr->ChipType == XGI_20) data &= ~0x04;
       SiS_SetRegANDOR(SiS_Pr->SiS_P3c4,0x32,0xf3,data);
@@ -2675,7 +2652,7 @@
 		unsigned short ModeIdIndex, unsigned short RRTI)
 {
    unsigned short data, infoflag = 0, modeflag, resindex;
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    unsigned char  *ROMAddr  = SiS_Pr->VirtualRomBase;
    unsigned short data2, data3;
 #endif
@@ -2736,7 +2713,7 @@
       SiS_SetRegANDOR(SiS_Pr->SiS_P3c4,0x0F,0xB7,data);
    }
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if(SiS_Pr->ChipType >= SIS_315H) {
       SiS_SetRegAND(SiS_Pr->SiS_P3c4,0x31,0xfb);
    }
@@ -2826,7 +2803,7 @@
 
    SiS_SetVCLKState(SiS_Pr, ModeNo, RRTI, ModeIdIndex);
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if(((SiS_Pr->ChipType >= SIS_315H) && (SiS_Pr->ChipType < SIS_661)) ||
        (SiS_Pr->ChipType == XGI_40)) {
       if(SiS_GetReg(SiS_Pr->SiS_P3d4,0x31) & 0x40) {
@@ -2845,7 +2822,7 @@
 #endif
 }
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static void
 SiS_SetupDualChip(struct SiS_Private *SiS_Pr)
 {
@@ -2999,11 +2976,6 @@
    SiS_Pr->SiS_SelectCRT2Rate = 0;
    SiS_Pr->SiS_SetFlag &= (~ProgrammingCRT2);
 
-#ifdef SIS_XORG_XF86
-   xf86DrvMsgVerb(0, X_PROBED, 4, "(init: VBType=0x%04x, VBInfo=0x%04x)\n",
-                    SiS_Pr->SiS_VBType, SiS_Pr->SiS_VBInfo);
-#endif
-
    if(SiS_Pr->SiS_VBInfo & SetSimuScanMode) {
       if(SiS_Pr->SiS_VBInfo & SetInSlaveMode) {
          SiS_Pr->SiS_SetFlag |= ProgrammingCRT2;
@@ -3028,7 +3000,7 @@
    }
 
    switch(SiS_Pr->ChipType) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
    case SIS_300:
       SiS_SetCRT1FIFO_300(SiS_Pr, ModeNo, RefreshRateTableIndex);
       break;
@@ -3039,7 +3011,7 @@
       break;
 #endif
    default:
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       if(SiS_Pr->ChipType == XGI_20) {
          unsigned char sr2b = 0, sr2c = 0;
          switch(ModeNo) {
@@ -3062,7 +3034,7 @@
 
    SiS_SetCRT1ModeRegs(SiS_Pr, ModeNo, ModeIdIndex, RefreshRateTableIndex);
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if(SiS_Pr->ChipType == XGI_40) {
       SiS_SetupDualChip(SiS_Pr);
    }
@@ -3070,11 +3042,9 @@
 
    SiS_LoadDAC(SiS_Pr, ModeNo, ModeIdIndex);
 
-#ifdef SIS_LINUX_KERNEL
    if(SiS_Pr->SiS_flag_clearbuffer) {
       SiS_ClearBuffer(SiS_Pr, ModeNo);
    }
-#endif
 
    if(!(SiS_Pr->SiS_VBInfo & (SetSimuScanMode | SwitchCRT2 | SetCRT2ToLCDA))) {
       SiS_WaitRetrace1(SiS_Pr);
@@ -3104,7 +3074,7 @@
 static void
 SiS_ResetVB(struct SiS_Private *SiS_Pr)
 {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    unsigned char  *ROMAddr = SiS_Pr->VirtualRomBase;
    unsigned short temp;
 
@@ -3139,7 +3109,7 @@
     * which locks CRT2 in some way to CRT1 timing. Disable
     * this here.
     */
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if((IS_SIS651) || (IS_SISM650) ||
       SiS_Pr->ChipType == SIS_340 ||
       SiS_Pr->ChipType == XGI_40) {
@@ -3160,7 +3130,7 @@
 static void
 SiS_Handle760(struct SiS_Private *SiS_Pr)
 {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    unsigned int somebase;
    unsigned char temp1, temp2, temp3;
 
@@ -3170,11 +3140,7 @@
        (!(SiS_Pr->SiS_SysFlags & SF_760UMA)) )
       return;
 
-#ifdef SIS_LINUX_KERNEL
    somebase = sisfb_read_mio_pci_word(SiS_Pr, 0x74);
-#else
-   somebase = pciReadWord(0x00001000, 0x74);
-#endif
    somebase &= 0xffff;
 
    if(somebase == 0) return;
@@ -3190,105 +3156,34 @@
       temp2 = 0x0b;
    }
 
-#ifdef SIS_LINUX_KERNEL
    sisfb_write_nbridge_pci_byte(SiS_Pr, 0x7e, temp1);
    sisfb_write_nbridge_pci_byte(SiS_Pr, 0x8d, temp2);
-#else
-   pciWriteByte(0x00000000, 0x7e, temp1);
-   pciWriteByte(0x00000000, 0x8d, temp2);
-#endif
 
    SiS_SetRegByte((somebase + 0x85), temp3);
 #endif
 }
 
 /*********************************************/
-/*      X.org/XFree86: SET SCREEN PITCH      */
-/*********************************************/
-
-#ifdef SIS_XORG_XF86
-static void
-SiS_SetPitchCRT1(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn)
-{
-   SISPtr pSiS = SISPTR(pScrn);
-   unsigned short HDisplay = pSiS->scrnPitch >> 3;
-
-   SiS_SetReg(SiS_Pr->SiS_P3d4,0x13,(HDisplay & 0xFF));
-   SiS_SetRegANDOR(SiS_Pr->SiS_P3c4,0x0E,0xF0,(HDisplay >> 8));
-}
-
-static void
-SiS_SetPitchCRT2(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn)
-{
-   SISPtr pSiS = SISPTR(pScrn);
-   unsigned short HDisplay = pSiS->scrnPitch2 >> 3;
-
-    /* Unlock CRT2 */
-   if(pSiS->VGAEngine == SIS_315_VGA)
-      SiS_SetRegOR(SiS_Pr->SiS_Part1Port,0x2F, 0x01);
-   else
-      SiS_SetRegOR(SiS_Pr->SiS_Part1Port,0x24, 0x01);
-
-   SiS_SetReg(SiS_Pr->SiS_Part1Port,0x07,(HDisplay & 0xFF));
-   SiS_SetRegANDOR(SiS_Pr->SiS_Part1Port,0x09,0xF0,(HDisplay >> 8));
-}
-
-static void
-SiS_SetPitch(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn)
-{
-   SISPtr pSiS = SISPTR(pScrn);
-   bool isslavemode = false;
-
-   if( (pSiS->VBFlags2 & VB2_VIDEOBRIDGE) &&
-       ( ((pSiS->VGAEngine == SIS_300_VGA) &&
-	  (SiS_GetReg(SiS_Pr->SiS_Part1Port,0x00) & 0xa0) == 0x20) ||
-	 ((pSiS->VGAEngine == SIS_315_VGA) &&
-	  (SiS_GetReg(SiS_Pr->SiS_Part1Port,0x00) & 0x50) == 0x10) ) ) {
-      isslavemode = true;
-   }
-
-   /* We need to set pitch for CRT1 if bridge is in slave mode, too */
-   if((pSiS->VBFlags & DISPTYPE_DISP1) || (isslavemode)) {
-      SiS_SetPitchCRT1(SiS_Pr, pScrn);
-   }
-   /* We must not set the pitch for CRT2 if bridge is in slave mode */
-   if((pSiS->VBFlags & DISPTYPE_DISP2) && (!isslavemode)) {
-      SiS_SetPitchCRT2(SiS_Pr, pScrn);
-   }
-}
-#endif
-
-/*********************************************/
 /*                 SiSSetMode()              */
 /*********************************************/
 
-#ifdef SIS_XORG_XF86
-/* We need pScrn for setting the pitch correctly */
-bool
-SiSSetMode(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn, unsigned short ModeNo, bool dosetpitch)
-#else
 bool
 SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo)
-#endif
 {
    SISIOADDRESS BaseAddr = SiS_Pr->IOAddress;
    unsigned short RealModeNo, ModeIdIndex;
    unsigned char  backupreg = 0;
-#ifdef SIS_LINUX_KERNEL
    unsigned short KeepLockReg;
 
    SiS_Pr->UseCustomMode = false;
    SiS_Pr->CRT1UsesCustomMode = false;
-#endif
 
    SiS_Pr->SiS_flag_clearbuffer = 0;
 
    if(SiS_Pr->UseCustomMode) {
       ModeNo = 0xfe;
    } else {
-#ifdef SIS_LINUX_KERNEL
       if(!(ModeNo & 0x80)) SiS_Pr->SiS_flag_clearbuffer = 1;
-#endif
       ModeNo &= 0x7f;
    }
 
@@ -3301,13 +3196,8 @@
    SiS_GetSysFlags(SiS_Pr);
 
    SiS_Pr->SiS_VGAINFO = 0x11;
-#if defined(SIS_XORG_XF86) && (defined(i386) || defined(__i386) || defined(__i386__) || defined(__AMD64__) || defined(__amd64__) || defined(__x86_64__))
-   if(pScrn) SiS_Pr->SiS_VGAINFO = SiS_GetSetBIOSScratch(pScrn, 0x489, 0xff);
-#endif
 
-#ifdef SIS_LINUX_KERNEL
    KeepLockReg = SiS_GetReg(SiS_Pr->SiS_P3c4,0x05);
-#endif
    SiS_SetReg(SiS_Pr->SiS_P3c4,0x05,0x86);
 
    SiSInitPCIetc(SiS_Pr);
@@ -3344,12 +3234,10 @@
    SiS_GetLCDResInfo(SiS_Pr, ModeNo, ModeIdIndex);
    SiS_SetLowModeTest(SiS_Pr, ModeNo);
 
-#ifdef SIS_LINUX_KERNEL
    /* Check memory size (kernel framebuffer driver only) */
    if(!SiS_CheckMemorySize(SiS_Pr, ModeNo, ModeIdIndex)) {
       return false;
    }
-#endif
 
    SiS_OpenCRTC(SiS_Pr);
 
@@ -3384,7 +3272,7 @@
    SiS_DisplayOn(SiS_Pr);
    SiS_SetRegByte(SiS_Pr->SiS_P3c6,0xFF);
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if(SiS_Pr->ChipType >= SIS_315H) {
       if(SiS_Pr->SiS_IF_DEF_LVDS == 1) {
 	 if(!(SiS_IsDualEdge(SiS_Pr))) {
@@ -3396,7 +3284,7 @@
 
    if(SiS_Pr->SiS_VBType & VB_SIS30xBLV) {
       if(SiS_Pr->ChipType >= SIS_315H) {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	 if(!SiS_Pr->SiS_ROMNew) {
 	    if(SiS_IsVAMode(SiS_Pr)) {
 	       SiS_SetRegOR(SiS_Pr->SiS_P3d4,0x35,0x01);
@@ -3424,424 +3312,16 @@
       }
    }
 
-#ifdef SIS_XORG_XF86
-   if(pScrn) {
-      /* SetPitch: Adapt to virtual size & position */
-      if((ModeNo > 0x13) && (dosetpitch)) {
-	 SiS_SetPitch(SiS_Pr, pScrn);
-      }
-
-      /* Backup/Set ModeNo in BIOS scratch area */
-      SiS_GetSetModeID(pScrn, ModeNo);
-   }
-#endif
-
    SiS_CloseCRTC(SiS_Pr);
 
    SiS_Handle760(SiS_Pr);
 
-#ifdef SIS_LINUX_KERNEL
    /* We never lock registers in XF86 */
    if(KeepLockReg != 0xA1) SiS_SetReg(SiS_Pr->SiS_P3c4,0x05,0x00);
-#endif
 
    return true;
 }
 
-/*********************************************/
-/*       X.org/XFree86: SiSBIOSSetMode()     */
-/*           for non-Dual-Head mode          */
-/*********************************************/
-
-#ifdef SIS_XORG_XF86
-bool
-SiSBIOSSetMode(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn,
-               DisplayModePtr mode, bool IsCustom)
-{
-   SISPtr pSiS = SISPTR(pScrn);
-   unsigned short ModeNo = 0;
-
-   SiS_Pr->UseCustomMode = false;
-
-   if((IsCustom) && (SiS_CheckBuildCustomMode(pScrn, mode, pSiS->VBFlags))) {
-
-      xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 3, "Setting custom mode %dx%d\n",
-		SiS_Pr->CHDisplay,
-		(mode->Flags & V_INTERLACE ? SiS_Pr->CVDisplay * 2 :
-		   (mode->Flags & V_DBLSCAN ? SiS_Pr->CVDisplay / 2 :
-		      SiS_Pr->CVDisplay)));
-
-   } else {
-
-      /* Don't need vbflags here; checks done earlier */
-      ModeNo = SiS_GetModeNumber(pScrn, mode, pSiS->VBFlags);
-      if(!ModeNo) return false;
-
-      xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 3, "Setting standard mode 0x%x\n", ModeNo);
-
-   }
-
-   return(SiSSetMode(SiS_Pr, pScrn, ModeNo, true));
-}
-
-/*********************************************/
-/*    X.org/XFree86: SiSBIOSSetModeCRT2()    */
-/*           for Dual-Head modes             */
-/*********************************************/
-
-bool
-SiSBIOSSetModeCRT2(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn,
-               DisplayModePtr mode, bool IsCustom)
-{
-   SISIOADDRESS BaseAddr = SiS_Pr->IOAddress;
-   SISPtr  pSiS = SISPTR(pScrn);
-#ifdef SISDUALHEAD
-   SISEntPtr pSiSEnt = pSiS->entityPrivate;
-#endif
-   unsigned short ModeIdIndex;
-   unsigned short ModeNo = 0;
-   unsigned char  backupreg = 0;
-
-   SiS_Pr->UseCustomMode = false;
-
-   /* Remember: Custom modes for CRT2 are ONLY supported
-    *     -) on the 30x/B/C, and
-    *     -) if CRT2 is LCD or VGA, or CRT1 is LCDA
-    */
-
-   if((IsCustom) && (SiS_CheckBuildCustomMode(pScrn, mode, pSiS->VBFlags))) {
-
-	 ModeNo = 0xfe;
-
-   } else {
-
-	 ModeNo = SiS_GetModeNumber(pScrn, mode, pSiS->VBFlags);
-	 if(!ModeNo) return false;
-
-   }
-
-   SiSRegInit(SiS_Pr, BaseAddr);
-   SiSInitPtr(SiS_Pr);
-   SiS_GetSysFlags(SiS_Pr);
-#if defined(i386) || defined(__i386) || defined(__i386__) || defined(__AMD64__) || defined(__amd64__) || defined(__x86_64__)
-   SiS_Pr->SiS_VGAINFO = SiS_GetSetBIOSScratch(pScrn, 0x489, 0xff);
-#else
-   SiS_Pr->SiS_VGAINFO = 0x11;
-#endif
-
-   SiS_SetReg(SiS_Pr->SiS_P3c4,0x05,0x86);
-
-   SiSInitPCIetc(SiS_Pr);
-   SiSSetLVDSetc(SiS_Pr);
-   SiSDetermineROMUsage(SiS_Pr);
-
-   /* Save mode info so we can set it from within SetMode for CRT1 */
-#ifdef SISDUALHEAD
-   if(pSiS->DualHeadMode) {
-      pSiSEnt->CRT2ModeNo = ModeNo;
-      pSiSEnt->CRT2DMode = mode;
-      pSiSEnt->CRT2IsCustom = IsCustom;
-      pSiSEnt->CRT2CR30 = SiS_GetReg(SiS_Pr->SiS_P3d4,0x30);
-      pSiSEnt->CRT2CR31 = SiS_GetReg(SiS_Pr->SiS_P3d4,0x31);
-      pSiSEnt->CRT2CR35 = SiS_GetReg(SiS_Pr->SiS_P3d4,0x35);
-      pSiSEnt->CRT2CR38 = SiS_GetReg(SiS_Pr->SiS_P3d4,0x38);
-#if 0
-      /* We can't set CRT2 mode before CRT1 mode is set - says who...? */
-      if(pSiSEnt->CRT1ModeNo == -1) {
-	 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 3,
-		"Setting CRT2 mode delayed until after setting CRT1 mode\n");
-	 return true;
-      }
-#endif
-      pSiSEnt->CRT2ModeSet = true;
-   }
-#endif
-
-   if(SiS_Pr->UseCustomMode) {
-
-      unsigned short temptemp = SiS_Pr->CVDisplay;
-
-      if(SiS_Pr->CModeFlag & DoubleScanMode)     temptemp >>= 1;
-      else if(SiS_Pr->CInfoFlag & InterlaceMode) temptemp <<= 1;
-
-      xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 3,
-	  "Setting custom mode %dx%d on CRT2\n",
-	  SiS_Pr->CHDisplay, temptemp);
-
-   } else {
-
-      xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 3,
-	  "Setting standard mode 0x%x on CRT2\n", ModeNo);
-
-   }
-
-   SiS_UnLockCRT2(SiS_Pr);
-
-   if(!SiS_Pr->UseCustomMode) {
-      if(!(SiS_SearchModeID(SiS_Pr, &ModeNo, &ModeIdIndex))) return false;
-   } else {
-      ModeIdIndex = 0;
-   }
-
-   SiS_GetVBType(SiS_Pr);
-
-   SiS_InitVB(SiS_Pr);
-   if(SiS_Pr->SiS_VBType & VB_SIS30xBLV) {
-      if(SiS_Pr->ChipType >= SIS_315H) {
-	 SiS_ResetVB(SiS_Pr);
-	 SiS_SetRegOR(SiS_Pr->SiS_P3c4,0x32,0x10);
-	 SiS_SetRegOR(SiS_Pr->SiS_Part2Port,0x00,0x0c);
-	 backupreg = SiS_GetReg(SiS_Pr->SiS_P3d4,0x38);
-      } else {
-	 backupreg = SiS_GetReg(SiS_Pr->SiS_P3d4,0x35);
-      }
-   }
-
-   /* Get VB information (connectors, connected devices) */
-   if(!SiS_Pr->UseCustomMode) {
-      SiS_GetVBInfo(SiS_Pr, ModeNo, ModeIdIndex, 1);
-   } else {
-      /* If this is a custom mode, we don't check the modeflag for CRT2Mode */
-      SiS_GetVBInfo(SiS_Pr, ModeNo, ModeIdIndex, 0);
-   }
-   SiS_SetYPbPr(SiS_Pr);
-   SiS_SetTVMode(SiS_Pr, ModeNo, ModeIdIndex);
-   SiS_GetLCDResInfo(SiS_Pr, ModeNo, ModeIdIndex);
-   SiS_SetLowModeTest(SiS_Pr, ModeNo);
-
-   SiS_ResetSegmentRegisters(SiS_Pr);
-
-   /* Set mode on CRT2 */
-   if( (SiS_Pr->SiS_VBType & VB_SISVB)    ||
-       (SiS_Pr->SiS_IF_DEF_LVDS     == 1) ||
-       (SiS_Pr->SiS_IF_DEF_CH70xx   != 0) ||
-       (SiS_Pr->SiS_IF_DEF_TRUMPION != 0) ) {
-      SiS_SetCRT2Group(SiS_Pr, ModeNo);
-   }
-
-   SiS_StrangeStuff(SiS_Pr);
-
-   SiS_DisplayOn(SiS_Pr);
-   SiS_SetRegByte(SiS_Pr->SiS_P3c6,0xFF);
-
-   if(SiS_Pr->ChipType >= SIS_315H) {
-      if(SiS_Pr->SiS_IF_DEF_LVDS == 1) {
-	 if(!(SiS_IsDualEdge(SiS_Pr))) {
-	    SiS_SetRegAND(SiS_Pr->SiS_Part1Port,0x13,0xfb);
-	 }
-      }
-   }
-
-   if(SiS_Pr->SiS_VBType & VB_SIS30xBLV) {
-      if(SiS_Pr->ChipType >= SIS_315H) {
-	 if(!SiS_Pr->SiS_ROMNew) {
-	    if(SiS_IsVAMode(SiS_Pr)) {
-	       SiS_SetRegOR(SiS_Pr->SiS_P3d4,0x35,0x01);
-	    } else {
-	       SiS_SetRegAND(SiS_Pr->SiS_P3d4,0x35,0xFE);
-	    }
-	 }
-
-	 SiS_SetReg(SiS_Pr->SiS_P3d4,0x38,backupreg);
-
-	 if(SiS_GetReg(SiS_Pr->SiS_P3d4,0x30) & SetCRT2ToLCD) {
-	    SiS_SetRegAND(SiS_Pr->SiS_P3d4,0x38,0xfc);
-	 }
-      } else if((SiS_Pr->ChipType == SIS_630) ||
-	        (SiS_Pr->ChipType == SIS_730)) {
-         SiS_SetReg(SiS_Pr->SiS_P3d4,0x35,backupreg);
-      }
-   }
-
-   /* SetPitch: Adapt to virtual size & position */
-   SiS_SetPitchCRT2(SiS_Pr, pScrn);
-
-   SiS_Handle760(SiS_Pr);
-
-   return true;
-}
-
-/*********************************************/
-/*    X.org/XFree86: SiSBIOSSetModeCRT1()    */
-/*           for Dual-Head modes             */
-/*********************************************/
-
-bool
-SiSBIOSSetModeCRT1(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn,
-                   DisplayModePtr mode, bool IsCustom)
-{
-   SISIOADDRESS BaseAddr = SiS_Pr->IOAddress;
-   SISPtr  pSiS = SISPTR(pScrn);
-   unsigned short ModeIdIndex, ModeNo = 0;
-   unsigned char  backupreg = 0;
-#ifdef SISDUALHEAD
-   SISEntPtr pSiSEnt = pSiS->entityPrivate;
-   unsigned char  backupcr30, backupcr31, backupcr38, backupcr35, backupp40d=0;
-   bool backupcustom;
-#endif
-
-   SiS_Pr->UseCustomMode = false;
-
-   if((IsCustom) && (SiS_CheckBuildCustomMode(pScrn, mode, pSiS->VBFlags))) {
-
-	 unsigned short temptemp = SiS_Pr->CVDisplay;
-
-	 if(SiS_Pr->CModeFlag & DoubleScanMode)     temptemp >>= 1;
-	 else if(SiS_Pr->CInfoFlag & InterlaceMode) temptemp <<= 1;
-
-	 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 3,
-	 	"Setting custom mode %dx%d on CRT1\n",
-	 	SiS_Pr->CHDisplay, temptemp);
-	 ModeNo = 0xfe;
-
-   } else {
-
-	 ModeNo = SiS_GetModeNumber(pScrn, mode, 0); /* don't give VBFlags */
-	 if(!ModeNo) return false;
-
-	 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 3,
-	 	"Setting standard mode 0x%x on CRT1\n", ModeNo);
-   }
-
-   SiSInitPtr(SiS_Pr);
-   SiSRegInit(SiS_Pr, BaseAddr);
-   SiS_GetSysFlags(SiS_Pr);
-#if defined(i386) || defined(__i386) || defined(__i386__) || defined(__AMD64__) || defined(__amd64__) || defined(__x86_64__)
-   SiS_Pr->SiS_VGAINFO = SiS_GetSetBIOSScratch(pScrn, 0x489, 0xff);
-#else
-   SiS_Pr->SiS_VGAINFO = 0x11;
-#endif
-
-   SiS_SetReg(SiS_Pr->SiS_P3c4,0x05,0x86);
-
-   SiSInitPCIetc(SiS_Pr);
-   SiSSetLVDSetc(SiS_Pr);
-   SiSDetermineROMUsage(SiS_Pr);
-
-   SiS_UnLockCRT2(SiS_Pr);
-
-   if(!SiS_Pr->UseCustomMode) {
-      if(!(SiS_SearchModeID(SiS_Pr, &ModeNo, &ModeIdIndex))) return false;
-   } else {
-      ModeIdIndex = 0;
-   }
-
-   /* Determine VBType */
-   SiS_GetVBType(SiS_Pr);
-
-   SiS_InitVB(SiS_Pr);
-   if(SiS_Pr->SiS_VBType & VB_SIS30xBLV) {
-      if(SiS_Pr->ChipType >= SIS_315H) {
-         backupreg = SiS_GetReg(SiS_Pr->SiS_P3d4,0x38);
-      } else {
-         backupreg = SiS_GetReg(SiS_Pr->SiS_P3d4,0x35);
-      }
-   }
-
-   /* Get VB information (connectors, connected devices) */
-   /* (We don't care if the current mode is a CRT2 mode) */
-   SiS_GetVBInfo(SiS_Pr, ModeNo, ModeIdIndex, 0);
-   SiS_SetYPbPr(SiS_Pr);
-   SiS_SetTVMode(SiS_Pr, ModeNo, ModeIdIndex);
-   SiS_GetLCDResInfo(SiS_Pr, ModeNo, ModeIdIndex);
-   SiS_SetLowModeTest(SiS_Pr, ModeNo);
-
-   SiS_OpenCRTC(SiS_Pr);
-
-   /* Set mode on CRT1 */
-   SiS_SetCRT1Group(SiS_Pr, ModeNo, ModeIdIndex);
-   if(SiS_Pr->SiS_VBInfo & SetCRT2ToLCDA) {
-      SiS_SetCRT2Group(SiS_Pr, ModeNo);
-   }
-
-   /* SetPitch: Adapt to virtual size & position */
-   SiS_SetPitchCRT1(SiS_Pr, pScrn);
-
-   SiS_HandleCRT1(SiS_Pr);
-
-   SiS_StrangeStuff(SiS_Pr);
-
-   SiS_CloseCRTC(SiS_Pr);
-
-#ifdef SISDUALHEAD
-   if(pSiS->DualHeadMode) {
-      pSiSEnt->CRT1ModeNo = ModeNo;
-      pSiSEnt->CRT1DMode = mode;
-   }
-#endif
-
-   if(SiS_Pr->UseCustomMode) {
-      SiS_Pr->CRT1UsesCustomMode = true;
-      SiS_Pr->CSRClock_CRT1 = SiS_Pr->CSRClock;
-      SiS_Pr->CModeFlag_CRT1 = SiS_Pr->CModeFlag;
-   } else {
-      SiS_Pr->CRT1UsesCustomMode = false;
-   }
-
-   /* Reset CRT2 if changing mode on CRT1 */
-#ifdef SISDUALHEAD
-   if(pSiS->DualHeadMode) {
-      if(pSiSEnt->CRT2ModeNo != -1) {
-	 xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, 3,
-				"(Re-)Setting mode for CRT2\n");
-	 backupcustom = SiS_Pr->UseCustomMode;
-	 backupcr30 = SiS_GetReg(SiS_Pr->SiS_P3d4,0x30);
-	 backupcr31 = SiS_GetReg(SiS_Pr->SiS_P3d4,0x31);
-	 backupcr35 = SiS_GetReg(SiS_Pr->SiS_P3d4,0x35);
-	 backupcr38 = SiS_GetReg(SiS_Pr->SiS_P3d4,0x38);
-	 if(SiS_Pr->SiS_VBType & VB_SISVB) {
-	    /* Backup LUT-enable */
-	    if(pSiSEnt->CRT2ModeSet) {
-	       backupp40d = SiS_GetReg(SiS_Pr->SiS_Part4Port,0x0d) & 0x08;
-	    }
-	 }
-	 if(SiS_Pr->SiS_VBInfo & SetCRT2ToLCDA) {
-	    SiS_SetReg(SiS_Pr->SiS_P3d4,0x30,pSiSEnt->CRT2CR30);
-	    SiS_SetReg(SiS_Pr->SiS_P3d4,0x31,pSiSEnt->CRT2CR31);
-	    SiS_SetReg(SiS_Pr->SiS_P3d4,0x35,pSiSEnt->CRT2CR35);
-	    SiS_SetReg(SiS_Pr->SiS_P3d4,0x38,pSiSEnt->CRT2CR38);
-	 }
-
-	 SiSBIOSSetModeCRT2(SiS_Pr, pSiSEnt->pScrn_1,
-			    pSiSEnt->CRT2DMode, pSiSEnt->CRT2IsCustom);
-
-	 SiS_SetReg(SiS_Pr->SiS_P3d4,0x30,backupcr30);
-	 SiS_SetReg(SiS_Pr->SiS_P3d4,0x31,backupcr31);
-	 SiS_SetReg(SiS_Pr->SiS_P3d4,0x35,backupcr35);
-	 SiS_SetReg(SiS_Pr->SiS_P3d4,0x38,backupcr38);
-	 if(SiS_Pr->SiS_VBType & VB_SISVB) {
-	    SiS_SetRegANDOR(SiS_Pr->SiS_Part4Port,0x0d, ~0x08, backupp40d);
-	 }
-	 SiS_Pr->UseCustomMode = backupcustom;
-      }
-   }
-#endif
-
-   /* Warning: From here, the custom mode entries in SiS_Pr are
-    * possibly overwritten
-    */
-
-   SiS_DisplayOn(SiS_Pr);
-   SiS_SetRegByte(SiS_Pr->SiS_P3c6,0xFF);
-
-   if(SiS_Pr->SiS_VBType & VB_SIS30xBLV) {
-      if(SiS_Pr->ChipType >= SIS_315H) {
-	 SiS_SetReg(SiS_Pr->SiS_P3d4,0x38,backupreg);
-      } else if((SiS_Pr->ChipType == SIS_630) ||
-                (SiS_Pr->ChipType == SIS_730)) {
-         SiS_SetReg(SiS_Pr->SiS_P3d4,0x35,backupreg);
-      }
-   }
-
-   SiS_Handle760(SiS_Pr);
-
-   /* Backup/Set ModeNo in BIOS scratch area */
-   SiS_GetSetModeID(pScrn,ModeNo);
-
-   return true;
-}
-#endif /* Linux_XF86 */
-
 #ifndef GETBITSTR
 #define BITMASK(h,l)    	(((unsigned)(1U << ((h)-(l)+1))-1)<<(l))
 #define GENMASK(mask)   	BITMASK(1?mask,0?mask)
@@ -3927,7 +3407,7 @@
    SiS_Pr->CVBlankStart = SiS_Pr->SiS_VGAVDE;
 
    if(SiS_Pr->ChipType < SIS_315H) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
       tempbx = SiS_Pr->SiS_VGAHT;
       if(SiS_Pr->SiS_LCDInfo & DontExpandLCD) {
          tempbx = SiS_Pr->PanelHT;
@@ -3936,7 +3416,7 @@
       remaining = tempbx % 8;
 #endif
    } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       /* OK for LCDA, LVDS */
       tempbx = SiS_Pr->PanelHT - SiS_Pr->PanelXRes;
       tempax = SiS_Pr->SiS_VGAHDE;  /* not /2 ! */
@@ -3950,7 +3430,7 @@
    SiS_Pr->CHTotal = SiS_Pr->CHBlankEnd = tempbx;
 
    if(SiS_Pr->ChipType < SIS_315H) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
       if(SiS_Pr->SiS_VGAHDE == SiS_Pr->PanelXRes) {
 	 SiS_Pr->CHSyncStart = SiS_Pr->SiS_VGAHDE + ((SiS_Pr->PanelHRS + 1) & ~1);
 	 SiS_Pr->CHSyncEnd = SiS_Pr->CHSyncStart + SiS_Pr->PanelHRE;
@@ -3982,7 +3462,7 @@
       }
 #endif
    } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       tempax = VGAHDE;
       if(SiS_Pr->SiS_LCDInfo & DontExpandLCD) {
 	 tempbx = SiS_Pr->PanelXRes;
@@ -4001,7 +3481,7 @@
    if(SiS_Pr->SiS_LCDInfo & DontExpandLCD) {
       tempax = SiS_Pr->PanelYRes;
    } else if(SiS_Pr->ChipType < SIS_315H) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
       /* Stupid hack for 640x400/320x200 */
       if(SiS_Pr->SiS_LCDResInfo == Panel_1024x768) {
 	 if((tempax + tempbx) == 438) tempbx += 16;
@@ -4054,36 +3534,12 @@
    if(modeflag & DoubleScanMode) tempax |= 0x80;
    SiS_SetRegANDOR(SiS_Pr->SiS_P3d4,0x09,0x5F,tempax);
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-   xf86DrvMsg(0, X_INFO, "%d %d %d %d  %d %d %d %d  (%d %d %d %d)\n",
-	SiS_Pr->CHDisplay, SiS_Pr->CHSyncStart, SiS_Pr->CHSyncEnd, SiS_Pr->CHTotal,
-	SiS_Pr->CVDisplay, SiS_Pr->CVSyncStart, SiS_Pr->CVSyncEnd, SiS_Pr->CVTotal,
-	SiS_Pr->CHBlankStart, SiS_Pr->CHBlankEnd, SiS_Pr->CVBlankStart, SiS_Pr->CVBlankEnd);
-   xf86DrvMsg(0, X_INFO, " {{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,\n",
-	SiS_Pr->CCRT1CRTC[0], SiS_Pr->CCRT1CRTC[1],
-	SiS_Pr->CCRT1CRTC[2], SiS_Pr->CCRT1CRTC[3],
-	SiS_Pr->CCRT1CRTC[4], SiS_Pr->CCRT1CRTC[5],
-	SiS_Pr->CCRT1CRTC[6], SiS_Pr->CCRT1CRTC[7]);
-   xf86DrvMsg(0, X_INFO, "   0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,\n",
-	SiS_Pr->CCRT1CRTC[8], SiS_Pr->CCRT1CRTC[9],
-	SiS_Pr->CCRT1CRTC[10], SiS_Pr->CCRT1CRTC[11],
-	SiS_Pr->CCRT1CRTC[12], SiS_Pr->CCRT1CRTC[13],
-	SiS_Pr->CCRT1CRTC[14], SiS_Pr->CCRT1CRTC[15]);
-   xf86DrvMsg(0, X_INFO, "   0x%02x}},\n", SiS_Pr->CCRT1CRTC[16]);
-#endif
-#endif
 }
 
 void
 SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata,
 			int xres, int yres,
-#ifdef SIS_XORG_XF86
-			DisplayModePtr current
-#endif
-#ifdef SIS_LINUX_KERNEL
 			struct fb_var_screeninfo *var, bool writeres
-#endif
 )
 {
    unsigned short HRE, HBE, HRS, HBS, HDE, HT;
@@ -4127,25 +3583,10 @@
 
    D = B - F - C;
 
-#ifdef SIS_XORG_XF86
-   current->HDisplay   = (E * 8);
-   current->HSyncStart = (E * 8) + (F * 8);
-   current->HSyncEnd   = (E * 8) + (F * 8) + (C * 8);
-   current->HTotal     = (E * 8) + (F * 8) + (C * 8) + (D * 8);
-#ifdef TWDEBUG
-   xf86DrvMsg(0, X_INFO,
-		"H: A %d B %d C %d D %d E %d F %d  HT %d HDE %d HRS %d HBS %d HBE %d HRE %d\n",
-		A, B, C, D, E, F, HT, HDE, HRS, HBS, HBE, HRE);
-#else
-   (void)VBS;  (void)HBS;  (void)A;
-#endif
-#endif
-#ifdef SIS_LINUX_KERNEL
    if(writeres) var->xres = xres = E * 8;
    var->left_margin = D * 8;
    var->right_margin = F * 8;
    var->hsync_len = C * 8;
-#endif
 
    /* Vertical */
    sr_data = crdata[13];
@@ -4192,30 +3633,10 @@
 
    D = B - F - C;
 
-#ifdef SIS_XORG_XF86
-   current->VDisplay   = VDE + 1;
-   current->VSyncStart = VRS + 1;
-   current->VSyncEnd   = ((VRS & ~0x1f) | VRE) + 1;
-   if(VRE <= (VRS & 0x1f)) current->VSyncEnd += 32;
-   current->VTotal     = E + D + C + F;
-#if 0
-   current->VDisplay   = E;
-   current->VSyncStart = E + D;
-   current->VSyncEnd   = E + D + C;
-   current->VTotal     = E + D + C + F;
-#endif
-#ifdef TWDEBUG
-   xf86DrvMsg(0, X_INFO,
-	"V: A %d B %d C %d D %d E %d F %d  VT %d VDE %d VRS %d VBS %d VBE %d VRE %d\n",
-	A, B, C, D, E, F, VT, VDE, VRS, VBS, VBE, VRE);
-#endif
-#endif
-#ifdef SIS_LINUX_KERNEL
    if(writeres) var->yres = yres = E;
    var->upper_margin = D;
    var->lower_margin = F;
    var->vsync_len = C;
-#endif
 
    if((xres == 320) && ((yres == 200) || (yres == 240))) {
 	/* Terrible hack, but correct CRTC data for
@@ -4224,17 +3645,9 @@
 	 * a negative D. The CRT controller does not
 	 * seem to like correcting HRE to 50)
 	 */
-#ifdef SIS_XORG_XF86
-      current->HDisplay   = 320;
-      current->HSyncStart = 328;
-      current->HSyncEnd   = 376;
-      current->HTotal     = 400;
-#endif
-#ifdef SIS_LINUX_KERNEL
       var->left_margin = (400 - 376);
       var->right_margin = (328 - 320);
       var->hsync_len = (376 - 328);
-#endif
 
    }
 
diff --git a/drivers/video/sis/init.h b/drivers/video/sis/init.h
index b96005c..ee8ed3c 100644
--- a/drivers/video/sis/init.h
+++ b/drivers/video/sis/init.h
@@ -53,21 +53,8 @@
 #ifndef _INIT_H_
 #define _INIT_H_
 
-#include "osdef.h"
 #include "initdef.h"
 
-#ifdef SIS_XORG_XF86
-#include "sis.h"
-#define SIS_NEED_inSISREG
-#define SIS_NEED_inSISREGW
-#define SIS_NEED_inSISREGL
-#define SIS_NEED_outSISREG
-#define SIS_NEED_outSISREGW
-#define SIS_NEED_outSISREGL
-#include "sis_regs.h"
-#endif
-
-#ifdef SIS_LINUX_KERNEL
 #include "vgatypes.h"
 #include "vstruct.h"
 #ifdef SIS_CP
@@ -78,7 +65,6 @@
 #include <linux/fb.h>
 #include "sis.h"
 #include <video/sisfb.h>
-#endif
 
 /* Mode numbers */
 static const unsigned short ModeIndex_320x200[]      = {0x59, 0x41, 0x00, 0x4f};
@@ -286,7 +272,7 @@
 	{ 1280, 854, 8,16}    /* 0x22 */
 };
 
-#if defined(SIS300) || defined(SIS315H)
+#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315)
 static const struct SiS_StandTable_S SiS_StandTable[]=
 {
 /* 0x00: MD_0_200 */
@@ -1521,10 +1507,6 @@
 };
 
 bool		SiSInitPtr(struct SiS_Private *SiS_Pr);
-#ifdef SIS_XORG_XF86
-unsigned short	SiS_GetModeID(int VGAEngine, unsigned int VBFlags, int HDisplay, int VDisplay,
-				int Depth, bool FSTN, int LCDwith, int LCDheight);
-#endif
 unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
 				int VDisplay, int Depth, bool FSTN,
 				unsigned short CustomT, int LCDwith, int LCDheight,
@@ -1550,17 +1532,11 @@
 void		SiS_DisplayOn(struct SiS_Private *SiS_Pr);
 void		SiS_DisplayOff(struct SiS_Private *SiS_Pr);
 void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
-#ifndef SIS_LINUX_KERNEL
-void		SiSSetLVDSetc(struct SiS_Private *SiS_Pr);
-#endif
 void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
 void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
 unsigned short	SiS_GetModeFlag(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
 				unsigned short ModeIdIndex);
 bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
-#ifndef SIS_LINUX_KERNEL
-void		SiS_GetVBType(struct SiS_Private *SiS_Pr);
-#endif
 
 bool		SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo,
 				unsigned short *ModeIdIndex);
@@ -1572,37 +1548,19 @@
 				unsigned short ModeIdIndex);
 unsigned short	SiS_GetOffset(struct SiS_Private *SiS_Pr,unsigned short ModeNo,
 				unsigned short ModeIdIndex, unsigned short RRTI);
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 void		SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *idx1,
 				unsigned short *idx2);
 unsigned short	SiS_GetFIFOThresholdB300(unsigned short idx1, unsigned short idx2);
 unsigned short	SiS_GetLatencyFactor630(struct SiS_Private *SiS_Pr, unsigned short index);
 #endif
 void		SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
-#ifdef SIS_XORG_XF86
-bool		SiSSetMode(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn, unsigned short ModeNo,
-				bool dosetpitch);
-bool		SiSBIOSSetMode(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn,
-				DisplayModePtr mode, bool IsCustom);
-bool		SiSBIOSSetModeCRT2(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn,
-				DisplayModePtr mode, bool IsCustom);
-bool		SiSBIOSSetModeCRT1(struct SiS_Private *SiS_Pr, ScrnInfoPtr pScrn,
-				DisplayModePtr mode, bool IsCustom);
-#endif
-#ifdef SIS_LINUX_KERNEL
 bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
-#endif
 void		SiS_CalcCRRegisters(struct SiS_Private *SiS_Pr, int depth);
 void		SiS_CalcLCDACRT1Timing(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
 				unsigned short ModeIdIndex);
-#ifdef SIS_XORG_XF86
-void		SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata, int xres,
-				int yres, DisplayModePtr current);
-#endif
-#ifdef SIS_LINUX_KERNEL
 void		SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata, int xres,
 				int yres, struct fb_var_screeninfo *var, bool writeres);
-#endif
 
 /* From init301.c: */
 extern void		SiS_GetVBInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
@@ -1626,29 +1584,16 @@
 extern bool		SiS_IsVAMode(struct SiS_Private *);
 extern bool		SiS_IsDualEdge(struct SiS_Private *);
 
-#ifdef SIS_XORG_XF86
-/* From other modules: */
-extern unsigned short	SiS_CheckBuildCustomMode(ScrnInfoPtr pScrn, DisplayModePtr mode,
-				unsigned int VBFlags);
-extern unsigned char	SiS_GetSetBIOSScratch(ScrnInfoPtr pScrn, unsigned short offset,
-				unsigned char value);
-extern unsigned char	SiS_GetSetModeID(ScrnInfoPtr pScrn, unsigned char id);
-extern unsigned short 	SiS_GetModeNumber(ScrnInfoPtr pScrn, DisplayModePtr mode,
-				unsigned int VBFlags);
-#endif
-
-#ifdef SIS_LINUX_KERNEL
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 extern unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
 extern void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg,
 				unsigned int val);
 #endif
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 extern void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg,
 				unsigned char val);
 extern unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
 #endif
-#endif
 
 #endif
 
diff --git a/drivers/video/sis/init301.c b/drivers/video/sis/init301.c
index da33d80..9fa66fd 100644
--- a/drivers/video/sis/init301.c
+++ b/drivers/video/sis/init301.c
@@ -75,11 +75,11 @@
 
 #include "init301.h"
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 #include "oem300.h"
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 #include "oem310.h"
 #endif
 
@@ -87,9 +87,7 @@
 #define SiS_I2CDELAYSHORT  150
 
 static unsigned short	SiS_GetBIOSLCDResInfo(struct SiS_Private *SiS_Pr);
-#ifdef SIS_LINUX_KERNEL
 static void		SiS_SetCH70xx(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
-#endif
 
 /*********************************************/
 /*         HELPER: Lock/Unlock CRT2          */
@@ -106,9 +104,7 @@
       SiS_SetRegOR(SiS_Pr->SiS_Part1Port,0x24,0x01);
 }
 
-#ifdef SIS_LINUX_KERNEL
 static
-#endif
 void
 SiS_LockCRT2(struct SiS_Private *SiS_Pr)
 {
@@ -138,7 +134,7 @@
 /*    HELPER: Get Pointer to LCD structure   */
 /*********************************************/
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static unsigned char *
 GetLCDStructPtr661(struct SiS_Private *SiS_Pr)
 {
@@ -404,7 +400,7 @@
 /*    HELPER: GET SOME DATA FROM BIOS ROM    */
 /*********************************************/
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static bool
 SiS_CR36BIOSWord23b(struct SiS_Private *SiS_Pr)
 {
@@ -449,7 +445,7 @@
       SiS_GetReg(SiS_Pr->SiS_P3c4, 0x05);
 }
 
-#if defined(SIS300) || defined(SIS315H)
+#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315)
 static void
 SiS_GenericDelay(struct SiS_Private *SiS_Pr, unsigned short delay)
 {
@@ -457,7 +453,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static void
 SiS_LongDelay(struct SiS_Private *SiS_Pr, unsigned short delay)
 {
@@ -467,7 +463,7 @@
 }
 #endif
 
-#if defined(SIS300) || defined(SIS315H)
+#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315)
 static void
 SiS_ShortDelay(struct SiS_Private *SiS_Pr, unsigned short delay)
 {
@@ -480,14 +476,14 @@
 static void
 SiS_PanelDelay(struct SiS_Private *SiS_Pr, unsigned short DelayTime)
 {
-#if defined(SIS300) || defined(SIS315H)
+#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315)
    unsigned char  *ROMAddr = SiS_Pr->VirtualRomBase;
    unsigned short PanelID, DelayIndex, Delay=0;
 #endif
 
    if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 
       PanelID = SiS_GetReg(SiS_Pr->SiS_P3d4,0x36);
       if(SiS_Pr->SiS_VBType & VB_SISVB) {
@@ -513,11 +509,11 @@
       }
       SiS_ShortDelay(SiS_Pr, Delay);
 
-#endif  /* SIS300 */
+#endif  /* CONFIG_FB_SIS_300 */
 
    } else {
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 
       if((SiS_Pr->ChipType >= SIS_661)    ||
 	 (SiS_Pr->ChipType <= SIS_315PRO) ||
@@ -579,12 +575,12 @@
 
       }
 
-#endif /* SIS315H */
+#endif /* CONFIG_FB_SIS_315 */
 
    }
 }
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static void
 SiS_PanelDelayLoop(struct SiS_Private *SiS_Pr, unsigned short DelayTime, unsigned short DelayLoop)
 {
@@ -613,7 +609,7 @@
    while((!(SiS_GetRegByte(SiS_Pr->SiS_P3da) & 0x08)) && --watchdog);
 }
 
-#if defined(SIS300) || defined(SIS315H)
+#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315)
 static void
 SiS_WaitRetrace2(struct SiS_Private *SiS_Pr, unsigned short reg)
 {
@@ -630,7 +626,7 @@
 SiS_WaitVBRetrace(struct SiS_Private *SiS_Pr)
 {
    if(SiS_Pr->ChipType < SIS_315H) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
       if(SiS_Pr->SiS_VBType & VB_SIS30xBLV) {
 	 if(!(SiS_GetReg(SiS_Pr->SiS_Part1Port,0x00) & 0x20)) return;
       }
@@ -641,7 +637,7 @@
       }
 #endif
    } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       if(!(SiS_GetReg(SiS_Pr->SiS_Part1Port,0x00) & 0x40)) {
 	 SiS_WaitRetrace1(SiS_Pr);
       } else {
@@ -686,7 +682,7 @@
 /*               HELPER: MISC                */
 /*********************************************/
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static bool
 SiS_Is301B(struct SiS_Private *SiS_Pr)
 {
@@ -708,7 +704,7 @@
 bool
 SiS_IsDualEdge(struct SiS_Private *SiS_Pr)
 {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if(SiS_Pr->ChipType >= SIS_315H) {
       if((SiS_Pr->ChipType != SIS_650) || (SiS_GetReg(SiS_Pr->SiS_P3d4,0x5f) & 0xf0)) {
 	 if(SiS_GetReg(SiS_Pr->SiS_P3d4,0x38) & EnableDualEdge) return true;
@@ -721,7 +717,7 @@
 bool
 SiS_IsVAMode(struct SiS_Private *SiS_Pr)
 {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    unsigned short flag;
 
    if(SiS_Pr->ChipType >= SIS_315H) {
@@ -732,7 +728,7 @@
    return false;
 }
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_IsVAorLCD(struct SiS_Private *SiS_Pr)
 {
@@ -745,7 +741,7 @@
 static bool
 SiS_IsDualLink(struct SiS_Private *SiS_Pr)
 {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if(SiS_Pr->ChipType >= SIS_315H) {
       if((SiS_CRT2IsLCD(SiS_Pr)) ||
          (SiS_IsVAMode(SiS_Pr))) {
@@ -756,7 +752,7 @@
    return false;
 }
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_TVEnabled(struct SiS_Private *SiS_Pr)
 {
@@ -768,7 +764,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_LCDAEnabled(struct SiS_Private *SiS_Pr)
 {
@@ -777,7 +773,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_WeHaveBacklightCtrl(struct SiS_Private *SiS_Pr)
 {
@@ -788,7 +784,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_IsNotM650orLater(struct SiS_Private *SiS_Pr)
 {
@@ -804,7 +800,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_IsYPbPr(struct SiS_Private *SiS_Pr)
 {
@@ -816,7 +812,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_IsChScart(struct SiS_Private *SiS_Pr)
 {
@@ -828,7 +824,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_IsTVOrYPbPrOrScart(struct SiS_Private *SiS_Pr)
 {
@@ -848,7 +844,7 @@
 }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static bool
 SiS_IsLCDOrLCDA(struct SiS_Private *SiS_Pr)
 {
@@ -914,7 +910,7 @@
 /*********************************************/
 
 /* Setup general purpose IO for Chrontel communication */
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 void
 SiS_SetChrontelGPIO(struct SiS_Private *SiS_Pr, unsigned short myvbinfo)
 {
@@ -923,11 +919,7 @@
 
    if(!(SiS_Pr->SiS_ChSW)) return;
 
-#ifdef SIS_LINUX_KERNEL
    acpibase = sisfb_read_lpc_pci_dword(SiS_Pr, 0x74);
-#else
-   acpibase = pciReadLong(0x00000800, 0x74);
-#endif
    acpibase &= 0xFFFF;
    if(!acpibase) return;
    temp = SiS_GetRegShort((acpibase + 0x3c));	/* ACPI register 0x3c: GP Event 1 I/O mode select */
@@ -969,7 +961,7 @@
 	tempax &= (DriverMode | LoadDACFlag | SetNotSimuMode | SetPALTV);
 	tempbx |= tempax;
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	if(SiS_Pr->ChipType >= SIS_315H) {
 	   if(SiS_Pr->SiS_VBType & VB_SISLCDA) {
 	      if(ModeNo == 0x03) {
@@ -1019,7 +1011,7 @@
 	   }
 	}
 
-#endif  /* SIS315H */
+#endif  /* CONFIG_FB_SIS_315 */
 
         if(!(SiS_Pr->SiS_VBType & VB_SISVGA2)) {
 	   tempbx &= ~(SetCRT2ToRAMDAC);
@@ -1154,24 +1146,16 @@
 
    SiS_Pr->SiS_VBInfo = tempbx;
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
    if(SiS_Pr->ChipType == SIS_630) {
       SiS_SetChrontelGPIO(SiS_Pr, SiS_Pr->SiS_VBInfo);
    }
 #endif
 
-#ifdef SIS_LINUX_KERNEL
 #if 0
    printk(KERN_DEBUG "sisfb: (init301: VBInfo= 0x%04x, SetFlag=0x%04x)\n",
       SiS_Pr->SiS_VBInfo, SiS_Pr->SiS_SetFlag);
 #endif
-#endif
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-   xf86DrvMsg(0, X_PROBED, "(init301: VBInfo=0x%04x, SetFlag=0x%04x)\n",
-      SiS_Pr->SiS_VBInfo, SiS_Pr->SiS_SetFlag);
-#endif
-#endif
 }
 
 /*********************************************/
@@ -1415,12 +1399,6 @@
    }
 
    SiS_Pr->SiS_VBInfo &= ~SetPALTV;
-
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-   xf86DrvMsg(0, X_INFO, "(init301: TVMode %x, VBInfo %x)\n", SiS_Pr->SiS_TVMode, SiS_Pr->SiS_VBInfo);
-#endif
-#endif
 }
 
 /*********************************************/
@@ -1443,22 +1421,10 @@
 static void
 SiS_GetLCDInfoBIOS(struct SiS_Private *SiS_Pr)
 {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    unsigned char  *ROMAddr;
    unsigned short temp;
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-   xf86DrvMsg(0, X_INFO, "Paneldata driver: [%d %d] [H %d %d] [V %d %d] [C %d 0x%02x 0x%02x]\n",
-	SiS_Pr->PanelHT, SiS_Pr->PanelVT,
-	SiS_Pr->PanelHRS, SiS_Pr->PanelHRE,
-	SiS_Pr->PanelVRS, SiS_Pr->PanelVRE,
-	SiS_Pr->SiS_VBVCLKData[SiS_Pr->PanelVCLKIdx315].CLOCK,
-	SiS_Pr->SiS_VBVCLKData[SiS_Pr->PanelVCLKIdx315].Part4_A,
-	SiS_Pr->SiS_VBVCLKData[SiS_Pr->PanelVCLKIdx315].Part4_B);
-#endif
-#endif
-
    if((ROMAddr = GetLCDStructPtr661(SiS_Pr))) {
       if((temp = SISGETROMW(6)) != SiS_Pr->PanelHT) {
 	 SiS_Pr->SiS_NeedRomModeData = true;
@@ -1480,18 +1446,6 @@
       SiS_Pr->SiS_VCLKData[VCLK_CUSTOM_315].SR2C =
 	 SiS_Pr->SiS_VBVCLKData[VCLK_CUSTOM_315].Part4_B = ROMAddr[20];
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-      xf86DrvMsg(0, X_INFO, "Paneldata BIOS:  [%d %d] [H %d %d] [V %d %d] [C %d 0x%02x 0x%02x]\n",
-	SiS_Pr->PanelHT, SiS_Pr->PanelVT,
-	SiS_Pr->PanelHRS, SiS_Pr->PanelHRE,
-	SiS_Pr->PanelVRS, SiS_Pr->PanelVRE,
-	SiS_Pr->SiS_VBVCLKData[SiS_Pr->PanelVCLKIdx315].CLOCK,
-	SiS_Pr->SiS_VBVCLKData[SiS_Pr->PanelVCLKIdx315].Part4_A,
-	SiS_Pr->SiS_VBVCLKData[SiS_Pr->PanelVCLKIdx315].Part4_B);
-#endif
-#endif
-
    }
 #endif
 }
@@ -1517,13 +1471,13 @@
 {
   unsigned short temp,modeflag,resinfo=0,modexres=0,modeyres=0;
   bool panelcanscale = false;
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
   unsigned char *ROMAddr = SiS_Pr->VirtualRomBase;
   static const unsigned char SiS300SeriesLCDRes[] =
           { 0,  1,  2,  3,  7,  4,  5,  8,
 	    0,  0, 10,  0,  0,  0,  0, 15 };
 #endif
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   unsigned char   *myptr = NULL;
 #endif
 
@@ -1562,7 +1516,7 @@
      SiS_Pr->SiS_LCDTypeInfo = (temp & 0x0F) - 1;
   }
   temp &= 0x0f;
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
   if(SiS_Pr->ChipType < SIS_315H) {
      /* Very old BIOSes only know 7 sizes (NetVista 2179, 1.01g) */
      if(SiS_Pr->SiS_VBType & VB_SIS301) {
@@ -1574,7 +1528,7 @@
 #endif
 
   /* Translate to our internal types */
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   if(SiS_Pr->ChipType == SIS_550) {
      if     (temp == Panel310_1152x768)  temp = Panel_320x240_2; /* Verified working */
      else if(temp == Panel310_320x240_2) temp = Panel_320x240_2;
@@ -1597,7 +1551,7 @@
 
   SiS_Pr->SiS_LCDResInfo = temp;
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
   if(SiS_Pr->SiS_IF_DEF_LVDS == 1) {
      if(SiS_Pr->SiS_CustomT == CUT_BARCO1366) {
 	SiS_Pr->SiS_LCDResInfo = Panel_Barco1366;
@@ -1639,7 +1593,7 @@
   else if(SiS_Pr->UsePanelScaler == 1) SiS_Pr->SiS_LCDInfo |= DontExpandLCD;
 
   /* Dual link, Pass 1:1 BIOS default, etc. */
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   if(SiS_Pr->ChipType >= SIS_661) {
      if(SiS_Pr->SiS_LCDInfo & DontExpandLCD) {
 	if(temp & 0x08) SiS_Pr->SiS_LCDInfo |= LCDPass11;
@@ -2076,7 +2030,7 @@
      }
   }
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
   if(SiS_Pr->SiS_IF_DEF_LVDS == 1) {
      if(SiS_Pr->SiS_CustomT == CUT_PANEL848 || SiS_Pr->SiS_CustomT == CUT_PANEL856) {
 	SiS_Pr->SiS_LCDInfo = 0x80 | 0x40 | 0x20;   /* neg h/v sync, RGB24(D0 = 0) */
@@ -2186,17 +2140,10 @@
      SiS_Pr->SiS_SetFlag |= LCDVESATiming;
   }
 
-#ifdef SIS_LINUX_KERNEL
 #if 0
   printk(KERN_DEBUG "sisfb: (LCDInfo=0x%04x LCDResInfo=0x%02x LCDTypeInfo=0x%02x)\n",
 	SiS_Pr->SiS_LCDInfo, SiS_Pr->SiS_LCDResInfo, SiS_Pr->SiS_LCDTypeInfo);
 #endif
-#endif
-#ifdef SIS_XORG_XF86
-  xf86DrvMsgVerb(0, X_PROBED, 4,
-	"(init301: LCDInfo=0x%04x LCDResInfo=0x%02x LCDTypeInfo=0x%02x SetFlag=0x%04x)\n",
-	SiS_Pr->SiS_LCDInfo, SiS_Pr->SiS_LCDResInfo, SiS_Pr->SiS_LCDTypeInfo, SiS_Pr->SiS_SetFlag);
-#endif
 }
 
 /*********************************************/
@@ -2359,7 +2306,7 @@
 	      VCLKIndex = SiS_Pr->PanelVCLKIdx315;
 	   }
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	   /* Special Timing: Barco iQ Pro R series */
 	   if(SiS_Pr->SiS_CustomT == CUT_BARCO1366) VCLKIndex = 0x44;
 
@@ -2410,12 +2357,6 @@
 
   }
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-  xf86DrvMsg(0, X_INFO, "VCLKIndex %d (0x%x)\n", VCLKIndex, VCLKIndex);
-#endif
-#endif
-
   return VCLKIndex;
 }
 
@@ -2428,10 +2369,10 @@
 {
   unsigned short i, j, modeflag, tempah=0;
   short tempcl;
-#if defined(SIS300) || defined(SIS315H)
+#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315)
   unsigned short tempbl;
 #endif
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   unsigned char  *ROMAddr = SiS_Pr->VirtualRomBase;
   unsigned short tempah2, tempbl2;
 #endif
@@ -2454,7 +2395,7 @@
 
      if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300    /* ---- 300 series ---- */
+#ifdef CONFIG_FB_SIS_300    /* ---- 300 series ---- */
 
 	/* For 301BDH: (with LCD via LVDS) */
 	if(SiS_Pr->SiS_VBType & VB_NoLCD) {
@@ -2477,11 +2418,11 @@
 
 	if(SiS_Pr->SiS_VBInfo & SetInSlaveMode)  tempah ^= 0xA0;
 
-#endif  /* SIS300 */
+#endif  /* CONFIG_FB_SIS_300 */
 
      } else {
 
-#ifdef SIS315H    /* ------- 315/330 series ------ */
+#ifdef CONFIG_FB_SIS_315    /* ------- 315/330 series ------ */
 
 	if(ModeNo > 0x13) {
 	   tempcl -= ModeVGA;
@@ -2494,7 +2435,7 @@
 
 	if(SiS_Pr->SiS_VBInfo & SetInSlaveMode) tempah ^= 0x50;
 
-#endif  /* SIS315H */
+#endif  /* CONFIG_FB_SIS_315 */
 
      }
 
@@ -2503,7 +2444,7 @@
      if(SiS_Pr->ChipType < SIS_315H) {
 	SiS_SetReg(SiS_Pr->SiS_Part1Port,0x00,tempah);
      } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	if(SiS_Pr->SiS_IF_DEF_LVDS == 1) {
 	   SiS_SetRegANDOR(SiS_Pr->SiS_Part1Port,0x00,0xa0,tempah);
 	} else if(SiS_Pr->SiS_VBType & VB_SISVB) {
@@ -2584,7 +2525,7 @@
 
 	if(SiS_Pr->ChipType >= SIS_315H) {
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	   /* LVDS can only be slave in 8bpp modes */
 	   tempah = 0x80;
 	   if((modeflag & CRT2Mode) && (SiS_Pr->SiS_ModeType > ModeVGA)) {
@@ -2604,7 +2545,7 @@
 
 	} else {
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	   tempah = 0;
 	   if( (!(SiS_Pr->SiS_VBInfo & SetInSlaveMode)) && (SiS_Pr->SiS_ModeType > ModeVGA) ) {
 	      tempah |= 0x02;
@@ -2626,7 +2567,7 @@
 
      if(SiS_Pr->ChipType >= SIS_315H) {
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	/* unsigned char bridgerev = SiS_GetReg(SiS_Pr->SiS_Part4Port,0x01); */
 
 	/* The following is nearly unpreditable and varies from machine
@@ -2718,11 +2659,11 @@
 	   SiS_SetRegANDOR(SiS_Pr->SiS_Part4Port,0x23,tempbl,tempah);
 	}
 
-#endif /* SIS315H */
+#endif /* CONFIG_FB_SIS_315 */
 
      } else if(SiS_Pr->SiS_VBType & VB_SIS30xBLV) {
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	SiS_SetRegAND(SiS_Pr->SiS_Part4Port,0x21,0x3f);
 
 	if((SiS_Pr->SiS_VBInfo & DisableCRT2Display) ||
@@ -2745,7 +2686,7 @@
 
   } else {  /* LVDS */
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
      if(SiS_Pr->ChipType >= SIS_315H) {
 
 	if(SiS_Pr->SiS_IF_DEF_CH70xx != 0) {
@@ -2931,7 +2872,7 @@
 	   }
 	}
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	if(SiS_Pr->SiS_CustomT == CUT_COMPAQ1280) {
 	   if(SiS_Pr->SiS_LCDResInfo == Panel_1280x1024) {
 	      if(!(SiS_Pr->SiS_LCDInfo & DontExpandLCD)) {
@@ -3036,7 +2977,7 @@
 	case Panel_1280x1024: tempbx = 24; break;
 	case Panel_1400x1050: tempbx = 26; break;
 	case Panel_1600x1200: tempbx = 28; break;
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	case Panel_Barco1366: tempbx = 80; break;
 #endif
 	}
@@ -3053,7 +2994,7 @@
 
 	if(SiS_Pr->SiS_LCDInfo & LCDPass11) tempbx = 30;
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	if(SiS_Pr->SiS_CustomT == CUT_BARCO1024) {
 	   tempbx = 82;
 	   if(SiS_Pr->SiS_LCDInfo & DontExpandLCD) tempbx++;
@@ -3189,7 +3130,7 @@
 
    if((SiS_Pr->SiS_VBType & VB_SISVB) && (SiS_Pr->SiS_VBInfo & SetCRT2ToLCDA)) {
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
       SiS_CalcPanelLinkTiming(SiS_Pr, ModeNo, ModeIdIndex, RefreshRateTableIndex);
       SiS_CalcLCDACRT1Timing(SiS_Pr, ModeNo, ModeIdIndex);
 #endif
@@ -3214,7 +3155,7 @@
 	 case 16: LVDSData = SiS_Pr->SiS_LVDS800x600Data_1;    break;
 	 case 18: LVDSData = SiS_Pr->SiS_LVDS1024x600Data_1;   break;
 	 case 20: LVDSData = SiS_Pr->SiS_LVDS1024x768Data_1;   break;
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	 case 80: LVDSData = SiS_Pr->SiS_LVDSBARCO1366Data_1;  break;
 	 case 81: LVDSData = SiS_Pr->SiS_LVDSBARCO1366Data_2;  break;
 	 case 82: LVDSData = SiS_Pr->SiS_LVDSBARCO1024Data_1;  break;
@@ -3248,7 +3189,7 @@
 	     (SiS_Pr->SiS_SetFlag & SetDOSMode) ) {
 	    SiS_Pr->SiS_HDE = SiS_Pr->PanelXRes;
             SiS_Pr->SiS_VDE = SiS_Pr->PanelYRes;
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	    if(SiS_Pr->SiS_CustomT == CUT_BARCO1366) {
 	       if(ResIndex < 0x08) {
 		  SiS_Pr->SiS_HDE = 1280;
@@ -3270,7 +3211,7 @@
   unsigned short resinfo, CRT2Index, ResIndex;
   const struct SiS_LCDData *LCDPtr = NULL;
   const struct SiS_TVData  *TVPtr  = NULL;
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   short resinfo661;
 #endif
 
@@ -3283,7 +3224,7 @@
   } else {
      modeflag = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag;
      resinfo = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_RESINFO;
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
      resinfo661 = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].ROMMODEIDX661;
      if( (SiS_Pr->SiS_VBInfo & SetCRT2ToLCD)   &&
 	 (SiS_Pr->SiS_SetFlag & LCDVESATiming) &&
@@ -3460,7 +3401,7 @@
 
 	} else if( (!(SiS_Pr->SiS_LCDInfo & DontExpandLCD)) && (romptr) && (ROMAddr) ) {
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	   SiS_Pr->SiS_RVBHCMAX  = ROMAddr[romptr];
 	   SiS_Pr->SiS_RVBHCFACT = ROMAddr[romptr+1];
 	   SiS_Pr->SiS_VGAHT     = ROMAddr[romptr+2] | ((ROMAddr[romptr+3] & 0x0f) << 8);
@@ -3520,19 +3461,13 @@
 	      case Panel_1680x1050     :
 	      case Panel_1680x1050 + 32: LCDPtr = SiS_Pr->SiS_LCD1680x1050Data;     break;
 	      case 100		       : LCDPtr = SiS_Pr->SiS_NoScaleData;	    break;
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	      case 200                 : LCDPtr = SiS310_ExtCompaq1280x1024Data;    break;
 	      case 201                 : LCDPtr = SiS_Pr->SiS_St2LCD1280x1024Data;  break;
 #endif
 	      default                  : LCDPtr = SiS_Pr->SiS_ExtLCD1024x768Data;   break;
 	   }
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-	   xf86DrvMsg(0, X_INFO, "GetCRT2Data: Index %d ResIndex %d\n", CRT2Index, ResIndex);
-#endif
-#endif
-
 	   SiS_Pr->SiS_RVBHCMAX  = (LCDPtr+ResIndex)->RVBHCMAX;
 	   SiS_Pr->SiS_RVBHCFACT = (LCDPtr+ResIndex)->RVBHCFACT;
 	   SiS_Pr->SiS_VGAHT     = (LCDPtr+ResIndex)->VGAHT;
@@ -3624,7 +3559,7 @@
 {
    const struct SiS_LVDSDes *PanelDesPtr = NULL;
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
    if(SiS_Pr->SiS_VBInfo & SetCRT2ToLCD) {
 
       if(SiS_Pr->ChipType < SIS_315H) {
@@ -3696,7 +3631,7 @@
 
   if((SiS_Pr->SiS_VBType & VB_SIS30xBLV) && (SiS_Pr->SiS_VBInfo & SetCRT2ToLCDA)) {
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
      if(SiS_Pr->SiS_LCDInfo & DontExpandLCD) {
 	/* non-pass 1:1 only, see above */
 	if(SiS_Pr->SiS_VGAHDE != SiS_Pr->PanelXRes) {
@@ -3771,7 +3706,7 @@
      } else {
 
         if(SiS_Pr->ChipType < SIS_315H) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	   switch(SiS_Pr->SiS_LCDResInfo) {
 	   case Panel_800x600:
 	      if(SiS_Pr->SiS_VGAVDE == SiS_Pr->PanelYRes) {
@@ -3816,7 +3751,7 @@
 	   }
 #endif
         } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	   switch(SiS_Pr->SiS_LCDResInfo) {
 	   case Panel_1024x768:
 	   case Panel_1280x1024:
@@ -3844,7 +3779,7 @@
 	         if(SiS_Pr->ChipType < SIS_315H) {
 	            if(!(modeflag & HalfDCLK)) SiS_Pr->SiS_LCDHDES = 320;
 	         } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 		    if(SiS_Pr->SiS_LCDResInfo == Panel_1024x768)  SiS_Pr->SiS_LCDHDES = 480;
 		    if(SiS_Pr->SiS_LCDResInfo == Panel_1400x1050) SiS_Pr->SiS_LCDHDES = 804;
 		    if(SiS_Pr->SiS_LCDResInfo == Panel_1600x1200) SiS_Pr->SiS_LCDHDES = 704;
@@ -3866,7 +3801,7 @@
 /*           DISABLE VIDEO BRIDGE            */
 /*********************************************/
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static int
 SiS_HandlePWD(struct SiS_Private *SiS_Pr)
 {
@@ -3891,11 +3826,6 @@
 	 ret = 1;
       }
       SiS_SetRegANDOR(SiS_Pr->SiS_Part4Port,0x27,0x7f,temp);
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-      xf86DrvMsg(0, 0, "Setting PWD %x\n", temp);
-#endif
-#endif
    }
 #endif
    return ret;
@@ -3909,7 +3839,7 @@
 void
 SiS_DisableBridge(struct SiS_Private *SiS_Pr)
 {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   unsigned short tempah, pushax=0, modenum;
 #endif
   unsigned short temp=0;
@@ -3920,7 +3850,7 @@
 
 	if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300	   /* 300 series */
+#ifdef CONFIG_FB_SIS_300	   /* 300 series */
 
 	   if(!(SiS_CR36BIOSWord23b(SiS_Pr))) {
 	      if(SiS_Pr->SiS_VBType & VB_SISLVDS) {
@@ -3953,11 +3883,11 @@
 	      }
 	   }
 
-#endif  /* SIS300 */
+#endif  /* CONFIG_FB_SIS_300 */
 
         } else {
 
-#ifdef SIS315H	   /* 315 series */
+#ifdef CONFIG_FB_SIS_315	   /* 315 series */
 
 	   int didpwd = 0;
 	   bool custom1 = (SiS_Pr->SiS_CustomT == CUT_COMPAQ1280) ||
@@ -4081,14 +4011,14 @@
 
 	   }
 
-#endif /* SIS315H */
+#endif /* CONFIG_FB_SIS_315 */
 
 	}
 
      } else {     /* ============ For 301 ================ */
 
         if(SiS_Pr->ChipType < SIS_315H) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	   if(!(SiS_CR36BIOSWord23b(SiS_Pr))) {
 	      SiS_SetRegSR11ANDOR(SiS_Pr,0xF7,0x08);
 	      SiS_PanelDelay(SiS_Pr, 3);
@@ -4111,7 +4041,7 @@
 	    SiS_SetRegOR(SiS_Pr->SiS_P3c4,0x1E,0x20);
 	    SiS_SetReg(SiS_Pr->SiS_Part1Port,0x00,temp);
 	} else {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	    SiS_SetRegAND(SiS_Pr->SiS_P3c4,0x1E,0xDF);            /* disable CRT2 */
 	    if( (!(SiS_CRT2IsLCD(SiS_Pr))) ||
 		(!(SiS_CR36BIOSWord23d(SiS_Pr))) ) {
@@ -4127,7 +4057,7 @@
 
     if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300	/* 300 series */
+#ifdef CONFIG_FB_SIS_300	/* 300 series */
 
 	if(SiS_Pr->SiS_IF_DEF_CH70xx == 1) {
 	   SiS_SetCH700x(SiS_Pr,0x0E,0x09);
@@ -4171,11 +4101,11 @@
 	   SiS_SetRegSR11ANDOR(SiS_Pr,0xFB,0x04);
 	}
 
-#endif  /* SIS300 */
+#endif  /* CONFIG_FB_SIS_300 */
 
     } else {
 
-#ifdef SIS315H	/* 315 series */
+#ifdef CONFIG_FB_SIS_315	/* 315 series */
 
 	if(!(SiS_IsNotM650orLater(SiS_Pr))) {
 	   /*if(SiS_Pr->ChipType < SIS_340) { */ /* XGI needs this */
@@ -4288,7 +4218,7 @@
 	   }
         }
 
-#endif  /* SIS315H */
+#endif  /* CONFIG_FB_SIS_315 */
 
     }  /* 315 series */
 
@@ -4304,14 +4234,12 @@
  * from outside the context of a mode switch!
  * MUST call getVBType before calling this
  */
-#ifdef SIS_LINUX_KERNEL
 static
-#endif
 void
 SiS_EnableBridge(struct SiS_Private *SiS_Pr)
 {
   unsigned short temp=0, tempah;
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   unsigned short temp1, pushax=0;
   bool delaylong = false;
 #endif
@@ -4322,7 +4250,7 @@
 
       if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300     /* 300 series */
+#ifdef CONFIG_FB_SIS_300     /* 300 series */
 
 	 if(SiS_CRT2IsLCD(SiS_Pr)) {
 	    if(SiS_Pr->SiS_VBType & VB_SISLVDS) {
@@ -4385,11 +4313,11 @@
 	 }
 
 
-#endif /* SIS300 */
+#endif /* CONFIG_FB_SIS_300 */
 
       } else {
 
-#ifdef SIS315H    /* 315 series */
+#ifdef CONFIG_FB_SIS_315    /* 315 series */
 
 #ifdef SET_EMI
 	 unsigned char   r30=0, r31=0, r32=0, r33=0, cr36=0;
@@ -4688,7 +4616,7 @@
 	    SiS_SetRegAND(SiS_Pr->SiS_Part1Port,0x00,0x7f);
 	 }
 
-#endif /* SIS315H */
+#endif /* CONFIG_FB_SIS_315 */
 
       }
 
@@ -4739,7 +4667,7 @@
 
     if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300    /* 300 series */
+#ifdef CONFIG_FB_SIS_300    /* 300 series */
 
        if(SiS_CRT2IsLCD(SiS_Pr)) {
 	  if(SiS_Pr->ChipType == SIS_730) {
@@ -4783,11 +4711,11 @@
 	  }
        }
 
-#endif  /* SIS300 */
+#endif  /* CONFIG_FB_SIS_300 */
 
     } else {
 
-#ifdef SIS315H    /* 315 series */
+#ifdef CONFIG_FB_SIS_315    /* 315 series */
 
        if(!(SiS_IsNotM650orLater(SiS_Pr))) {
 	  /*if(SiS_Pr->ChipType < SIS_340) {*/  /* XGI needs this */
@@ -4881,7 +4809,7 @@
 	  }
        }
 
-#endif  /* SIS315H */
+#endif  /* CONFIG_FB_SIS_315 */
 
     } /* 310 series */
 
@@ -4971,7 +4899,7 @@
 
       if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300  /* ---- 300 series --- */
+#ifdef CONFIG_FB_SIS_300  /* ---- 300 series --- */
 
 	 if(SiS_Pr->SiS_VBType & VB_SIS30xBLV) {			/* 630 - 301B(-DH) */
 
@@ -5000,11 +4928,11 @@
 
 	 }
 
-#endif /* SIS300 */
+#endif /* CONFIG_FB_SIS_300 */
 
       } else {
 
-#ifdef SIS315H  /* ------- 315 series ------ */
+#ifdef CONFIG_FB_SIS_315  /* ------- 315 series ------ */
 
 	 if(SiS_Pr->SiS_VBType & VB_SISLVDS) {	  		/* 315 - LVDS */
 
@@ -5076,13 +5004,13 @@
 	    }
 
          }
-#endif  /* SIS315H */
+#endif  /* CONFIG_FB_SIS_315 */
       }
    }
 }
 
 /* Set CRT2 FIFO on 300/540/630/730 */
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static void
 SiS_SetCRT2FIFO_300(struct SiS_Private *SiS_Pr,unsigned short ModeNo)
 {
@@ -5154,13 +5082,8 @@
 
      } else {
 
-#ifdef SIS_LINUX_KERNEL
 	pci50 = sisfb_read_nbridge_pci_dword(SiS_Pr, 0x50);
 	pciA0 = sisfb_read_nbridge_pci_dword(SiS_Pr, 0xa0);
-#else
-	pci50 = pciReadLong(0x00000000, 0x50);
-	pciA0 = pciReadLong(0x00000000, 0xA0);
-#endif
 
         if(SiS_Pr->ChipType == SIS_730) {
 
@@ -5262,7 +5185,7 @@
 #endif
 
 /* Set CRT2 FIFO on 315/330 series */
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static void
 SiS_SetCRT2FIFO_310(struct SiS_Private *SiS_Pr)
 {
@@ -5420,27 +5343,6 @@
 
   temp = SiS_GetRegByte((SiS_Pr->SiS_P3ca+0x02));
   SiS_SetReg(SiS_Pr->SiS_Part1Port,0x1b,temp);			/* ? */
-
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-   xf86DrvMsg(0, X_INFO, "%d %d %d %d  %d %d %d %d  (%d %d %d %d)\n",
-	SiS_Pr->CHDisplay, SiS_Pr->CHSyncStart, SiS_Pr->CHSyncEnd, SiS_Pr->CHTotal,
-	SiS_Pr->CVDisplay, SiS_Pr->CVSyncStart, SiS_Pr->CVSyncEnd, SiS_Pr->CVTotal,
-	SiS_Pr->CHBlankStart, SiS_Pr->CHBlankEnd, SiS_Pr->CVBlankStart, SiS_Pr->CVBlankEnd);
-
-   xf86DrvMsg(0, X_INFO, " {{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,\n",
-	SiS_Pr->CCRT1CRTC[0], SiS_Pr->CCRT1CRTC[1],
-	SiS_Pr->CCRT1CRTC[2], SiS_Pr->CCRT1CRTC[3],
-	SiS_Pr->CCRT1CRTC[4], SiS_Pr->CCRT1CRTC[5],
-	SiS_Pr->CCRT1CRTC[6], SiS_Pr->CCRT1CRTC[7]);
-   xf86DrvMsg(0, X_INFO, "   0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,\n",
-	SiS_Pr->CCRT1CRTC[8], SiS_Pr->CCRT1CRTC[9],
-	SiS_Pr->CCRT1CRTC[10], SiS_Pr->CCRT1CRTC[11],
-	SiS_Pr->CCRT1CRTC[12], SiS_Pr->CCRT1CRTC[13],
-	SiS_Pr->CCRT1CRTC[14], SiS_Pr->CCRT1CRTC[15]);
-   xf86DrvMsg(0, X_INFO, "   0x%02x}},\n", SiS_Pr->CCRT1CRTC[16]);
-#endif
-#endif
 }
 
 /* Setup panel link
@@ -5455,17 +5357,17 @@
   unsigned short push2, tempax, tempbx, tempcx, temp;
   unsigned int   tempeax = 0, tempebx, tempecx, tempvcfact = 0;
   bool islvds = false, issis  = false, chkdclkfirst = false;
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
   unsigned short crt2crtc = 0;
 #endif
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   unsigned short pushcx;
 #endif
 
   if(ModeNo <= 0x13) {
      modeflag = SiS_Pr->SiS_SModeIDTable[ModeIdIndex].St_ModeFlag;
      resinfo = SiS_Pr->SiS_SModeIDTable[ModeIdIndex].St_ResInfo;
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
      crt2crtc = SiS_Pr->SiS_SModeIDTable[ModeIdIndex].St_CRT2CRTC;
 #endif
   } else if(SiS_Pr->UseCustomMode) {
@@ -5473,7 +5375,7 @@
   } else {
      modeflag = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag;
      resinfo = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_RESINFO;
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
      crt2crtc = SiS_Pr->SiS_RefIndex[RefreshRateTableIndex].Ext_CRT2CRTC;
 #endif
   }
@@ -5494,7 +5396,7 @@
      }
   }
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   if((SiS_Pr->ChipType >= SIS_315H) && (SiS_Pr->SiS_VBInfo & SetCRT2ToLCDA)) {
      if(IS_SIS330) {
         SiS_SetRegOR(SiS_Pr->SiS_Part1Port,0x2D,0x10);
@@ -5744,7 +5646,7 @@
 
   if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300      /* 300 series */
+#ifdef CONFIG_FB_SIS_300      /* 300 series */
      tempeax = SiS_Pr->SiS_VGAVDE << 6;
      temp = (tempeax % (unsigned int)SiS_Pr->SiS_VDE);
      tempeax = tempeax / (unsigned int)SiS_Pr->SiS_VDE;
@@ -5755,11 +5657,11 @@
      temp = (unsigned short)(tempeax & 0x00FF);
      SiS_SetReg(SiS_Pr->SiS_Part1Port,0x1E,temp);      	/* BPLVCFACT */
      tempvcfact = temp;
-#endif /* SIS300 */
+#endif /* CONFIG_FB_SIS_300 */
 
   } else {
 
-#ifdef SIS315H  /* 315 series */
+#ifdef CONFIG_FB_SIS_315  /* 315 series */
      tempeax = SiS_Pr->SiS_VGAVDE << 18;
      tempebx = SiS_Pr->SiS_VDE;
      temp = (tempeax % tempebx);
@@ -5845,7 +5747,7 @@
   temp = (unsigned short)(tempecx & 0x00FF);
   SiS_SetReg(SiS_Pr->SiS_Part1Port,0x23,temp);
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   if(SiS_Pr->ChipType >= SIS_315H) {
      if(SiS_Pr->SiS_VBInfo & SetCRT2ToLCDA) {
         if((islvds) || (SiS_Pr->SiS_VBInfo & VB_SISLVDS)) {
@@ -5863,7 +5765,7 @@
   }
 #endif
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
   if(SiS_Pr->SiS_IF_DEF_TRUMPION) {
      unsigned char *ROMAddr = SiS_Pr->VirtualRomBase;
      unsigned char *trumpdata;
@@ -5899,7 +5801,7 @@
   }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   if(SiS_Pr->SiS_IF_DEF_FSTN || SiS_Pr->SiS_IF_DEF_DSTN) {
      SiS_SetReg(SiS_Pr->SiS_Part1Port,0x25,0x00);
      SiS_SetReg(SiS_Pr->SiS_Part1Port,0x26,0x00);
@@ -5999,7 +5901,7 @@
         SiS_SetReg(SiS_Pr->SiS_Part1Port,0x45,0x0a);
      }
   }
-#endif  /* SIS315H */
+#endif  /* CONFIG_FB_SIS_315 */
 }
 
 /* Set Part 1 */
@@ -6007,12 +5909,12 @@
 SiS_SetGroup1(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex,
 		unsigned short RefreshRateTableIndex)
 {
-#if defined(SIS300) || defined(SIS315H)
+#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315)
   unsigned char   *ROMAddr = SiS_Pr->VirtualRomBase;
 #endif
   unsigned short  temp=0, tempax=0, tempbx=0, tempcx=0, bridgeadd=0;
   unsigned short  pushbx=0, CRT1Index=0, modeflag, resinfo=0;
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   unsigned short  tempbl=0;
 #endif
 
@@ -6038,11 +5940,11 @@
          (SiS_Pr->SiS_VBInfo & SetInSlaveMode)) ) {
 
      if(SiS_Pr->ChipType < SIS_315H ) {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 	SiS_SetCRT2FIFO_300(SiS_Pr, ModeNo);
 #endif
      } else {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	SiS_SetCRT2FIFO_310(SiS_Pr);
 #endif
      }
@@ -6051,7 +5953,7 @@
 
      if(SiS_Pr->ChipType < SIS_315H ) {
 
-#ifdef SIS300   /* ------------- 300 series --------------*/
+#ifdef CONFIG_FB_SIS_300   /* ------------- 300 series --------------*/
 
 	temp = (SiS_Pr->SiS_VGAHT - 1) & 0x0FF;   		  /* BTVGA2HT 0x08,0x09 */
 	SiS_SetReg(SiS_Pr->SiS_Part1Port,0x08,temp);              /* CRT2 Horizontal Total */
@@ -6070,11 +5972,11 @@
 
 	bridgeadd = 12;
 
-#endif /* SIS300 */
+#endif /* CONFIG_FB_SIS_300 */
 
      } else {
 
-#ifdef SIS315H  /* ------------------- 315/330 series --------------- */
+#ifdef CONFIG_FB_SIS_315  /* ------------------- 315/330 series --------------- */
 
 	tempcx = SiS_Pr->SiS_VGAHT;				  /* BTVGA2HT 0x08,0x09 */
 	if(modeflag & HalfDCLK) {
@@ -6125,7 +6027,7 @@
 	   }
         }
 
-#endif  /* SIS315H */
+#endif  /* CONFIG_FB_SIS_315 */
 
      }  /* 315/330 series */
 
@@ -6256,7 +6158,7 @@
 
      if(SiS_Pr->ChipType < SIS_315H) {
 
-#ifdef SIS300  /* ---------- 300 series -------------- */
+#ifdef CONFIG_FB_SIS_300  /* ---------- 300 series -------------- */
 
 	if(SiS_Pr->SiS_VBType & VB_SISVB) {
 	   temp = 0x20;
@@ -6310,11 +6212,11 @@
 
 	SiS_SetRegANDOR(SiS_Pr->SiS_Part1Port,0x13,~0x3C,temp);   /* Panel Link Delay Compensation; (Software Command Reset; Power Saving) */
 
-#endif  /* SIS300 */
+#endif  /* CONFIG_FB_SIS_300 */
 
      } else {
 
-#ifdef SIS315H   /* --------------- 315/330 series ---------------*/
+#ifdef CONFIG_FB_SIS_315   /* --------------- 315/330 series ---------------*/
 
 	if(SiS_Pr->ChipType < SIS_661) {
 
@@ -6349,7 +6251,7 @@
 	if(modeflag & HalfDCLK)       tempax |= 0x40;
 	SiS_SetRegANDOR(SiS_Pr->SiS_Part1Port,0x2C,0x3f,tempax);
 
-#endif  /* SIS315H */
+#endif  /* CONFIG_FB_SIS_315 */
 
      }
 
@@ -6381,7 +6283,7 @@
 /*         SET PART 2 REGISTER GROUP         */
 /*********************************************/
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static unsigned char *
 SiS_GetGroup2CLVXPtr(struct SiS_Private *SiS_Pr, int tabletype)
 {
@@ -6478,7 +6380,7 @@
 }
 #endif
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static void
 SiS_Group2LCDSpecial(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short crt2crtc)
 {
@@ -6690,7 +6592,7 @@
   unsigned int   longtemp, PhaseIndex;
   bool           newtvphase;
   const unsigned char *TimingPoint;
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   unsigned short resindex, CRT2Index;
   const struct SiS_Part2PortTbl *CRT2Part2Ptr = NULL;
 
@@ -7069,7 +6971,7 @@
   SiS_SetRegAND(SiS_Pr->SiS_Part2Port,0x17,0xFB);
   SiS_SetRegAND(SiS_Pr->SiS_Part2Port,0x18,0xDF);
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   if(SiS_GetCRT2Part2Ptr(SiS_Pr, ModeNo, ModeIdIndex, RefreshRateTableIndex,
                           			&CRT2Index, &resindex)) {
       switch(CRT2Index) {
@@ -7130,12 +7032,6 @@
 
     /* Non-expanding: lcdvdes = tempcx = VT-1; lcdvdee = tempbx = VDE-1 */
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-    xf86DrvMsg(0, X_INFO, "lcdvdes 0x%x lcdvdee 0x%x\n", tempcx, tempbx);
-#endif
-#endif
-
     SiS_SetReg(SiS_Pr->SiS_Part2Port,0x05,tempcx);	/* lcdvdes  */
     SiS_SetReg(SiS_Pr->SiS_Part2Port,0x06,tempbx);	/* lcdvdee  */
 
@@ -7184,12 +7080,6 @@
        tempbx = SiS_Pr->CVSyncStart;
     }
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-    xf86DrvMsg(0, X_INFO, "lcdvrs 0x%x\n", tempbx);
-#endif
-#endif
-
     SiS_SetReg(SiS_Pr->SiS_Part2Port,0x04,tempbx);	    /* lcdvrs */
 
     temp = (tempbx >> 4) & 0xF0;
@@ -7201,15 +7091,9 @@
        temp |= (SiS_Pr->CVSyncEnd & 0x0f);
     }
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-    xf86DrvMsg(0, X_INFO, "lcdvre[3:0] 0x%x\n", (temp & 0x0f));
-#endif
-#endif
-
     SiS_SetReg(SiS_Pr->SiS_Part2Port,0x01,temp);
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
     SiS_Group2LCDSpecial(SiS_Pr, ModeNo, crt2crtc);
 #endif
 
@@ -7245,12 +7129,6 @@
        tempax >>= 1;
     }
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-    xf86DrvMsg(0, X_INFO, "lcdhdee 0x%x\n", tempbx);
-#endif
-#endif
-
     tempbx += bridgeoffset;
 
     SiS_SetReg(SiS_Pr->SiS_Part2Port,0x23,tempbx);	    /* lcdhdee */
@@ -7276,12 +7154,6 @@
        tempbx += bridgeoffset;
     }
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-    xf86DrvMsg(0, X_INFO, "lcdhrs 0x%x\n", tempbx);
-#endif
-#endif
-
     SiS_SetReg(SiS_Pr->SiS_Part2Port,0x1C,tempbx);	    /* lcdhrs */
     SiS_SetRegANDOR(SiS_Pr->SiS_Part2Port,0x1D,0x0F,((tempbx >> 4) & 0xf0));
 
@@ -7300,20 +7172,14 @@
        tempbx += bridgeoffset;
     }
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-    xf86DrvMsg(0, X_INFO, "lcdhre 0x%x\n", tempbx);
-#endif
-#endif
-
     SiS_SetReg(SiS_Pr->SiS_Part2Port,0x21,tempbx);	    /* lcdhre */
 
     SiS_SetGroup2_Tail(SiS_Pr, ModeNo);
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
     SiS_Set300Part2Regs(SiS_Pr, ModeIdIndex, RefreshRateTableIndex, ModeNo);
 #endif
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
   } /* CRT2-LCD from table */
 #endif
 }
@@ -7382,7 +7248,7 @@
 /*         SET PART 4 REGISTER GROUP         */
 /*********************************************/
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 #if 0
 static void
 SiS_ShiftXPos(struct SiS_Private *SiS_Pr, int shift)
@@ -8011,7 +7877,7 @@
 
    if(SiS_Pr->SiS_IF_DEF_CH70xx == 1) {
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 
       /* Chrontel 7005 - I assume that it does not come with a 315 series chip */
 
@@ -8124,7 +7990,7 @@
 
       /* Chrontel 7019 - assumed that it does not come with a 300 series chip */
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 
       unsigned short temp;
 
@@ -8175,7 +8041,7 @@
 
 }
 
-#ifdef SIS315H  /* ----------- 315 series only ---------- */
+#ifdef CONFIG_FB_SIS_315  /* ----------- 315 series only ---------- */
 
 void
 SiS_Chrontel701xBLOn(struct SiS_Private *SiS_Pr)
@@ -8657,7 +8523,7 @@
 bool
 SiS_SetCRT2Group(struct SiS_Private *SiS_Pr, unsigned short ModeNo)
 {
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
    unsigned char  *ROMAddr  = SiS_Pr->VirtualRomBase;
 #endif
    unsigned short ModeIdIndex, RefreshRateTableIndex;
@@ -8703,16 +8569,6 @@
       SiS_GetLVDSDesData(SiS_Pr, ModeNo, ModeIdIndex, RefreshRateTableIndex);
    }
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-  xf86DrvMsg(0, X_INFO, "(init301: LCDHDES 0x%03x LCDVDES 0x%03x)\n", SiS_Pr->SiS_LCDHDES, SiS_Pr->SiS_LCDVDES);
-  xf86DrvMsg(0, X_INFO, "(init301: HDE     0x%03x VDE     0x%03x)\n", SiS_Pr->SiS_HDE, SiS_Pr->SiS_VDE);
-  xf86DrvMsg(0, X_INFO, "(init301: VGAHDE  0x%03x VGAVDE  0x%03x)\n", SiS_Pr->SiS_VGAHDE, SiS_Pr->SiS_VGAVDE);
-  xf86DrvMsg(0, X_INFO, "(init301: HT      0x%03x VT      0x%03x)\n", SiS_Pr->SiS_HT, SiS_Pr->SiS_VT);
-  xf86DrvMsg(0, X_INFO, "(init301: VGAHT   0x%03x VGAVT   0x%03x)\n", SiS_Pr->SiS_VGAHT, SiS_Pr->SiS_VGAVT);
-#endif
-#endif
-
    if(SiS_Pr->SiS_SetFlag & LowModeTests) {
       SiS_SetGroup1(SiS_Pr, ModeNo, ModeIdIndex, RefreshRateTableIndex);
    }
@@ -8722,12 +8578,12 @@
       if(SiS_Pr->SiS_SetFlag & LowModeTests) {
 
 	 SiS_SetGroup2(SiS_Pr, ModeNo, ModeIdIndex, RefreshRateTableIndex);
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	 SiS_SetGroup2_C_ELV(SiS_Pr, ModeNo, ModeIdIndex, RefreshRateTableIndex);
 #endif
 	 SiS_SetGroup3(SiS_Pr, ModeNo, ModeIdIndex);
 	 SiS_SetGroup4(SiS_Pr, ModeNo, ModeIdIndex, RefreshRateTableIndex);
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 	 SiS_SetGroup4_C_ELV(SiS_Pr, ModeNo, ModeIdIndex);
 #endif
 	 SiS_SetGroup5(SiS_Pr, ModeNo, ModeIdIndex);
@@ -8758,7 +8614,7 @@
 	 if(SiS_Pr->SiS_IF_DEF_CH70xx != 0) {
 	    if(SiS_Pr->SiS_VBInfo & (SetCRT2ToLCD | SetCRT2ToLCDA)) {
 	       if(SiS_Pr->SiS_IF_DEF_CH70xx == 2) {
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 		  SiS_SetCH701xForLCD(SiS_Pr);
 #endif
 	       }
@@ -8771,7 +8627,7 @@
 
    }
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
    if(SiS_Pr->ChipType < SIS_315H) {
       if(SiS_Pr->SiS_SetFlag & LowModeTests) {
 	 if(SiS_Pr->SiS_UseOEM) {
@@ -8794,7 +8650,7 @@
    }
 #endif
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
    if(SiS_Pr->ChipType >= SIS_315H) {
       if(SiS_Pr->SiS_SetFlag & LowModeTests) {
 	 if(SiS_Pr->ChipType < SIS_661) {
@@ -8873,7 +8729,7 @@
   }
 }
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static unsigned char *
 SiS_SetTrumpBlockLoop(struct SiS_Private *SiS_Pr, unsigned char *dataptr)
 {
@@ -8923,11 +8779,6 @@
      dataptr = SiS_SetTrumpBlockLoop(SiS_Pr, dataptr);
      if(!dataptr) return false;
   }
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-  xf86DrvMsg(0, X_INFO, "Trumpion block success\n");
-#endif
-#endif
   return true;
 }
 #endif
@@ -9002,9 +8853,7 @@
   SiS_SetChReg(SiS_Pr, reg, val, 0);
 }
 
-#ifdef SIS_LINUX_KERNEL
 static
-#endif
 void
 SiS_SetCH70xx(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val)
 {
@@ -9091,9 +8940,7 @@
 
 /* Read from Chrontel 70xx */
 /* Parameter is [Register no (S7-S0)] */
-#ifdef SIS_LINUX_KERNEL
 static
-#endif
 unsigned short
 SiS_GetCH70xx(struct SiS_Private *SiS_Pr, unsigned short tempbx)
 {
@@ -9114,9 +8961,7 @@
 }
 
 /* Our own DDC functions */
-#ifndef SIS_XORG_XF86
 static
-#endif
 unsigned short
 SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags, int VGAEngine,
                 unsigned short adaptnum, unsigned short DDCdatatype, bool checkcr32,
@@ -9224,12 +9069,6 @@
 
     SiS_SetupDDCN(SiS_Pr);
 
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-    xf86DrvMsg(0, X_INFO, "DDC Port %x Index %x Shift %d\n",
-    		SiS_Pr->SiS_DDC_Port, SiS_Pr->SiS_DDC_Index, temp);
-#endif
-#endif
     return 0;
 }
 
@@ -9292,11 +9131,6 @@
     SiS_SetSwitchDDC2(SiS_Pr);
     if(SiS_PrepareDDC(SiS_Pr)) {
          SiS_SetStop(SiS_Pr);
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-         xf86DrvMsg(0, X_INFO, "Probe: Prepare failed\n");
-#endif
-#endif
          return 0xFFFF;
     }
     mask = 0xf0;
@@ -9310,11 +9144,6 @@
        } else {
            failed = true;
 	   ret = 0xFFFF;
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-           xf86DrvMsg(0, X_INFO, "Probe: Read 1 failed\n");
-#endif
-#endif
        }
     }
     if(!failed) {
@@ -9324,11 +9153,6 @@
        if(temp == value) ret = 0;
        else {
           ret = 0xFFFF;
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-          xf86DrvMsg(0, X_INFO, "Probe: Read 2 failed\n");
-#endif
-#endif
           if(SiS_Pr->SiS_DDC_DeviceAddr == 0xa0) {
              if(temp == 0x30) ret = 0;
           }
@@ -9338,9 +9162,7 @@
     return ret;
 }
 
-#ifndef SIS_XORG_XF86
 static
-#endif
 unsigned short
 SiS_ProbeDDC(struct SiS_Private *SiS_Pr)
 {
@@ -9357,9 +9179,7 @@
    return flag;
 }
 
-#ifndef SIS_XORG_XF86
 static
-#endif
 unsigned short
 SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype, unsigned char *buffer)
 {
@@ -9606,11 +9426,6 @@
     temp = SiS_GetReg(SiS_Pr->SiS_DDC_Port,SiS_Pr->SiS_DDC_Index);
   } while((!(temp & SiS_Pr->SiS_DDC_Clk)) && --watchdog);
   if (!watchdog) {
-#ifdef SIS_XORG_XF86
-#ifdef TWDEBUG
-        xf86DrvMsg(0, X_INFO, "SetClkHigh failed\n");
-#endif
-#endif
   	return 0xFFFF;
   }
   SiS_DDC2Delay(SiS_Pr,SiS_I2CDELAYSHORT);
@@ -9641,7 +9456,7 @@
 
 /* =============== SiS 315/330 O.E.M. ================= */
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 
 static unsigned short
 GetRAMDACromptr(struct SiS_Private *SiS_Pr)
@@ -10829,7 +10644,7 @@
 
 /*  =================  SiS 300 O.E.M. ================== */
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 
 static void
 SetOEMLCDData2(struct SiS_Private *SiS_Pr, unsigned short ModeNo,unsigned short ModeIdIndex,
diff --git a/drivers/video/sis/init301.h b/drivers/video/sis/init301.h
index 51d9922..e1fd31d 100644
--- a/drivers/video/sis/init301.h
+++ b/drivers/video/sis/init301.h
@@ -53,15 +53,8 @@
 #ifndef  _INIT301_H_
 #define  _INIT301_H_
 
-#include "osdef.h"
 #include "initdef.h"
 
-#ifdef SIS_XORG_XF86
-#include "sis.h"
-#include "sis_regs.h"
-#endif
-
-#ifdef SIS_LINUX_KERNEL
 #include "vgatypes.h"
 #include "vstruct.h"
 #ifdef SIS_CP
@@ -72,7 +65,6 @@
 #include <linux/fb.h>
 #include "sis.h"
 #include <video/sisfb.h>
-#endif
 
 static const unsigned char SiS_YPbPrTable[3][64] = {
   {
@@ -237,7 +229,7 @@
     0xFF,0xFF,
 };
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 /* 661 et al LCD data structure (2.03.00) */
 static const unsigned char SiS_LCDStruct661[] = {
     /* 1024x768 */
@@ -279,7 +271,7 @@
 };
 #endif
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static unsigned char SiS300_TrumpionData[14][80] = {
   { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
     0x20,0x03,0x0B,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x10,0x00,0x00,0x04,0x23,
@@ -356,9 +348,6 @@
 #endif
 
 void		SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
-#ifndef SIS_LINUX_KERNEL
-void		SiS_LockCRT2(struct SiS_Private *SiS_Pr);
-#endif
 void		SiS_EnableCRT2(struct SiS_Private *SiS_Pr);
 unsigned short	SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
 void		SiS_WaitRetrace1(struct SiS_Private *SiS_Pr);
@@ -375,9 +364,6 @@
 			unsigned short RefreshRateTableIndex);
 unsigned short	SiS_GetResInfo(struct SiS_Private *SiS_Pr,unsigned short ModeNo,unsigned short ModeIdIndex);
 void		SiS_DisableBridge(struct SiS_Private *SiS_Pr);
-#ifndef SIS_LINUX_KERNEL
-void		SiS_EnableBridge(struct SiS_Private *SiS_Pr);
-#endif
 bool		SiS_SetCRT2Group(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
 void		SiS_SiS30xBLOn(struct SiS_Private *SiS_Pr);
 void		SiS_SiS30xBLOff(struct SiS_Private *SiS_Pr);
@@ -386,13 +372,9 @@
 unsigned short	SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short tempax);
 void		SiS_SetCH701x(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
 unsigned short	SiS_GetCH701x(struct SiS_Private *SiS_Pr, unsigned short tempax);
-#ifndef SIS_LINUX_KERNEL
-void		SiS_SetCH70xx(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
-unsigned short	SiS_GetCH70xx(struct SiS_Private *SiS_Pr, unsigned short tempax);
-#endif
 void		SiS_SetCH70xxANDOR(struct SiS_Private *SiS_Pr, unsigned short reg,
 			unsigned char orval,unsigned short andval);
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static void	SiS_Chrontel701xOn(struct SiS_Private *SiS_Pr);
 static void	SiS_Chrontel701xOff(struct SiS_Private *SiS_Pr);
 static void	SiS_ChrontelInitTVVSync(struct SiS_Private *SiS_Pr);
@@ -401,7 +383,7 @@
 void		SiS_Chrontel701xBLOff(struct SiS_Private *SiS_Pr);
 #endif /* 315 */
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static  bool	SiS_SetTrumpionBlock(struct SiS_Private *SiS_Pr, unsigned char *dataptr);
 void		SiS_SetChrontelGPIO(struct SiS_Private *SiS_Pr, unsigned short myvbinfo);
 #endif
@@ -412,21 +394,12 @@
 			unsigned short adaptnum, unsigned short DDCdatatype,
 			unsigned char *buffer, unsigned int VBFlags2);
 
-#ifdef SIS_XORG_XF86
-unsigned short		SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags,
-				int VGAEngine, unsigned short adaptnum, unsigned short DDCdatatype,
-				bool checkcr32, unsigned int VBFlags2);
-unsigned short		SiS_ProbeDDC(struct SiS_Private *SiS_Pr);
-unsigned short		SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype,
-				unsigned char *buffer);
-#else
 static unsigned short	SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags,
 				int VGAEngine, unsigned short adaptnum, unsigned short DDCdatatype,
 				bool checkcr32, unsigned int VBFlags2);
 static unsigned short	SiS_ProbeDDC(struct SiS_Private *SiS_Pr);
 static unsigned short	SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype,
 				unsigned char *buffer);
-#endif
 static void		SiS_SetSwitchDDC2(struct SiS_Private *SiS_Pr);
 static unsigned short	SiS_SetStart(struct SiS_Private *SiS_Pr);
 static unsigned short	SiS_SetStop(struct SiS_Private *SiS_Pr);
@@ -441,13 +414,13 @@
 static void		SiS_SendACK(struct SiS_Private *SiS_Pr, unsigned short yesno);
 static unsigned short	SiS_DoProbeDDC(struct SiS_Private *SiS_Pr);
 
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 static void		SiS_OEM300Setting(struct SiS_Private *SiS_Pr,
 				unsigned short ModeNo, unsigned short ModeIdIndex, unsigned short RefTabindex);
 static void		SetOEMLCDData2(struct SiS_Private *SiS_Pr,
 				unsigned short ModeNo, unsigned short ModeIdIndex,unsigned short RefTableIndex);
 #endif
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
 static void		SiS_OEM310Setting(struct SiS_Private *SiS_Pr,
 				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
 static void		SiS_OEM661Setting(struct SiS_Private *SiS_Pr,
@@ -482,15 +455,13 @@
 extern void		SiS_CalcCRRegisters(struct SiS_Private *SiS_Pr, int depth);
 extern unsigned short	SiS_GetRefCRTVCLK(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
 extern unsigned short	SiS_GetRefCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
-#ifdef SIS300
+#ifdef CONFIG_FB_SIS_300
 extern void		SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *tempbx,
 				unsigned short *tempcl);
 extern unsigned short	SiS_GetFIFOThresholdB300(unsigned short tempbx, unsigned short tempcl);
 extern unsigned short	SiS_GetLatencyFactor630(struct SiS_Private *SiS_Pr, unsigned short index);
-#ifdef SIS_LINUX_KERNEL
 extern unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
 extern unsigned int	sisfb_read_lpc_pci_dword(struct SiS_Private *SiS_Pr, int reg);
 #endif
-#endif
 
 #endif
diff --git a/drivers/video/sis/initextlfb.c b/drivers/video/sis/initextlfb.c
index 99c04a4..9dec64d 100644
--- a/drivers/video/sis/initextlfb.c
+++ b/drivers/video/sis/initextlfb.c
@@ -25,7 +25,6 @@
  * Author:	Thomas Winischhofer <thomas@winischhofer.net>
  */
 
-#include "osdef.h"
 #include "initdef.h"
 #include "vgatypes.h"
 #include "vstruct.h"
@@ -59,7 +58,7 @@
 
     if(rateindex > 0) rateindex--;
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
     switch(ModeNo) {
     case 0x5a: ModeNo = 0x50; break;
     case 0x5b: ModeNo = 0x56;
@@ -103,7 +102,7 @@
 
     if(rateindex > 0) rateindex--;
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
     switch(ModeNo) {
        case 0x5a: ModeNo = 0x50; break;
        case 0x5b: ModeNo = 0x56;
@@ -187,7 +186,7 @@
 
     if(rateindex > 0) rateindex--;
 
-#ifdef SIS315H
+#ifdef CONFIG_FB_SIS_315
     switch(ModeNo) {
        case 0x5a: ModeNo = 0x50; break;
        case 0x5b: ModeNo = 0x56;
diff --git a/drivers/video/sis/osdef.h b/drivers/video/sis/osdef.h
deleted file mode 100644
index 6ff8f98..0000000
--- a/drivers/video/sis/osdef.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/* $XFree86$ */
-/* $XdotOrg$ */
-/*
- * OS depending defines
- *
- * Copyright (C) 2001-2005 by Thomas Winischhofer, Vienna, Austria
- *
- * If distributed as part of the Linux kernel, the following license terms
- * apply:
- *
- * * This program is free software; you can redistribute it and/or modify
- * * it under the terms of the GNU General Public License as published by
- * * the Free Software Foundation; either version 2 of the named License,
- * * or any later version.
- * *
- * * This program is distributed in the hope that it will be useful,
- * * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * * GNU General Public License for more details.
- * *
- * * You should have received a copy of the GNU General Public License
- * * along with this program; if not, write to the Free Software
- * * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
- *
- * Otherwise, the following license terms apply:
- *
- * * Redistribution and use in source and binary forms, with or without
- * * modification, are permitted provided that the following conditions
- * * are met:
- * * 1) Redistributions of source code must retain the above copyright
- * *    notice, this list of conditions and the following disclaimer.
- * * 2) Redistributions in binary form must reproduce the above copyright
- * *    notice, this list of conditions and the following disclaimer in the
- * *    documentation and/or other materials provided with the distribution.
- * * 3) The name of the author may not be used to endorse or promote products
- * *    derived from this software without specific prior written permission.
- * *
- * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: 	Thomas Winischhofer <thomas@winischhofer.net>
- *		Silicon Integrated Systems, Inc. (used by permission)
- *
- */
-
-#ifndef _SIS_OSDEF_H_
-#define _SIS_OSDEF_H_
-
-/* The choices are: */
-#define SIS_LINUX_KERNEL		/* Linux kernel framebuffer */
-#undef  SIS_XORG_XF86			/* XFree86/X.org */
-
-#ifdef OutPortByte
-#undef OutPortByte
-#endif
-
-#ifdef OutPortWord
-#undef OutPortWord
-#endif
-
-#ifdef OutPortLong
-#undef OutPortLong
-#endif
-
-#ifdef InPortByte
-#undef InPortByte
-#endif
-
-#ifdef InPortWord
-#undef InPortWord
-#endif
-
-#ifdef InPortLong
-#undef InPortLong
-#endif
-
-/**********************************************************************/
-/*  LINUX KERNEL                                                      */
-/**********************************************************************/
-
-#ifdef SIS_LINUX_KERNEL
-
-#ifdef CONFIG_FB_SIS_300
-#define SIS300
-#endif
-
-#ifdef CONFIG_FB_SIS_315
-#define SIS315H
-#endif
-
-#if !defined(SIS300) && !defined(SIS315H)
-#warning Neither CONFIG_FB_SIS_300 nor CONFIG_FB_SIS_315 is set
-#warning sisfb will not work!
-#endif
-
-#define OutPortByte(p,v) outb((u8)(v),(SISIOADDRESS)(p))
-#define OutPortWord(p,v) outw((u16)(v),(SISIOADDRESS)(p))
-#define OutPortLong(p,v) outl((u32)(v),(SISIOADDRESS)(p))
-#define InPortByte(p)    inb((SISIOADDRESS)(p))
-#define InPortWord(p)    inw((SISIOADDRESS)(p))
-#define InPortLong(p)    inl((SISIOADDRESS)(p))
-#define SiS_SetMemory(MemoryAddress,MemorySize,value) memset_io(MemoryAddress, value, MemorySize)
-
-#endif /* LINUX_KERNEL */
-
-/**********************************************************************/
-/*  XFree86/X.org                                                    */
-/**********************************************************************/
-
-#ifdef SIS_XORG_XF86
-
-#define SIS300
-#define SIS315H
-
-#define OutPortByte(p,v) outSISREG((IOADDRESS)(p),(CARD8)(v))
-#define OutPortWord(p,v) outSISREGW((IOADDRESS)(p),(CARD16)(v))
-#define OutPortLong(p,v) outSISREGL((IOADDRESS)(p),(CARD32)(v))
-#define InPortByte(p)    inSISREG((IOADDRESS)(p))
-#define InPortWord(p)    inSISREGW((IOADDRESS)(p))
-#define InPortLong(p)    inSISREGL((IOADDRESS)(p))
-#define SiS_SetMemory(MemoryAddress,MemorySize,value) memset(MemoryAddress, value, MemorySize)
-
-#endif /* XF86 */
-
-#endif  /* _OSDEF_H_ */
diff --git a/drivers/video/sis/sis.h b/drivers/video/sis/sis.h
index 7c5710e..80d89d3 100644
--- a/drivers/video/sis/sis.h
+++ b/drivers/video/sis/sis.h
@@ -24,7 +24,6 @@
 #ifndef _SIS_H_
 #define _SIS_H_
 
-#include "osdef.h"
 #include <video/sisfb.h>
 
 #include "vgatypes.h"
diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c
index b52f8e4..7e3370f 100644
--- a/drivers/video/sis/sis_main.c
+++ b/drivers/video/sis/sis_main.c
@@ -60,6 +60,11 @@
 #include "sis.h"
 #include "sis_main.h"
 
+#if !defined(CONFIG_FB_SIS_300) && !defined(CONFIG_FB_SIS_315)
+#warning Neither CONFIG_FB_SIS_300 nor CONFIG_FB_SIS_315 is set
+#warning sisfb will not work!
+#endif
+
 static void sisfb_handle_command(struct sis_video_info *ivideo,
 				 struct sisfb_cmd *sisfb_command);
 
@@ -4114,14 +4119,6 @@
 			if(sisfb_check_rom(rom_base, ivideo)) {
 
 				if((myrombase = vmalloc(65536))) {
-
-					/* Work around bug in pci/rom.c: Folks forgot to check
-					 * whether the size retrieved from the BIOS image eventually
-					 * is larger than the mapped size
-					 */
-					if(pci_resource_len(pdev, PCI_ROM_RESOURCE) < romsize)
-						romsize = pci_resource_len(pdev, PCI_ROM_RESOURCE);
-
 					memcpy_fromio(myrombase, rom_base,
 							(romsize > 65536) ? 65536 : romsize);
 				}
@@ -4155,23 +4152,6 @@
 
         }
 
-#else
-
-	pci_read_config_dword(pdev, PCI_ROM_ADDRESS, &temp);
-	pci_write_config_dword(pdev, PCI_ROM_ADDRESS,
-			(ivideo->video_base & PCI_ROM_ADDRESS_MASK) | PCI_ROM_ADDRESS_ENABLE);
-
-	rom_base = ioremap(ivideo->video_base, 65536);
-	if(rom_base) {
-		if(sisfb_check_rom(rom_base, ivideo)) {
-			if((myrombase = vmalloc(65536)))
-				memcpy_fromio(myrombase, rom_base, 65536);
-		}
-		iounmap(rom_base);
-	}
-
-	pci_write_config_dword(pdev, PCI_ROM_ADDRESS, temp);
-
 #endif
 
 	return myrombase;
@@ -4181,6 +4161,9 @@
 sisfb_post_map_vram(struct sis_video_info *ivideo, unsigned int *mapsize,
 			unsigned int min)
 {
+	if (*mapsize < (min << 20))
+		return;
+
 	ivideo->video_vbase = ioremap(ivideo->video_base, (*mapsize));
 
 	if(!ivideo->video_vbase) {
@@ -4514,7 +4497,7 @@
 	} else {
 #endif
 		/* Need to map max FB size for finding out about RAM size */
-		mapsize = 64 << 20;
+		mapsize = ivideo->video_size;
 		sisfb_post_map_vram(ivideo, &mapsize, 4);
 
 		if(ivideo->video_vbase) {
@@ -4680,7 +4663,7 @@
 	orSISIDXREG(SISSR, 0x20, (0x80 | 0x04));
 
 	/* Need to map max FB size for finding out about RAM size */
-	mapsize = 256 << 20;
+	mapsize = ivideo->video_size;
 	sisfb_post_map_vram(ivideo, &mapsize, 32);
 
 	if(!ivideo->video_vbase) {
@@ -5936,6 +5919,7 @@
 	}
 
 	ivideo->video_base = pci_resource_start(pdev, 0);
+	ivideo->video_size = pci_resource_len(pdev, 0);
 	ivideo->mmio_base  = pci_resource_start(pdev, 1);
 	ivideo->mmio_size  = pci_resource_len(pdev, 1);
 	ivideo->SiS_Pr.RelIO = pci_resource_start(pdev, 2) + 0x30;
diff --git a/drivers/video/sis/vgatypes.h b/drivers/video/sis/vgatypes.h
index 81a22ea..12c0dfa 100644
--- a/drivers/video/sis/vgatypes.h
+++ b/drivers/video/sis/vgatypes.h
@@ -55,21 +55,10 @@
 
 #define SISIOMEMTYPE
 
-#ifdef SIS_LINUX_KERNEL
 typedef unsigned long SISIOADDRESS;
 #include <linux/types.h>  /* Need __iomem */
 #undef SISIOMEMTYPE
 #define SISIOMEMTYPE __iomem
-#endif
-
-#ifdef SIS_XORG_XF86
-#if XF86_VERSION_CURRENT < XF86_VERSION_NUMERIC(4,2,0,0,0)
-typedef unsigned long IOADDRESS;
-typedef unsigned long SISIOADDRESS;
-#else
-typedef IOADDRESS SISIOADDRESS;
-#endif
-#endif
 
 typedef enum _SIS_CHIP_TYPE {
     SIS_VGALegacy = 0,
diff --git a/drivers/video/sis/vstruct.h b/drivers/video/sis/vstruct.h
index bef4aae..ea94d21 100644
--- a/drivers/video/sis/vstruct.h
+++ b/drivers/video/sis/vstruct.h
@@ -233,24 +233,15 @@
 {
 	unsigned char			ChipType;
 	unsigned char			ChipRevision;
-#ifdef SIS_XORG_XF86
-	PCITAG				PciTag;
-#endif
-#ifdef SIS_LINUX_KERNEL
 	void				*ivideo;
-#endif
 	unsigned char 			*VirtualRomBase;
 	bool				UseROM;
-#ifdef SIS_LINUX_KERNEL
 	unsigned char SISIOMEMTYPE	*VideoMemoryAddress;
 	unsigned int			VideoMemorySize;
-#endif
 	SISIOADDRESS			IOAddress;
 	SISIOADDRESS			IOAddress2;  /* For dual chip XGI volari */
 
-#ifdef SIS_LINUX_KERNEL
 	SISIOADDRESS			RelIO;
-#endif
 	SISIOADDRESS			SiS_P3c4;
 	SISIOADDRESS			SiS_P3d4;
 	SISIOADDRESS			SiS_P3c0;
@@ -280,9 +271,6 @@
 	unsigned short			SiS_IF_DEF_FSTN;
 	unsigned short			SiS_SysFlags;
 	unsigned char			SiS_VGAINFO;
-#ifdef SIS_XORG_XF86
-	unsigned short			SiS_CP1, SiS_CP2, SiS_CP3, SiS_CP4;
-#endif
 	bool				SiS_UseROM;
 	bool				SiS_ROMNew;
 	bool				SiS_XGIROM;
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3a43ebf..efb35aa 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -9,19 +9,19 @@
 			   struct device_attribute *attr, char *buf)
 {
 	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
-	return sprintf(buf, "%hu", dev->id.device);
+	return sprintf(buf, "0x%04x\n", dev->id.device);
 }
 static ssize_t vendor_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
 	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
-	return sprintf(buf, "%hu", dev->id.vendor);
+	return sprintf(buf, "0x%04x\n", dev->id.vendor);
 }
 static ssize_t status_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
 	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
-	return sprintf(buf, "0x%08x", dev->config->get_status(dev));
+	return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));
 }
 static ssize_t modalias_show(struct device *_d,
 			     struct device_attribute *attr, char *buf)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 1475ed6..cc2f73e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -230,9 +230,6 @@
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 	END_USE(vq);
 
-	/* If we're indirect, we can fit many (assuming not OOM). */
-	if (vq->indirect)
-		return vq->num_free ? vq->vring.num : 0;
 	return vq->num_free;
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index eb8a78d..533a199 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -8,9 +8,12 @@
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XEN_DEV_EVTCHN)	+= evtchn.o
+obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
 obj-$(CONFIG_XENFS)		+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
 obj-$(CONFIG_SWIOTLB_XEN)	+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)		+= pci.o
+
+xen-evtchn-y			:= evtchn.o
+
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 500290b..2b17ad5 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -50,6 +50,7 @@
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
+#include <asm/e820.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
@@ -119,7 +120,7 @@
 }
 
 /* balloon_append: add the given page to the balloon. */
-static void balloon_append(struct page *page)
+static void __balloon_append(struct page *page)
 {
 	/* Lowmem is re-populated first, so highmem pages go at list tail. */
 	if (PageHighMem(page)) {
@@ -130,7 +131,11 @@
 		list_add(&page->lru, &ballooned_pages);
 		balloon_stats.balloon_low++;
 	}
+}
 
+static void balloon_append(struct page *page)
+{
+	__balloon_append(page);
 	totalram_pages--;
 }
 
@@ -191,7 +196,7 @@
 
 static int increase_reservation(unsigned long nr_pages)
 {
-	unsigned long  pfn, i, flags;
+	unsigned long  pfn, i;
 	struct page   *page;
 	long           rc;
 	struct xen_memory_reservation reservation = {
@@ -203,8 +208,6 @@
 	if (nr_pages > ARRAY_SIZE(frame_list))
 		nr_pages = ARRAY_SIZE(frame_list);
 
-	spin_lock_irqsave(&xen_reservation_lock, flags);
-
 	page = balloon_first_page();
 	for (i = 0; i < nr_pages; i++) {
 		BUG_ON(page == NULL);
@@ -247,14 +250,12 @@
 	balloon_stats.current_pages += rc;
 
  out:
-	spin_unlock_irqrestore(&xen_reservation_lock, flags);
-
 	return rc < 0 ? rc : rc != nr_pages;
 }
 
 static int decrease_reservation(unsigned long nr_pages)
 {
-	unsigned long  pfn, i, flags;
+	unsigned long  pfn, i;
 	struct page   *page;
 	int            need_sleep = 0;
 	int ret;
@@ -292,8 +293,6 @@
 	kmap_flush_unused();
 	flush_tlb_all();
 
-	spin_lock_irqsave(&xen_reservation_lock, flags);
-
 	/* No more mappings: invalidate P2M and add to balloon. */
 	for (i = 0; i < nr_pages; i++) {
 		pfn = mfn_to_pfn(frame_list[i]);
@@ -308,8 +307,6 @@
 
 	balloon_stats.current_pages -= nr_pages;
 
-	spin_unlock_irqrestore(&xen_reservation_lock, flags);
-
 	return need_sleep;
 }
 
@@ -395,7 +392,7 @@
 
 static int __init balloon_init(void)
 {
-	unsigned long pfn;
+	unsigned long pfn, extra_pfn_end;
 	struct page *page;
 
 	if (!xen_pv_domain())
@@ -416,10 +413,15 @@
 	register_balloon(&balloon_sysdev);
 
 	/* Initialise the balloon with excess memory space. */
-	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+	extra_pfn_end = min(e820_end_of_ram_pfn(),
+			    (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
+	for (pfn = PFN_UP(xen_extra_mem_start);
+	     pfn < extra_pfn_end;
+	     pfn++) {
 		page = pfn_to_page(pfn);
-		if (!PageReserved(page))
-			balloon_append(page);
+		/* totalram_pages doesn't include the boot-time
+		   balloon extension, so don't subtract from it. */
+		__balloon_append(page);
 	}
 
 	target_watch.callback = watch_target;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 321a0c8..2811bb9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -278,17 +278,17 @@
 	cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
 #endif
 
-	__clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
-	__set_bit(chn, cpu_evtchn_mask(cpu));
+	clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
+	set_bit(chn, cpu_evtchn_mask(cpu));
 
 	irq_info[irq].cpu = cpu;
 }
 
 static void init_evtchn_cpu_bindings(void)
 {
+	int i;
 #ifdef CONFIG_SMP
 	struct irq_desc *desc;
-	int i;
 
 	/* By default all event channels notify CPU#0. */
 	for_each_irq_desc(i, desc) {
@@ -296,7 +296,10 @@
 	}
 #endif
 
-	memset(cpu_evtchn_mask(0), ~0, sizeof(struct cpu_evtchn_s));
+	for_each_possible_cpu(i)
+		memset(cpu_evtchn_mask(i),
+		       (i == 0) ? ~0 : 0, sizeof(struct cpu_evtchn_s));
+
 }
 
 static inline void clear_evtchn(int port)
@@ -752,7 +755,7 @@
 		goto out;
 
 	if (xen_initial_domain()) {
-		unmap_irq.pirq = info->u.pirq.gsi;
+		unmap_irq.pirq = info->u.pirq.pirq;
 		unmap_irq.domid = DOMID_SELF;
 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
 		if (rc) {
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index fec6ba3..ef11daf 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -69,20 +69,51 @@
 	const char *name;
 };
 
-/* Who's bound to each port? */
-static struct per_user_data *port_user[NR_EVENT_CHANNELS];
+/*
+ * Who's bound to each port?  This is logically an array of struct
+ * per_user_data *, but we encode the current enabled-state in bit 0.
+ */
+static unsigned long *port_user;
 static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
 
-irqreturn_t evtchn_interrupt(int irq, void *data)
+static inline struct per_user_data *get_port_user(unsigned port)
+{
+	return (struct per_user_data *)(port_user[port] & ~1);
+}
+
+static inline void set_port_user(unsigned port, struct per_user_data *u)
+{
+	port_user[port] = (unsigned long)u;
+}
+
+static inline bool get_port_enabled(unsigned port)
+{
+	return port_user[port] & 1;
+}
+
+static inline void set_port_enabled(unsigned port, bool enabled)
+{
+	if (enabled)
+		port_user[port] |= 1;
+	else
+		port_user[port] &= ~1;
+}
+
+static irqreturn_t evtchn_interrupt(int irq, void *data)
 {
 	unsigned int port = (unsigned long)data;
 	struct per_user_data *u;
 
 	spin_lock(&port_user_lock);
 
-	u = port_user[port];
+	u = get_port_user(port);
+
+	WARN(!get_port_enabled(port),
+	     "Interrupt for port %d, but apparently not enabled; per-user %p\n",
+	     port, u);
 
 	disable_irq_nosync(irq);
+	set_port_enabled(port, false);
 
 	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
 		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
@@ -92,9 +123,8 @@
 			kill_fasync(&u->evtchn_async_queue,
 				    SIGIO, POLL_IN);
 		}
-	} else {
+	} else
 		u->ring_overflow = 1;
-	}
 
 	spin_unlock(&port_user_lock);
 
@@ -198,9 +228,18 @@
 		goto out;
 
 	spin_lock_irq(&port_user_lock);
-	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
-		if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
-			enable_irq(irq_from_evtchn(kbuf[i]));
+
+	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
+		unsigned port = kbuf[i];
+
+		if (port < NR_EVENT_CHANNELS &&
+		    get_port_user(port) == u &&
+		    !get_port_enabled(port)) {
+			set_port_enabled(port, true);
+			enable_irq(irq_from_evtchn(port));
+		}
+	}
+
 	spin_unlock_irq(&port_user_lock);
 
 	rc = count;
@@ -222,8 +261,9 @@
 	 * interrupt handler yet, and our caller has already
 	 * serialized bind operations.)
 	 */
-	BUG_ON(port_user[port] != NULL);
-	port_user[port] = u;
+	BUG_ON(get_port_user(port) != NULL);
+	set_port_user(port, u);
+	set_port_enabled(port, true); /* start enabled */
 
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
 				       u->name, (void *)(unsigned long)port);
@@ -239,10 +279,7 @@
 
 	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
 
-	/* make sure we unbind the irq handler before clearing the port */
-	barrier();
-
-	port_user[port] = NULL;
+	set_port_user(port, NULL);
 }
 
 static long evtchn_ioctl(struct file *file,
@@ -333,15 +370,17 @@
 		spin_lock_irq(&port_user_lock);
 
 		rc = -ENOTCONN;
-		if (port_user[unbind.port] != u) {
+		if (get_port_user(unbind.port) != u) {
 			spin_unlock_irq(&port_user_lock);
 			break;
 		}
 
-		evtchn_unbind_from_user(u, unbind.port);
+		disable_irq(irq_from_evtchn(unbind.port));
 
 		spin_unlock_irq(&port_user_lock);
 
+		evtchn_unbind_from_user(u, unbind.port);
+
 		rc = 0;
 		break;
 	}
@@ -355,7 +394,7 @@
 
 		if (notify.port >= NR_EVENT_CHANNELS) {
 			rc = -EINVAL;
-		} else if (port_user[notify.port] != u) {
+		} else if (get_port_user(notify.port) != u) {
 			rc = -ENOTCONN;
 		} else {
 			notify_remote_via_evtchn(notify.port);
@@ -431,7 +470,7 @@
 
 	filp->private_data = u;
 
-	return 0;
+	return nonseekable_open(inode, filp);;
 }
 
 static int evtchn_release(struct inode *inode, struct file *filp)
@@ -444,14 +483,21 @@
 	free_page((unsigned long)u->ring);
 
 	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-		if (port_user[i] != u)
+		if (get_port_user(i) != u)
 			continue;
 
-		evtchn_unbind_from_user(port_user[i], i);
+		disable_irq(irq_from_evtchn(i));
 	}
 
 	spin_unlock_irq(&port_user_lock);
 
+	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+		if (get_port_user(i) != u)
+			continue;
+
+		evtchn_unbind_from_user(get_port_user(i), i);
+	}
+
 	kfree(u->name);
 	kfree(u);
 
@@ -467,12 +513,12 @@
 	.fasync  = evtchn_fasync,
 	.open    = evtchn_open,
 	.release = evtchn_release,
-	.llseek = noop_llseek,
+	.llseek	 = no_llseek,
 };
 
 static struct miscdevice evtchn_miscdev = {
 	.minor        = MISC_DYNAMIC_MINOR,
-	.name         = "evtchn",
+	.name         = "xen/evtchn",
 	.fops         = &evtchn_fops,
 };
 static int __init evtchn_init(void)
@@ -482,8 +528,11 @@
 	if (!xen_domain())
 		return -ENODEV;
 
+	port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
+	if (port_user == NULL)
+		return -ENOMEM;
+
 	spin_lock_init(&port_user_lock);
-	memset(port_user, 0, sizeof(port_user));
 
 	/* Create '/dev/misc/evtchn'. */
 	err = misc_register(&evtchn_miscdev);
@@ -499,6 +548,9 @@
 
 static void __exit evtchn_cleanup(void)
 {
+	kfree(port_user);
+	port_user = NULL;
+
 	misc_deregister(&evtchn_miscdev);
 }
 
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
index f80be7f..dbd3b16 100644
--- a/drivers/xen/xenfs/privcmd.c
+++ b/drivers/xen/xenfs/privcmd.c
@@ -15,7 +15,6 @@
 #include <linux/mman.h>
 #include <linux/uaccess.h>
 #include <linux/swap.h>
-#include <linux/smp_lock.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/seq_file.h>
@@ -266,9 +265,7 @@
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
 
-	put_user(*mfnp, st->user++);
-
-	return 0;
+	return put_user(*mfnp, st->user++);
 }
 
 static struct vm_operations_struct privcmd_vm_ops;
@@ -323,10 +320,8 @@
 	up_write(&mm->mmap_sem);
 
 	if (state.err > 0) {
-		ret = 0;
-
 		state.user = m.arr;
-		traverse_pages(m.num, sizeof(xen_pfn_t),
+		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
 			       &pagelist,
 			       mmap_return_errors, &state);
 	}
@@ -384,8 +379,9 @@
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return -ENOSYS;
 
-	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
+	 * how to recreate these mappings */
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
 	vma->vm_ops = &privcmd_vm_ops;
 	vma->vm_private_data = NULL;
 
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index f6339d1..1aa3897 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -12,8 +12,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/magic.h>
-#include <linux/mm.h>
-#include <linux/backing-dev.h>
 
 #include <xen/xen.h>
 
@@ -24,28 +22,12 @@
 MODULE_DESCRIPTION("Xen filesystem");
 MODULE_LICENSE("GPL");
 
-static int xenfs_set_page_dirty(struct page *page)
-{
-	return !TestSetPageDirty(page);
-}
-
-static const struct address_space_operations xenfs_aops = {
-	.set_page_dirty = xenfs_set_page_dirty,
-};
-
-static struct backing_dev_info xenfs_backing_dev_info = {
-	.ra_pages	= 0,	/* No readahead */
-	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
-};
-
 static struct inode *xenfs_make_inode(struct super_block *sb, int mode)
 {
 	struct inode *ret = new_inode(sb);
 
 	if (ret) {
 		ret->i_mode = mode;
-		ret->i_mapping->a_ops = &xenfs_aops;
-		ret->i_mapping->backing_dev_info = &xenfs_backing_dev_info;
 		ret->i_uid = ret->i_gid = 0;
 		ret->i_blocks = 0;
 		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
@@ -121,9 +103,9 @@
 	return rc;
 }
 
-static int xenfs_mount(struct file_system_type *fs_type,
-			int flags, const char *dev_name,
-			void *data)
+static struct dentry *xenfs_mount(struct file_system_type *fs_type,
+				  int flags, const char *dev_name,
+				  void *data)
 {
 	return mount_single(fs_type, flags, data, xenfs_fill_super);
 }
@@ -137,25 +119,11 @@
 
 static int __init xenfs_init(void)
 {
-	int err;
-	if (!xen_domain()) {
-		printk(KERN_INFO "xenfs: not registering filesystem on non-xen platform\n");
-		return 0;
-	}
+	if (xen_domain())
+		return register_filesystem(&xenfs_type);
 
-	err = register_filesystem(&xenfs_type);
-	if (err) {
-		printk(KERN_ERR "xenfs: Unable to register filesystem!\n");
-		goto out;
-	}
-
-	err = bdi_init(&xenfs_backing_dev_info);
-	if (err)
-		unregister_filesystem(&xenfs_type);
-
- out:
-
-	return err;
+	printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n");
+	return 0;
 }
 
 static void __exit xenfs_exit(void)
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index cafc504..e0c8472 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -13,7 +13,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 #include <asm/amigahw.h>
 #include <asm/setup.h>
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 06e8ff1..4230252 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -11,7 +11,6 @@
 #include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/major.h>
-#include <linux/smp_lock.h>
 #include <linux/device_cgroup.h>
 #include <linux/highmem.h>
 #include <linux/blkdev.h>
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e9c874a..561438b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -204,7 +204,7 @@
 	err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
 				  page->index << PAGE_CACHE_SHIFT, &len,
 				  ci->i_truncate_seq, ci->i_truncate_size,
-				  &page, 1);
+				  &page, 1, 0);
 	if (err == -ENOENT)
 		err = 0;
 	if (err < 0) {
@@ -287,7 +287,7 @@
 	rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
 				 offset, &len,
 				 ci->i_truncate_seq, ci->i_truncate_size,
-				 pages, nr_pages);
+				 pages, nr_pages, 0);
 	if (rc == -ENOENT)
 		rc = 0;
 	if (rc < 0)
@@ -774,7 +774,7 @@
 					    snapc, do_sync,
 					    ci->i_truncate_seq,
 					    ci->i_truncate_size,
-					    &inode->i_mtime, true, 1);
+					    &inode->i_mtime, true, 1, 0);
 				max_pages = req->r_num_pages;
 
 				alloc_page_vec(fsc, req);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 98ab13e..60d27bc 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1430,8 +1430,8 @@
 	    invalidating_gen == ci->i_rdcache_gen) {
 		/* success. */
 		dout("try_nonblocking_invalidate %p success\n", inode);
-		ci->i_rdcache_gen = 0;
-		ci->i_rdcache_revoking = 0;
+		/* save any racing async invalidate some trouble */
+		ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
 		return 0;
 	}
 	dout("try_nonblocking_invalidate %p failed\n", inode);
@@ -2273,8 +2273,7 @@
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
-	unsigned seq = le32_to_cpu(grant->seq);
-	unsigned issue_seq = le32_to_cpu(grant->issue_seq);
+	int seq = le32_to_cpu(grant->seq);
 	int newcaps = le32_to_cpu(grant->caps);
 	int issued, implemented, used, wanted, dirty;
 	u64 size = le64_to_cpu(grant->size);
@@ -2286,8 +2285,8 @@
 	int revoked_rdcache = 0;
 	int queue_invalidate = 0;
 
-	dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
-	     inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
+	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
+	     inode, cap, mds, seq, ceph_cap_string(newcaps));
 	dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
 		inode->i_size);
 
@@ -2383,7 +2382,6 @@
 	}
 
 	cap->seq = seq;
-	cap->issue_seq = issue_seq;
 
 	/* file layout may have changed */
 	ci->i_layout = grant->layout;
@@ -2691,6 +2689,11 @@
 		     NULL /* no caps context */);
 	try_flush_caps(inode, session, NULL);
 	up_read(&mdsc->snap_rwsem);
+
+	/* make sure we re-request max_size, if necessary */
+	spin_lock(&inode->i_lock);
+	ci->i_requested_max_size = 0;
+	spin_unlock(&inode->i_lock);
 }
 
 /*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e0a2dc6..7d447af 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -336,7 +336,10 @@
 		if (req->r_reply_info.dir_end) {
 			kfree(fi->last_name);
 			fi->last_name = NULL;
-			fi->next_offset = 2;
+			if (ceph_frag_is_rightmost(frag))
+				fi->next_offset = 2;
+			else
+				fi->next_offset = 0;
 		} else {
 			rinfo = &req->r_reply_info;
 			err = note_last_dentry(fi,
@@ -355,18 +358,22 @@
 		u64 pos = ceph_make_fpos(frag, off);
 		struct ceph_mds_reply_inode *in =
 			rinfo->dir_in[off - fi->offset].in;
+		struct ceph_vino vino;
+		ino_t ino;
+
 		dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
 		     off, off - fi->offset, rinfo->dir_nr, pos,
 		     rinfo->dir_dname_len[off - fi->offset],
 		     rinfo->dir_dname[off - fi->offset], in);
 		BUG_ON(!in);
 		ftype = le32_to_cpu(in->mode) >> 12;
+		vino.ino = le64_to_cpu(in->ino);
+		vino.snap = le64_to_cpu(in->snapid);
+		ino = ceph_vino_to_ino(vino);
 		if (filldir(dirent,
 			    rinfo->dir_dname[off - fi->offset],
 			    rinfo->dir_dname_len[off - fi->offset],
-			    pos,
-			    le64_to_cpu(in->ino),
-			    ftype) < 0) {
+			    pos, ino, ftype) < 0) {
 			dout("filldir stopping us...\n");
 			return 0;
 		}
@@ -414,6 +421,7 @@
 		fi->last_readdir = NULL;
 	}
 	kfree(fi->last_name);
+	fi->last_name = NULL;
 	fi->next_offset = 2;  /* compensate for . and .. */
 	if (fi->dentry) {
 		dput(fi->dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e77c28c..8d79b89 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -154,11 +154,13 @@
 	}
 
 	/*
-	 * No need to block if we have any caps.  Update wanted set
+	 * No need to block if we have caps on the auth MDS (for
+	 * write) or any MDS (for read).  Update wanted set
 	 * asynchronously.
 	 */
 	spin_lock(&inode->i_lock);
-	if (__ceph_is_any_real_caps(ci)) {
+	if (__ceph_is_any_real_caps(ci) &&
+	    (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
 		int mds_wanted = __ceph_caps_mds_wanted(ci);
 		int issued = __ceph_caps_issued(ci, NULL);
 
@@ -280,11 +282,12 @@
 static int striped_read(struct inode *inode,
 			u64 off, u64 len,
 			struct page **pages, int num_pages,
-			int *checkeof)
+			int *checkeof, bool align_to_pages)
 {
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 pos, this_len;
+	int io_align, page_align;
 	int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
 	int left, pages_left;
 	int read;
@@ -300,14 +303,19 @@
 	page_pos = pages;
 	pages_left = num_pages;
 	read = 0;
+	io_align = off & ~PAGE_MASK;
 
 more:
+	if (align_to_pages)
+		page_align = (pos - io_align) & ~PAGE_MASK;
+	else
+		page_align = pos & ~PAGE_MASK;
 	this_len = left;
 	ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
 				  &ci->i_layout, pos, &this_len,
 				  ci->i_truncate_seq,
 				  ci->i_truncate_size,
-				  page_pos, pages_left);
+				  page_pos, pages_left, page_align);
 	hit_stripe = this_len < left;
 	was_short = ret >= 0 && ret < this_len;
 	if (ret == -ENOENT)
@@ -374,26 +382,25 @@
 	dout("sync_read on file %p %llu~%u %s\n", file, off, len,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
-	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages, off, len);
-
-		/*
-		 * flush any page cache pages in this range.  this
-		 * will make concurrent normal and O_DIRECT io slow,
-		 * but it will at least behave sensibly when they are
-		 * in sequence.
-		 */
-	} else {
+	if (file->f_flags & O_DIRECT)
+		pages = ceph_get_direct_page_vector(data, num_pages);
+	else
 		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
-	}
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
+	/*
+	 * flush any page cache pages in this range.  this
+	 * will make concurrent normal and sync io slow,
+	 * but it will at least behave sensibly when they are
+	 * in sequence.
+	 */
 	ret = filemap_write_and_wait(inode->i_mapping);
 	if (ret < 0)
 		goto done;
 
-	ret = striped_read(inode, off, len, pages, num_pages, checkeof);
+	ret = striped_read(inode, off, len, pages, num_pages, checkeof,
+			   file->f_flags & O_DIRECT);
 
 	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
 		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -448,6 +455,7 @@
 	int flags;
 	int do_sync = 0;
 	int check_caps = 0;
+	int page_align, io_align;
 	int ret;
 	struct timespec mtime = CURRENT_TIME;
 
@@ -462,6 +470,8 @@
 	else
 		pos = *offset;
 
+	io_align = pos & ~PAGE_MASK;
+
 	ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
 	if (ret < 0)
 		return ret;
@@ -486,20 +496,26 @@
 	 */
 more:
 	len = left;
+	if (file->f_flags & O_DIRECT)
+		/* write from beginning of first page, regardless of
+		   io alignment */
+		page_align = (pos - io_align) & ~PAGE_MASK;
+	else
+		page_align = pos & ~PAGE_MASK;
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
 				    ceph_vino(inode), pos, &len,
 				    CEPH_OSD_OP_WRITE, flags,
 				    ci->i_snap_realm->cached_context,
 				    do_sync,
 				    ci->i_truncate_seq, ci->i_truncate_size,
-				    &mtime, false, 2);
+				    &mtime, false, 2, page_align);
 	if (!req)
 		return -ENOMEM;
 
 	num_pages = calc_pages_for(pos, len);
 
 	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
+		pages = ceph_get_direct_page_vector(data, num_pages);
 		if (IS_ERR(pages)) {
 			ret = PTR_ERR(pages);
 			goto out;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 1d6a45b..bf12865 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2,7 +2,6 @@
 
 #include <linux/module.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
@@ -471,7 +470,9 @@
 
 	if (issued & (CEPH_CAP_FILE_EXCL|
 		      CEPH_CAP_FILE_WR|
-		      CEPH_CAP_FILE_BUFFER)) {
+		      CEPH_CAP_FILE_BUFFER|
+		      CEPH_CAP_AUTH_EXCL|
+		      CEPH_CAP_XATTR_EXCL)) {
 		if (timespec_compare(ctime, &inode->i_ctime) > 0) {
 			dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
 			     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
@@ -511,7 +512,7 @@
 			warn = 1;
 		}
 	} else {
-		/* we have no write caps; whatever the MDS says is true */
+		/* we have no write|excl caps; whatever the MDS says is true */
 		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
 			inode->i_ctime = *ctime;
 			inode->i_mtime = *mtime;
@@ -567,12 +568,17 @@
 
 	/*
 	 * provided version will be odd if inode value is projected,
-	 * even if stable.  skip the update if we have a newer info
-	 * (e.g., due to inode info racing form multiple MDSs), or if
-	 * we are getting projected (unstable) inode info.
+	 * even if stable.  skip the update if we have newer stable
+	 * info (ours>=theirs, e.g. due to racing mds replies), unless
+	 * we are getting projected (unstable) info (in which case the
+	 * version is odd, and we want ours>theirs).
+	 *   us   them
+	 *   2    2     skip
+	 *   3    2     skip
+	 *   3    3     update
 	 */
 	if (le64_to_cpu(info->version) > 0 &&
-	    (ci->i_version & ~1) > le64_to_cpu(info->version))
+	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
 		goto no_change;
 
 	issued = __ceph_caps_issued(ci, &implemented);
@@ -606,7 +612,14 @@
 			    le32_to_cpu(info->time_warp_seq),
 			    &ctime, &mtime, &atime);
 
-	ci->i_max_size = le64_to_cpu(info->max_size);
+	/* only update max_size on auth cap */
+	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+	    ci->i_max_size != le64_to_cpu(info->max_size)) {
+		dout("max_size %lld -> %llu\n", ci->i_max_size,
+		     le64_to_cpu(info->max_size));
+		ci->i_max_size = le64_to_cpu(info->max_size);
+	}
+
 	ci->i_layout = info->layout;
 	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
@@ -1055,7 +1068,8 @@
 		ininfo = rinfo->targeti.in;
 		vino.ino = le64_to_cpu(ininfo->ino);
 		vino.snap = le64_to_cpu(ininfo->snapid);
-		if (!dn->d_inode) {
+		in = dn->d_inode;
+		if (!in) {
 			in = ceph_get_inode(sb, vino);
 			if (IS_ERR(in)) {
 				pr_err("fill_trace bad get_inode "
@@ -1386,11 +1400,8 @@
 	spin_lock(&inode->i_lock);
 	dout("invalidate_pages %p gen %d revoking %d\n", inode,
 	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
-	if (ci->i_rdcache_gen == 0 ||
-	    ci->i_rdcache_revoking != ci->i_rdcache_gen) {
-		BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen);
+	if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
 		/* nevermind! */
-		ci->i_rdcache_revoking = 0;
 		spin_unlock(&inode->i_lock);
 		goto out;
 	}
@@ -1400,15 +1411,16 @@
 	ceph_invalidate_nondirty_pages(inode->i_mapping);
 
 	spin_lock(&inode->i_lock);
-	if (orig_gen == ci->i_rdcache_gen) {
+	if (orig_gen == ci->i_rdcache_gen &&
+	    orig_gen == ci->i_rdcache_revoking) {
 		dout("invalidate_pages %p gen %d successful\n", inode,
 		     ci->i_rdcache_gen);
-		ci->i_rdcache_gen = 0;
-		ci->i_rdcache_revoking = 0;
+		ci->i_rdcache_revoking--;
 		check = 1;
 	} else {
-		dout("invalidate_pages %p gen %d raced, gen now %d\n",
-		     inode, orig_gen, ci->i_rdcache_gen);
+		dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
+		     inode, orig_gen, ci->i_rdcache_gen,
+		     ci->i_rdcache_revoking);
 	}
 	spin_unlock(&inode->i_lock);
 
@@ -1739,7 +1751,7 @@
 		return 0;
 	}
 
-	dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask));
+	dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
 	if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
 		return 0;
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3142b15..098b185 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -6,7 +6,6 @@
 #include <linux/sched.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -529,6 +528,9 @@
 	ceph_mdsc_get_request(req);
 	__insert_request(mdsc, req);
 
+	req->r_uid = current_fsuid();
+	req->r_gid = current_fsgid();
+
 	if (dir) {
 		struct ceph_inode_info *ci = ceph_inode(dir);
 
@@ -1588,8 +1590,8 @@
 
 	head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
 	head->op = cpu_to_le32(req->r_op);
-	head->caller_uid = cpu_to_le32(current_fsuid());
-	head->caller_gid = cpu_to_le32(current_fsgid());
+	head->caller_uid = cpu_to_le32(req->r_uid);
+	head->caller_gid = cpu_to_le32(req->r_gid);
 	head->args = req->r_args;
 
 	ceph_encode_filepath(&p, end, ino1, path1);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d66d63c..9341fd4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -170,6 +170,8 @@
 
 	union ceph_mds_request_args r_args;
 	int r_fmode;        /* file mode, if expecting cap */
+	uid_t r_uid;
+	gid_t r_gid;
 
 	/* for choosing which mds to send this request to */
 	int r_direct_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1886294..7f01728 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,9 +293,7 @@
 	int i_rd_ref, i_rdcache_ref, i_wr_ref;
 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
 	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
-	u32 i_rdcache_gen;      /* we increment this each time we get
-				   FILE_CACHE.  If it's non-zero, we
-				   _may_ have cached pages. */
+	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
 
 	struct list_head i_unsafe_writes; /* uncommitted sync writes */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 410ed18..a60579b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -19,7 +19,6 @@
 #include <linux/compiler.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/ioctl.h>
 #include <linux/if.h>
 #include <linux/if_bridge.h>
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2537323..2720178 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -28,7 +28,6 @@
 #include <linux/key.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include "ecryptfs_kernel.h"
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2fedaf8..acf8695 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/parser.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/exportfs.h>
 #include <linux/vfs.h>
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bf5ae88..eb3bc2f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,6 +331,30 @@
 		return err;
 	}
 
+	case FITRIM:
+	{
+		struct super_block *sb = inode->i_sb;
+		struct fstrim_range range;
+		int ret = 0;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&range, (struct fstrim_range *)arg,
+		    sizeof(range)))
+			return -EFAULT;
+
+		ret = ext4_trim_fs(sb, &range);
+		if (ret < 0)
+			return ret;
+
+		if (copy_to_user((struct fstrim_range *)arg, &range,
+		    sizeof(range)))
+			return -EFAULT;
+
+		return 0;
+	}
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7f5451c..beacce1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -237,8 +237,6 @@
 			} while (bh != head);
 		}
 
-		put_io_page(io_end->pages[i]);
-
 		/*
 		 * If this is a partial write which happened to make
 		 * all buffers uptodate then we can optimize away a
@@ -248,6 +246,8 @@
 		 */
 		if (!partial_write)
 			SetPageUptodate(page);
+
+		put_io_page(io_end->pages[i]);
 	}
 	io_end->num_io_pages = 0;
 	inode = io_end->inode;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 61182fe..e32195d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1197,7 +1197,6 @@
 	.quota_write	= ext4_quota_write,
 #endif
 	.bdev_try_to_free_page = bdev_try_to_free_page,
-	.trim_fs	= ext4_trim_fs
 };
 
 static const struct super_operations ext4_nojournal_sops = {
@@ -2799,9 +2798,6 @@
 	struct ext4_li_request *elr;
 
 	mutex_lock(&ext4_li_info->li_list_mtx);
-	if (list_empty(&ext4_li_info->li_request_list))
-		return;
-
 	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
 		elr = list_entry(pos, struct ext4_li_request,
 				 lr_request);
@@ -3268,13 +3264,14 @@
 	 * Test whether we have more sectors than will fit in sector_t,
 	 * and whether the max offset is addressable by the page cache.
 	 */
-	ret = generic_check_addressable(sb->s_blocksize_bits,
+	err = generic_check_addressable(sb->s_blocksize_bits,
 					ext4_blocks_count(es));
-	if (ret) {
+	if (err) {
 		ext4_msg(sb, KERN_ERR, "filesystem"
 			 " too large to mount safely on this system");
 		if (sizeof(sector_t) < 8)
 			ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
+		ret = err;
 		goto failed_mount;
 	}
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c822458..9242d29 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -134,6 +134,7 @@
 void fuse_finish_open(struct inode *inode, struct file *file)
 {
 	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 
 	if (ff->open_flags & FOPEN_DIRECT_IO)
 		file->f_op = &fuse_direct_io_file_operations;
@@ -141,6 +142,15 @@
 		invalidate_inode_pages2(inode->i_mapping);
 	if (ff->open_flags & FOPEN_NONSEEKABLE)
 		nonseekable_open(inode, file);
+	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+
+		spin_lock(&fc->lock);
+		fi->attr_version = ++fc->attr_version;
+		i_size_write(inode, 0);
+		spin_unlock(&fc->lock);
+		fuse_invalidate_attr(inode);
+	}
 }
 
 int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 06d5827..5ab3839 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -138,10 +138,8 @@
 				      struct gfs2_inum_host *inum)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_holder i_gh;
 	struct inode *inode;
 	struct dentry *dentry;
-	int error;
 
 	inode = gfs2_ilookup(sb, inum->no_addr);
 	if (inode) {
@@ -152,52 +150,16 @@
 		goto out_inode;
 	}
 
-	error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
-				  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-	if (error)
-		return ERR_PTR(error);
-
-	error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE);
-	if (error)
-		goto fail;
-
-	inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0);
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
-		goto fail;
-	}
-
-	error = gfs2_inode_refresh(GFS2_I(inode));
-	if (error) {
-		iput(inode);
-		goto fail;
-	}
-
-	/* Pick up the works we bypass in gfs2_inode_lookup */
-	if (inode->i_state & I_NEW) 
-		gfs2_set_iop(inode);
-
-	if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
-		iput(inode);
-		goto fail;
-	}
-
-	error = -EIO;
-	if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
-		iput(inode);
-		goto fail;
-	}
-
-	gfs2_glock_dq_uninit(&i_gh);
+	inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
+				    GFS2_BLKST_DINODE);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
 
 out_inode:
 	dentry = d_obtain_alias(inode);
 	if (!IS_ERR(dentry))
 		dentry->d_op = &gfs2_dops;
 	return dentry;
-fail:
-	gfs2_glock_dq_uninit(&i_gh);
-	return ERR_PTR(error);
 }
 
 static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8777885..f92c177 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -686,21 +686,20 @@
 {
 	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	struct gfs2_inode *ip = NULL;
+	struct gfs2_inode *ip;
 	struct inode *inode;
-	u64 no_addr = 0;
+	u64 no_addr = gl->gl_name.ln_number;
 
-	spin_lock(&gl->gl_spin);
-	ip = (struct gfs2_inode *)gl->gl_object;
+	ip = gl->gl_object;
+	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
+
 	if (ip)
-		no_addr = ip->i_no_addr;
-	spin_unlock(&gl->gl_spin);
-	if (ip) {
 		inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
-		if (inode) {
-			d_prune_aliases(inode);
-			iput(inode);
-		}
+	else
+		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
+	if (inode && !IS_ERR(inode)) {
+		d_prune_aliases(inode);
+		iput(inode);
 	}
 	gfs2_glock_put(gl);
 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 06370f8..e1213f7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -73,49 +73,6 @@
 	return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
 }
 
-struct gfs2_skip_data {
-	u64	no_addr;
-	int	skipped;
-};
-
-static int iget_skip_test(struct inode *inode, void *opaque)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_skip_data *data = opaque;
-
-	if (ip->i_no_addr == data->no_addr) {
-		if (inode->i_state & (I_FREEING|I_WILL_FREE)){
-			data->skipped = 1;
-			return 0;
-		}
-		return 1;
-	}
-	return 0;
-}
-
-static int iget_skip_set(struct inode *inode, void *opaque)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_skip_data *data = opaque;
-
-	if (data->skipped)
-		return 1;
-	inode->i_ino = (unsigned long)(data->no_addr);
-	ip->i_no_addr = data->no_addr;
-	return 0;
-}
-
-static struct inode *gfs2_iget_skip(struct super_block *sb,
-				    u64 no_addr)
-{
-	struct gfs2_skip_data data;
-	unsigned long hash = (unsigned long)no_addr;
-
-	data.no_addr = no_addr;
-	data.skipped = 0;
-	return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
-}
-
 /**
  * GFS2 lookup code fills in vfs inode contents based on info obtained
  * from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -243,93 +200,54 @@
 	return ERR_PTR(error);
 }
 
-/**
- * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation
- *                               and try to reclaim it by doing iput.
- *
- * This function assumes no rgrp locks are currently held.
- *
- * @sb: The super block
- * no_addr: The inode number
- *
- */
-
-void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
+struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
+				  u64 *no_formal_ino, unsigned int blktype)
 {
-	struct gfs2_sbd *sdp;
-	struct gfs2_inode *ip;
-	struct gfs2_glock *io_gl = NULL;
-	int error;
-	struct gfs2_holder gh;
+	struct super_block *sb = sdp->sd_vfs;
+	struct gfs2_holder i_gh;
 	struct inode *inode;
+	int error;
 
-	inode = gfs2_iget_skip(sb, no_addr);
+	error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
+				  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		return ERR_PTR(error);
 
-	if (!inode)
-		return;
+	error = gfs2_check_blk_type(sdp, no_addr, blktype);
+	if (error)
+		goto fail;
 
-	/* If it's not a new inode, someone's using it, so leave it alone. */
-	if (!(inode->i_state & I_NEW)) {
-		iput(inode);
-		return;
+	inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
+	if (IS_ERR(inode))
+		goto fail;
+
+	error = gfs2_inode_refresh(GFS2_I(inode));
+	if (error)
+		goto fail_iput;
+
+	/* Pick up the works we bypass in gfs2_inode_lookup */
+	if (inode->i_state & I_NEW) 
+		gfs2_set_iop(inode);
+
+	/* Two extra checks for NFS only */
+	if (no_formal_ino) {
+		error = -ESTALE;
+		if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
+			goto fail_iput;
+
+		error = -EIO;
+		if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
+			goto fail_iput;
+
+		error = 0;
 	}
 
-	ip = GFS2_I(inode);
-	sdp = GFS2_SB(inode);
-	ip->i_no_formal_ino = -1;
-
-	error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
-	if (unlikely(error))
-		goto fail;
-	ip->i_gl->gl_object = ip;
-
-	error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
-	if (unlikely(error))
-		goto fail_put;
-
-	set_bit(GIF_INVALID, &ip->i_flags);
-	error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT,
-				   &ip->i_iopen_gh);
-	if (unlikely(error))
-		goto fail_iopen;
-
-	ip->i_iopen_gh.gh_gl->gl_object = ip;
-	gfs2_glock_put(io_gl);
-	io_gl = NULL;
-
-	inode->i_mode = DT2IF(DT_UNKNOWN);
-
-	/*
-	 * We must read the inode in order to work out its type in
-	 * this case. Note that this doesn't happen often as we normally
-	 * know the type beforehand. This code path only occurs during
-	 * unlinked inode recovery (where it is safe to do this glock,
-	 * which is not true in the general case).
-	 */
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY,
-				   &gh);
-	if (unlikely(error))
-		goto fail_glock;
-
-	/* Inode is now uptodate */
-	gfs2_glock_dq_uninit(&gh);
-	gfs2_set_iop(inode);
-
-	/* The iput will cause it to be deleted. */
-	iput(inode);
-	return;
-
-fail_glock:
-	gfs2_glock_dq(&ip->i_iopen_gh);
-fail_iopen:
-	if (io_gl)
-		gfs2_glock_put(io_gl);
-fail_put:
-	ip->i_gl->gl_object = NULL;
-	gfs2_glock_put(ip->i_gl);
 fail:
-	iget_failed(inode);
-	return;
+	gfs2_glock_dq_uninit(&i_gh);
+	return error ? ERR_PTR(error) : inode;
+fail_iput:
+	iput(inode);
+	goto fail;
 }
 
 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6720d7d..d8499fa 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,9 @@
 extern void gfs2_set_iop(struct inode *inode);
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
 				       u64 no_addr, u64 no_formal_ino);
-extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr);
+extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
+					 u64 *no_formal_ino,
+					 unsigned int blktype);
 extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 
 extern int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bef3ab6..33c8407 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -963,17 +963,18 @@
  *          The inode, if one has been found, in inode.
  */
 
-static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
-			   u64 skip)
+static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
 {
 	u32 goal = 0, block;
 	u64 no_addr;
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	unsigned int n;
+	struct gfs2_glock *gl;
+	struct gfs2_inode *ip;
+	int error;
+	int found = 0;
 
-	for(;;) {
-		if (goal >= rgd->rd_data)
-			break;
+	while (goal < rgd->rd_data) {
 		down_write(&sdp->sd_log_flush_lock);
 		n = 1;
 		block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
@@ -990,11 +991,32 @@
 		if (no_addr == skip)
 			continue;
 		*last_unlinked = no_addr;
-		return no_addr;
+
+		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
+		if (error)
+			continue;
+
+		/* If the inode is already in cache, we can ignore it here
+		 * because the existing inode disposal code will deal with
+		 * it when all refs have gone away. Accessing gl_object like
+		 * this is not safe in general. Here it is ok because we do
+		 * not dereference the pointer, and we only need an approx
+		 * answer to whether it is NULL or not.
+		 */
+		ip = gl->gl_object;
+
+		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+			gfs2_glock_put(gl);
+		else
+			found++;
+
+		/* Limit reclaim to sensible number of tasks */
+		if (found > 2*NR_CPUS)
+			return;
 	}
 
 	rgd->rd_flags &= ~GFS2_RDF_CHECK;
-	return 0;
+	return;
 }
 
 /**
@@ -1075,11 +1097,9 @@
  * Try to acquire rgrp in way which avoids contending with others.
  *
  * Returns: errno
- *          unlinked: the block address of an unlinked block to be reclaimed
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
-			  u64 *last_unlinked)
+static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd, *begin = NULL;
@@ -1089,7 +1109,6 @@
 	int loops = 0;
 	int error, rg_locked;
 
-	*unlinked = 0;
 	rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
 
 	while (rgd) {
@@ -1106,17 +1125,10 @@
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
-			/* If the rg came in already locked, there's no
-			   way we can recover from a failed try_rgrp_unlink
-			   because that would require an iput which can only
-			   happen after the rgrp is unlocked. */
-			if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
-				*unlinked = try_rgrp_unlink(rgd, last_unlinked,
-							   ip->i_no_addr);
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&al->al_rgd_gh);
-			if (*unlinked)
-				return -EAGAIN;
 			/* fall through */
 		case GLR_TRYFAILED:
 			rgd = recent_rgrp_next(rgd);
@@ -1145,13 +1157,10 @@
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
-			if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
-				*unlinked = try_rgrp_unlink(rgd, last_unlinked,
-							    ip->i_no_addr);
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&al->al_rgd_gh);
-			if (*unlinked)
-				return -EAGAIN;
 			break;
 
 		case GLR_TRYFAILED:
@@ -1204,12 +1213,12 @@
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_alloc *al = ip->i_alloc;
 	int error = 0;
-	u64 last_unlinked = NO_BLOCK, unlinked;
+	u64 last_unlinked = NO_BLOCK;
+	int tries = 0;
 
 	if (gfs2_assert_warn(sdp, al->al_requested))
 		return -EINVAL;
 
-try_again:
 	if (hold_rindex) {
 		/* We need to hold the rindex unless the inode we're using is
 		   the rindex itself, in which case it's already held. */
@@ -1218,31 +1227,23 @@
 		else if (!sdp->sd_rgrps) /* We may not have the rindex read
 					    in, so: */
 			error = gfs2_ri_update_special(ip);
+		if (error)
+			return error;
 	}
 
-	if (error)
-		return error;
+	do {
+		error = get_local_rgrp(ip, &last_unlinked);
+		/* If there is no space, flushing the log may release some */
+		if (error)
+			gfs2_log_flush(sdp, NULL);
+	} while (error && tries++ < 3);
 
-	/* Find an rgrp suitable for allocation.  If it encounters any unlinked
-	   dinodes along the way, error will equal -EAGAIN and unlinked will
-	   contains it block address. We then need to look up that inode and
-	   try to free it, and try the allocation again. */
-	error = get_local_rgrp(ip, &unlinked, &last_unlinked);
 	if (error) {
 		if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
 			gfs2_glock_dq_uninit(&al->al_ri_gh);
-		if (error != -EAGAIN)
-			return error;
-
-		gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked);
-		/* regardless of whether or not gfs2_process_unlinked_inode
-		   was successful, we don't want to repeat it again. */
-		last_unlinked = unlinked;
-		gfs2_log_flush(sdp, NULL);
-		error = 0;
-
-		goto try_again;
+		return error;
 	}
+
 	/* no error, so we have the rgrp set in the inode's allocation. */
 	al->al_file = file;
 	al->al_line = line;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index e92fdbb..d6cc164 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -6,7 +6,6 @@
 
 #include <linux/syscalls.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/capability.h>
 #include <linux/file.h>
 #include <linux/fs.h>
@@ -530,41 +529,6 @@
 	return thaw_super(sb);
 }
 
-static int ioctl_fstrim(struct file *filp, void __user *argp)
-{
-	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
-	struct fstrim_range range;
-	int ret = 0;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	/* If filesystem doesn't support trim feature, return. */
-	if (sb->s_op->trim_fs == NULL)
-		return -EOPNOTSUPP;
-
-	/* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
-	if (sb->s_bdev == NULL)
-		return -EINVAL;
-
-	if (argp == NULL) {
-		range.start = 0;
-		range.len = ULLONG_MAX;
-		range.minlen = 0;
-	} else if (copy_from_user(&range, argp, sizeof(range)))
-		return -EFAULT;
-
-	ret = sb->s_op->trim_fs(sb, &range);
-	if (ret < 0)
-		return ret;
-
-	if ((argp != NULL) &&
-	    (copy_to_user(argp, &range, sizeof(range))))
-		return -EFAULT;
-
-	return 0;
-}
-
 /*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
@@ -615,10 +579,6 @@
 		error = ioctl_fsthaw(filp);
 		break;
 
-	case FITRIM:
-		error = ioctl_fstrim(filp, argp);
-		break;
-
 	case FS_IOC_FIEMAP:
 		return ioctl_fiemap(filp, arg);
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c590d15..f837ba9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -899,6 +899,14 @@
 
 	/* journal descriptor can store up to n blocks -bzzz */
 	journal->j_blocksize = blocksize;
+	journal->j_dev = bdev;
+	journal->j_fs_dev = fs_dev;
+	journal->j_blk_offset = start;
+	journal->j_maxlen = len;
+	bdevname(journal->j_dev, journal->j_devname);
+	p = journal->j_devname;
+	while ((p = strchr(p, '/')))
+		*p = '!';
 	jbd2_stats_proc_init(journal);
 	n = journal->j_blocksize / sizeof(journal_block_tag_t);
 	journal->j_wbufsize = n;
@@ -908,14 +916,6 @@
 			__func__);
 		goto out_err;
 	}
-	journal->j_dev = bdev;
-	journal->j_fs_dev = fs_dev;
-	journal->j_blk_offset = start;
-	journal->j_maxlen = len;
-	bdevname(journal->j_dev, journal->j_devname);
-	p = journal->j_devname;
-	while ((p = strchr(p, '/')))
-		*p = '!';
 
 	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
 	if (!bh) {
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index d5bb868..25509eb 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,7 +14,6 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
-#include <linux/smp_lock.h>
 #include <linux/kthread.h>
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 47ea1e1..332c54c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -7,7 +7,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/errno.h>
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 25e21e4..ed0c59f 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -124,7 +124,7 @@
 			continue;
 		if (host->h_server != ni->server)
 			continue;
-		if (ni->server &&
+		if (ni->server && ni->src_len != 0 &&
 		    !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
 			continue;
 
@@ -167,6 +167,7 @@
 	host->h_addrlen = ni->salen;
 	rpc_set_port(nlm_addr(host), 0);
 	memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
+	host->h_srcaddrlen = ni->src_len;
 	host->h_version    = ni->version;
 	host->h_proto      = ni->protocol;
 	host->h_rpcclnt    = NULL;
@@ -238,9 +239,6 @@
 				     const char *hostname,
 				     int noresvport)
 {
-	const struct sockaddr source = {
-		.sa_family	= AF_UNSPEC,
-	};
 	struct nlm_lookup_host_info ni = {
 		.server		= 0,
 		.sap		= sap,
@@ -249,8 +247,6 @@
 		.version	= version,
 		.hostname	= hostname,
 		.hostname_len	= strlen(hostname),
-		.src_sap	= &source,
-		.src_len	= sizeof(source),
 		.noresvport	= noresvport,
 	};
 
@@ -357,7 +353,6 @@
 			.protocol	= host->h_proto,
 			.address	= nlm_addr(host),
 			.addrsize	= host->h_addrlen,
-			.saddress	= nlm_srcaddr(host),
 			.timeout	= &timeparms,
 			.servername	= host->h_name,
 			.program	= &nlm_program,
@@ -376,6 +371,8 @@
 			args.flags |= RPC_CLNT_CREATE_HARDRTRY;
 		if (host->h_noresvport)
 			args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+		if (host->h_srcaddrlen)
+			args.saddress = nlm_srcaddr(host);
 
 		clnt = rpc_create(&args);
 		if (!IS_ERR(clnt))
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a336e83..38d2611 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -9,7 +9,6 @@
 
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/smp_lock.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c462d34..ef5659b 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -25,7 +25,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/nlm.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index c3069f3..0caea53 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -9,7 +9,6 @@
 
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/smp_lock.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 
diff --git a/fs/locks.c b/fs/locks.c
index 0e62dd3..8729347 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -122,7 +122,6 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/time.h>
 #include <linux/rcupdate.h>
diff --git a/fs/namespace.c b/fs/namespace.c
index 8a415c9..3dbfc07 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -13,7 +13,6 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
-#include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/acct.h>
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index aac8832e..f22b12e7 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -19,7 +19,6 @@
 #include <linux/mm.h>
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
-#include <linux/smp_lock.h>
 
 #include <linux/ncp_fs.h>
 
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 6c754f7..cb50aaf 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -17,7 +17,6 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 
 #include <linux/ncp_fs.h>
 #include "ncplib_kernel.h"
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index d290545..8fb93b6 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -26,7 +26,6 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <linux/vfs.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c2a1f9a..d40a547 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -17,7 +17,6 @@
 #include <linux/mount.h>
 #include <linux/slab.h>
 #include <linux/highuid.h>
-#include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
 
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index aeec017..93a8b3b 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -9,7 +9,6 @@
 #include <linux/completion.h>
 #include <linux/ip.h>
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfs_fs.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 232a7ee..1fd62fc 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 
 #include <linux/nfs4.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 07ac384..662df2a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
 #include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
+#include <linux/kmemleak.h>
 
 #include "delegation.h"
 #include "iostat.h"
@@ -194,9 +195,13 @@
 static
 struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
 {
+	void *ptr;
 	if (page == NULL)
 		return ERR_PTR(-EIO);
-	return (struct nfs_cache_array *)kmap(page);
+	ptr = kmap(page);
+	if (ptr == NULL)
+		return ERR_PTR(-ENOMEM);
+	return ptr;
 }
 
 static
@@ -213,6 +218,9 @@
 {
 	struct nfs_cache_array *array = nfs_readdir_get_array(page);
 	int i;
+
+	if (IS_ERR(array))
+		return PTR_ERR(array);
 	for (i = 0; i < array->size; i++)
 		kfree(array->array[i].string.name);
 	nfs_readdir_release_array(page);
@@ -231,6 +239,11 @@
 	string->name = kmemdup(name, len, GFP_KERNEL);
 	if (string->name == NULL)
 		return -ENOMEM;
+	/*
+	 * Avoid a kmemleak false positive. The pointer to the name is stored
+	 * in a page cache page which kmemleak does not scan.
+	 */
+	kmemleak_not_leak(string->name);
 	string->hash = full_name_hash(name, len);
 	return 0;
 }
@@ -244,7 +257,7 @@
 
 	if (IS_ERR(array))
 		return PTR_ERR(array);
-	ret = -EIO;
+	ret = -ENOSPC;
 	if (array->size >= MAX_READDIR_ARRAY)
 		goto out;
 
@@ -255,9 +268,9 @@
 	if (ret)
 		goto out;
 	array->last_cookie = entry->cookie;
+	array->size++;
 	if (entry->eof == 1)
 		array->eof_index = array->size;
-	array->size++;
 out:
 	nfs_readdir_release_array(page);
 	return ret;
@@ -272,7 +285,7 @@
 	if (diff < 0)
 		goto out_eof;
 	if (diff >= array->size) {
-		if (array->eof_index > 0)
+		if (array->eof_index >= 0)
 			goto out_eof;
 		desc->current_index += array->size;
 		return -EAGAIN;
@@ -281,8 +294,6 @@
 	index = (unsigned int)diff;
 	*desc->dir_cookie = array->array[index].cookie;
 	desc->cache_entry_index = index;
-	if (index == array->eof_index)
-		desc->eof = 1;
 	return 0;
 out_eof:
 	desc->eof = 1;
@@ -296,17 +307,17 @@
 	int status = -EAGAIN;
 
 	for (i = 0; i < array->size; i++) {
-		if (i == array->eof_index) {
-			desc->eof = 1;
-			status = -EBADCOOKIE;
-		}
 		if (array->array[i].cookie == *desc->dir_cookie) {
 			desc->cache_entry_index = i;
 			status = 0;
-			break;
+			goto out;
 		}
 	}
-
+	if (i == array->eof_index) {
+		desc->eof = 1;
+		status = -EBADCOOKIE;
+	}
+out:
 	return status;
 }
 
@@ -449,7 +460,7 @@
 
 /* Perform conversion from xdr to cache array */
 static
-void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
+int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
 				void *xdr_page, struct page *page, unsigned int buflen)
 {
 	struct xdr_stream stream;
@@ -471,21 +482,29 @@
 
 	do {
 		status = xdr_decode(desc, entry, &stream);
-		if (status != 0)
+		if (status != 0) {
+			if (status == -EAGAIN)
+				status = 0;
 			break;
+		}
 
-		if (nfs_readdir_add_to_array(entry, page) == -1)
-			break;
 		if (desc->plus == 1)
 			nfs_prime_dcache(desc->file->f_path.dentry, entry);
+
+		status = nfs_readdir_add_to_array(entry, page);
+		if (status != 0)
+			break;
 	} while (!entry->eof);
 
 	if (status == -EBADCOOKIE && entry->eof) {
 		array = nfs_readdir_get_array(page);
-		array->eof_index = array->size - 1;
-		status = 0;
-		nfs_readdir_release_array(page);
+		if (!IS_ERR(array)) {
+			array->eof_index = array->size;
+			status = 0;
+			nfs_readdir_release_array(page);
+		}
 	}
+	return status;
 }
 
 static
@@ -537,7 +556,7 @@
 	struct nfs_entry entry;
 	struct file	*file = desc->file;
 	struct nfs_cache_array *array;
-	int status = 0;
+	int status = -ENOMEM;
 	unsigned int array_size = ARRAY_SIZE(pages);
 
 	entry.prev_cookie = 0;
@@ -549,6 +568,10 @@
 		goto out;
 
 	array = nfs_readdir_get_array(page);
+	if (IS_ERR(array)) {
+		status = PTR_ERR(array);
+		goto out;
+	}
 	memset(array, 0, sizeof(struct nfs_cache_array));
 	array->eof_index = -1;
 
@@ -556,12 +579,19 @@
 	if (!pages_ptr)
 		goto out_release_array;
 	do {
+		unsigned int pglen;
 		status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
 
 		if (status < 0)
 			break;
-		nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
-	} while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
+		pglen = status;
+		status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+		if (status < 0) {
+			if (status == -ENOSPC)
+				status = 0;
+			break;
+		}
+	} while (array->eof_index < 0);
 
 	nfs_readdir_free_large_page(pages_ptr, pages, array_size);
 out_release_array:
@@ -582,8 +612,10 @@
 int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 {
 	struct inode	*inode = desc->file->f_path.dentry->d_inode;
+	int ret;
 
-	if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
+	ret = nfs_readdir_xdr_to_array(desc, page, inode);
+	if (ret < 0)
 		goto error;
 	SetPageUptodate(page);
 
@@ -595,7 +627,7 @@
 	return 0;
  error:
 	unlock_page(page);
-	return -EIO;
+	return ret;
 }
 
 static
@@ -608,12 +640,8 @@
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-	struct page *page;
-	page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
+	return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
 			desc->page_index, (filler_t *)nfs_readdir_filler, desc);
-	if (IS_ERR(page))
-		desc->eof = 1;
-	return page;
 }
 
 /*
@@ -639,8 +667,10 @@
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 {
-	int res = -EAGAIN;
+	int res;
 
+	if (desc->page_index == 0)
+		desc->current_index = 0;
 	while (1) {
 		res = find_cache_page(desc);
 		if (res != -EAGAIN)
@@ -670,6 +700,8 @@
 	struct dentry *dentry = NULL;
 
 	array = nfs_readdir_get_array(desc->page);
+	if (IS_ERR(array))
+		return PTR_ERR(array);
 
 	for (i = desc->cache_entry_index; i < array->size; i++) {
 		d_type = DT_UNKNOWN;
@@ -685,11 +717,9 @@
 			*desc->dir_cookie = array->array[i+1].cookie;
 		else
 			*desc->dir_cookie = array->last_cookie;
-		if (i == array->eof_index) {
-			desc->eof = 1;
-			break;
-		}
 	}
+	if (i == array->eof_index)
+		desc->eof = 1;
 
 	nfs_readdir_release_array(desc->page);
 	cache_page_release(desc);
@@ -1345,12 +1375,12 @@
 				res = NULL;
 				goto out;
 			/* This turned out not to be a regular file */
-			case -EISDIR:
 			case -ENOTDIR:
 				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
+			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
 				res = ERR_CAST(inode);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index e6bf457..2563f76 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -423,7 +423,7 @@
 	struct page **page;
 	size_t hdrlen;
 	unsigned int pglen, recvd;
-	int status, nr = 0;
+	int status;
 
 	if ((status = ntohl(*p++)))
 		return nfs_stat_to_errno(status);
@@ -443,7 +443,7 @@
 	if (pglen > recvd)
 		pglen = recvd;
 	page = rcvbuf->pages;
-	return nr;
+	return pglen;
 }
 
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d9a5e83..748dc91 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -555,7 +555,7 @@
 	struct page **page;
 	size_t hdrlen;
 	u32 recvd, pglen;
-	int status, nr = 0;
+	int status;
 
 	status = ntohl(*p++);
 	/* Decode post_op_attrs */
@@ -586,7 +586,7 @@
 		pglen = recvd;
 	page = rcvbuf->pages;
 
-	return nr;
+	return pglen;
 }
 
 __be32 *
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0f24cdf..6a653ff 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2852,8 +2852,10 @@
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
 	status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
-	if (status == 0)
+	if (status >= 0) {
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+		status += args.pgbase;
+	}
 
 	nfs_invalidate_atime(dir);
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f313c4c..b7a204f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4518,7 +4518,7 @@
 	xdr_read_pages(xdr, pglen);
 
 
-	return 0;
+	return pglen;
 }
 
 static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0a42e8f..3c04504 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -39,7 +39,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
 #include <linux/lockd/bind.h>
-#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/mnt_namespace.h>
@@ -67,6 +66,12 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
+#ifdef CONFIG_NFS_V3
+#define NFS_DEFAULT_VERSION 3
+#else
+#define NFS_DEFAULT_VERSION 2
+#endif
+
 enum {
 	/* Mount options that take no arguments */
 	Opt_soft, Opt_hard,
@@ -2277,7 +2282,7 @@
 	};
 	int error = -ENOMEM;
 
-	data = nfs_alloc_parsed_mount_data(3);
+	data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
 	mntfh = nfs_alloc_fhandle();
 	if (data == NULL || mntfh == NULL)
 		goto out_free_fh;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ad2bfa6..116cab9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2262,7 +2262,7 @@
  * Spawn a thread to perform a recall on the delegation represented
  * by the lease (file_lock)
  *
- * Called from break_lease() with lock_kernel() held.
+ * Called from break_lease() with lock_flocks() held.
  * Note: we assume break_lease will only call this *once* for any given
  * lease.
  */
@@ -2286,7 +2286,7 @@
 	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
 	spin_unlock(&recall_lock);
 
-	/* only place dl_time is set. protected by lock_kernel*/
+	/* only place dl_time is set. protected by lock_flocks*/
 	dp->dl_time = get_seconds();
 
 	/*
@@ -2303,7 +2303,7 @@
 /*
  * The file_lock is being reapd.
  *
- * Called by locks_free_lock() with lock_kernel() held.
+ * Called by locks_free_lock() with lock_flocks() held.
  */
 static
 void nfsd_release_deleg_cb(struct file_lock *fl)
@@ -2318,7 +2318,7 @@
 }
 
 /*
- * Called from setlease() with lock_kernel() held
+ * Called from setlease() with lock_flocks() held
  */
 static
 int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d840821..1efea36 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -159,7 +159,9 @@
 	char                     l_name[OCFS2_LOCK_ID_MAX_LEN];
 	unsigned int             l_ro_holders;
 	unsigned int             l_ex_holders;
-	unsigned char            l_level;
+	char			 l_level;
+	char			 l_requested;
+	char			 l_blocking;
 
 	/* Data packed - type enum ocfs2_lock_type */
 	unsigned char            l_type;
@@ -169,8 +171,6 @@
 	unsigned char            l_action;
 	/* Data packed - enum type ocfs2_unlock_action */
 	unsigned char            l_unlock_action;
-	unsigned char            l_requested;
-	unsigned char            l_blocking;
 	unsigned int             l_pending_gen;
 
 	spinlock_t               l_lock;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f02c0ef..cfeab7c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,7 +41,6 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/quotaops.h>
-#include <linux/smp_lock.h>
 
 #define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9c2b5f4..3ddb606 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -16,7 +16,6 @@
 #include <linux/limits.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/sysctl.h>
 #include <linux/slab.h>
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index da6b01d..c126c83 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -706,6 +706,7 @@
  * skip over unmapped regions.
  */
 #define PAGEMAP_WALK_SIZE	(PMD_SIZE)
+#define PAGEMAP_WALK_MASK	(PMD_MASK)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
@@ -776,7 +777,7 @@
 		unsigned long end;
 
 		pm.pos = 0;
-		end = start_vaddr + PAGEMAP_WALK_SIZE;
+		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
 		/* overflow ? */
 		if (end < start_vaddr || end > end_vaddr)
 			end = end_vaddr;
diff --git a/fs/read_write.c b/fs/read_write.c
index 431a0ed..5d431ba 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/uio.h>
-#include <linux/smp_lock.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
 #include <linux/module.h>
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 41656d4..0bae036 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -8,7 +8,6 @@
 #include <linux/reiserfs_acl.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/exportfs.h>
-#include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index adf22b4..79265fd 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,7 +9,6 @@
 #include <linux/time.h>
 #include <asm/uaccess.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <linux/compat.h>
 
 /*
@@ -184,12 +183,11 @@
 		return 0;
 	}
 
-	/* we need to make sure nobody is changing the file size beneath
-	 ** us
-	 */
-	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
 	depth = reiserfs_write_lock_once(inode->i_sb);
 
+	/* we need to make sure nobody is changing the file size beneath us */
+	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
+
 	write_from = inode->i_size & (blocksize - 1);
 	/* if we are on a block boundary, we are already unpacked.  */
 	if (write_from == 0) {
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 076c8b19..d31bce1 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -43,7 +43,6 @@
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/workqueue.h>
 #include <linux/writeback.h>
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3bf7a64..b243117 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,7 +28,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
-#include <linux/smp_lock.h>
 
 struct file_system_type reiserfs_fs_type;
 
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index 01a7141..bc5590b 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -80,6 +80,7 @@
 #define NOUVEAU_GETPARAM_VM_VRAM_BASE    12
 #define NOUVEAU_GETPARAM_GRAPH_UNITS     13
 #define NOUVEAU_GETPARAM_PTIMER_TIME     14
+#define NOUVEAU_GETPARAM_HAS_BO_USAGE    15
 struct drm_nouveau_getparam {
 	uint64_t param;
 	uint64_t value;
@@ -95,6 +96,12 @@
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
 #define NOUVEAU_GEM_DOMAIN_MAPPABLE  (1 << 3)
 
+#define NOUVEAU_GEM_TILE_LAYOUT_MASK 0x0000ff00
+#define NOUVEAU_GEM_TILE_16BPP       0x00000001
+#define NOUVEAU_GEM_TILE_32BPP       0x00000002
+#define NOUVEAU_GEM_TILE_ZETA        0x00000004
+#define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
+
 struct drm_nouveau_gem_info {
 	uint32_t handle;
 	uint32_t domain;
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index f22b2e9..9e76d35 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -227,8 +227,7 @@
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 
 extern struct page **ceph_get_direct_page_vector(const char __user *data,
-					    int num_pages,
-					    loff_t off, size_t len);
+						 int num_pages);
 extern void ceph_put_page_vector(struct page **pages, int num_pages);
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 5956d62..a108b42 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -82,6 +82,7 @@
 	struct ceph_buffer *middle;
 	struct page **pages;            /* data payload.  NOT OWNER. */
 	unsigned nr_pages;              /* size of page array */
+	unsigned page_alignment;        /* io offset in first page */
 	struct ceph_pagelist *pagelist; /* instead of pages */
 	struct list_head list_head;
 	struct kref kref;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 6c91fb0..a1af296 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -79,6 +79,7 @@
 	struct ceph_file_layout r_file_layout;
 	struct ceph_snap_context *r_snapc;    /* snap context for writes */
 	unsigned          r_num_pages;        /* size of page array (follows) */
+	unsigned          r_page_alignment;   /* io offset in first page */
 	struct page     **r_pages;            /* pages for data payload */
 	int               r_pages_from_pool;
 	int               r_own_pages;        /* if true, i own page list */
@@ -194,7 +195,8 @@
 				      int do_sync, u32 truncate_seq,
 				      u64 truncate_size,
 				      struct timespec *mtime,
-				      bool use_mempool, int num_reply);
+				      bool use_mempool, int num_reply,
+				      int page_align);
 
 static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
 {
@@ -218,7 +220,8 @@
 			       struct ceph_file_layout *layout,
 			       u64 off, u64 *plen,
 			       u32 truncate_seq, u64 truncate_size,
-			       struct page **pages, int nr_pages);
+			       struct page **pages, int nr_pages,
+			       int page_align);
 
 extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 				struct ceph_vino vino,
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 7fca3dc..d1631d3 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -1122,6 +1122,7 @@
 
 /* drivers/video/fbcmap.c */
 extern int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp);
+extern int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags);
 extern void fb_dealloc_cmap(struct fb_cmap *cmap);
 extern int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to);
 extern int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 334d68a..c9e06cc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -34,9 +34,9 @@
 #define SEEK_MAX	SEEK_END
 
 struct fstrim_range {
-	uint64_t start;
-	uint64_t len;
-	uint64_t minlen;
+	__u64 start;
+	__u64 len;
+	__u64 minlen;
 };
 
 /* And dynamically-tunable limits and defaults: */
@@ -1612,7 +1612,6 @@
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
 	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
-	int (*trim_fs) (struct super_block *, struct fstrim_range *);
 };
 
 /*
diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h
index fc295d7..781d467 100644
--- a/include/linux/fsl-diu-fb.h
+++ b/include/linux/fsl-diu-fb.h
@@ -54,7 +54,6 @@
 };
 
 #define MFB_SET_CHROMA_KEY	_IOW('M', 1, struct mfb_chroma_key)
-#define MFB_WAIT_FOR_VSYNC	_IOW('F', 0x20, u_int32_t)
 #define MFB_SET_BRIGHTNESS	_IOW('M', 3, __u8)
 
 #define MFB_SET_ALPHA		0x80014d00
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 41cb31f..32f9fd6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -2,9 +2,6 @@
 #define LINUX_HARDIRQ_H
 
 #include <linux/preempt.h>
-#ifdef CONFIG_PREEMPT
-#include <linux/smp_lock.h>
-#endif
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
 #include <asm/hardirq.h>
@@ -97,7 +94,8 @@
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_BKL)
-# define PREEMPT_INATOMIC_BASE kernel_locked()
+# include <linux/sched.h>
+# define PREEMPT_INATOMIC_BASE (current->lock_depth >= 0)
 #else
 # define PREEMPT_INATOMIC_BASE 0
 #endif
diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index e844a0b..4bef5c55 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -32,28 +32,6 @@
  */
 
 /* --- Bit algorithm adapters						*/
-#define I2C_HW_B_BT848		0x010005 /* BT848 video boards */
-#define I2C_HW_B_RIVA		0x010010 /* Riva based graphics cards */
-#define I2C_HW_B_ZR36067	0x010019 /* Zoran-36057/36067 based boards */
 #define I2C_HW_B_CX2388x	0x01001b /* connexant 2388x based tv cards */
-#define I2C_HW_B_EM28XX		0x01001f /* em28xx video capture cards */
-#define I2C_HW_B_CX2341X	0x010020 /* Conexant CX2341X MPEG encoder cards */
-#define I2C_HW_B_CX23885	0x010022 /* conexant 23885 based tv cards (bus1) */
-#define I2C_HW_B_AU0828		0x010023 /* auvitek au0828 usb bridge */
-#define I2C_HW_B_CX231XX	0x010024 /* Conexant CX231XX USB based cards */
-#define I2C_HW_B_HDPVR		0x010025 /* Hauppauge HD PVR */
-
-/* --- SGI adapters							*/
-#define I2C_HW_SGI_VINO		0x160000
-
-/* --- SMBus only adapters						*/
-#define I2C_HW_SMBUS_W9968CF	0x04000d
-#define I2C_HW_SMBUS_OV511	0x04000e /* OV511(+) USB 1.1 webcam ICs */
-#define I2C_HW_SMBUS_OV518	0x04000f /* OV518(+) USB 1.1 webcam ICs */
-#define I2C_HW_SMBUS_CAFE	0x040012 /* Marvell 88ALP01 "CAFE" cam  */
-
-/* --- Miscellaneous adapters */
-#define I2C_HW_SAA7146		0x060000 /* SAA7146 video decoder bus */
-#define I2C_HW_SAA7134		0x090000 /* SAA7134 video decoder bus */
 
 #endif /* LINUX_I2C_ID_H */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 889b35a..56cfe23 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -353,7 +353,7 @@
  */
 struct i2c_adapter {
 	struct module *owner;
-	unsigned int id;
+	unsigned int id __deprecated;
 	unsigned int class;		  /* classes to allow probing for */
 	const struct i2c_algorithm *algo; /* the algorithm to access the bus */
 	void *algo_data;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fc3da9e..b6de9a6 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -17,13 +17,11 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 #include <linux/typecheck.h>
+#include <linux/printk.h>
 #include <linux/dynamic_debug.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
-extern const char linux_banner[];
-extern const char linux_proc_banner[];
-
 #define USHRT_MAX	((u16)(~0U))
 #define SHRT_MAX	((s16)(USHRT_MAX>>1))
 #define SHRT_MIN	((s16)(-SHRT_MAX - 1))
@@ -110,31 +108,6 @@
  */
 #define lower_32_bits(n) ((u32)(n))
 
-#define	KERN_EMERG	"<0>"	/* system is unusable			*/
-#define	KERN_ALERT	"<1>"	/* action must be taken immediately	*/
-#define	KERN_CRIT	"<2>"	/* critical conditions			*/
-#define	KERN_ERR	"<3>"	/* error conditions			*/
-#define	KERN_WARNING	"<4>"	/* warning conditions			*/
-#define	KERN_NOTICE	"<5>"	/* normal but significant condition	*/
-#define	KERN_INFO	"<6>"	/* informational			*/
-#define	KERN_DEBUG	"<7>"	/* debug-level messages			*/
-
-/* Use the default kernel loglevel */
-#define KERN_DEFAULT	"<d>"
-/*
- * Annotation for a "continued" line of log printout (only done after a
- * line that had no enclosing \n). Only to be used by core/arch code
- * during early bootup (a continued line is not SMP-safe otherwise).
- */
-#define	KERN_CONT	"<c>"
-
-extern int console_printk[];
-
-#define console_loglevel (console_printk[0])
-#define default_message_loglevel (console_printk[1])
-#define minimum_console_loglevel (console_printk[2])
-#define default_console_loglevel (console_printk[3])
-
 struct completion;
 struct pt_regs;
 struct user;
@@ -187,11 +160,6 @@
 }
 #endif
 
-struct va_format {
-	const char *fmt;
-	va_list *va;
-};
-
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 NORET_TYPE void panic(const char * fmt, ...)
@@ -245,115 +213,8 @@
 struct pid;
 extern struct pid *session_of_pgrp(struct pid *pgrp);
 
-/*
- * FW_BUG
- * Add this to a message where you are sure the firmware is buggy or behaves
- * really stupid or out of spec. Be aware that the responsible BIOS developer
- * should be able to fix this issue or at least get a concrete idea of the
- * problem by reading your message without the need of looking at the kernel
- * code.
- * 
- * Use it for definite and high priority BIOS bugs.
- *
- * FW_WARN
- * Use it for not that clear (e.g. could the kernel messed up things already?)
- * and medium priority BIOS bugs.
- *
- * FW_INFO
- * Use this one if you want to tell the user or vendor about something
- * suspicious, but generally harmless related to the firmware.
- *
- * Use it for information or very low priority BIOS bugs.
- */
-#define FW_BUG		"[Firmware Bug]: "
-#define FW_WARN		"[Firmware Warn]: "
-#define FW_INFO		"[Firmware Info]: "
-
-/*
- * HW_ERR
- * Add this to a message for hardware errors, so that user can report
- * it to hardware vendor instead of LKML or software vendor.
- */
-#define HW_ERR		"[Hardware Error]: "
-
-#ifdef CONFIG_PRINTK
-asmlinkage int vprintk(const char *fmt, va_list args)
-	__attribute__ ((format (printf, 1, 0)));
-asmlinkage int printk(const char * fmt, ...)
-	__attribute__ ((format (printf, 1, 2))) __cold;
-
-/*
- * Please don't use printk_ratelimit(), because it shares ratelimiting state
- * with all other unrelated printk_ratelimit() callsites.  Instead use
- * printk_ratelimited() or plain old __ratelimit().
- */
-extern int __printk_ratelimit(const char *func);
-#define printk_ratelimit() __printk_ratelimit(__func__)
-extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
-				   unsigned int interval_msec);
-
-extern int printk_delay_msec;
-extern int dmesg_restrict;
-
-/*
- * Print a one-time message (analogous to WARN_ONCE() et al):
- */
-#define printk_once(x...) ({			\
-	static bool __print_once;		\
-						\
-	if (!__print_once) {			\
-		__print_once = true;		\
-		printk(x);			\
-	}					\
-})
-
-void log_buf_kexec_setup(void);
-#else
-static inline int vprintk(const char *s, va_list args)
-	__attribute__ ((format (printf, 1, 0)));
-static inline int vprintk(const char *s, va_list args) { return 0; }
-static inline int printk(const char *s, ...)
-	__attribute__ ((format (printf, 1, 2)));
-static inline int __cold printk(const char *s, ...) { return 0; }
-static inline int printk_ratelimit(void) { return 0; }
-static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
-					  unsigned int interval_msec)	\
-		{ return false; }
-
-/* No effect, but we still get type checking even in the !PRINTK case: */
-#define printk_once(x...) printk(x)
-
-static inline void log_buf_kexec_setup(void)
-{
-}
-#endif
-
-/*
- * Dummy printk for disabled debugging statements to use whilst maintaining
- * gcc's format and side-effect checking.
- */
-static inline __attribute__ ((format (printf, 1, 2)))
-int no_printk(const char *s, ...) { return 0; }
-
-extern int printk_needs_cpu(int cpu);
-extern void printk_tick(void);
-
-extern void asmlinkage __attribute__((format(printf, 1, 2)))
-	early_printk(const char *fmt, ...);
-
 unsigned long int_sqrt(unsigned long);
 
-static inline void console_silent(void)
-{
-	console_loglevel = 0;
-}
-
-static inline void console_verbose(void)
-{
-	if (console_loglevel)
-		console_loglevel = 15;
-}
-
 extern void bust_spinlocks(int yes);
 extern void wake_up_klogd(void);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
@@ -390,22 +251,6 @@
 #define TAINT_CRAP			10
 #define TAINT_FIRMWARE_WORKAROUND	11
 
-extern void dump_stack(void) __cold;
-
-enum {
-	DUMP_PREFIX_NONE,
-	DUMP_PREFIX_ADDRESS,
-	DUMP_PREFIX_OFFSET
-};
-extern void hex_dump_to_buffer(const void *buf, size_t len,
-				int rowsize, int groupsize,
-				char *linebuf, size_t linebuflen, bool ascii);
-extern void print_hex_dump(const char *level, const char *prefix_str,
-				int prefix_type, int rowsize, int groupsize,
-				const void *buf, size_t len, bool ascii);
-extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-			const void *buf, size_t len);
-
 extern const char hex_asc[];
 #define hex_asc_lo(x)	hex_asc[((x) & 0x0f)]
 #define hex_asc_hi(x)	hex_asc[((x) & 0xf0) >> 4]
@@ -419,94 +264,6 @@
 
 extern int hex_to_bin(char ch);
 
-#ifndef pr_fmt
-#define pr_fmt(fmt) fmt
-#endif
-
-#define pr_emerg(fmt, ...) \
-        printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert(fmt, ...) \
-        printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit(fmt, ...) \
-        printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err(fmt, ...) \
-        printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warning(fmt, ...) \
-        printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn pr_warning
-#define pr_notice(fmt, ...) \
-        printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...) \
-        printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_cont(fmt, ...) \
-	printk(KERN_CONT fmt, ##__VA_ARGS__)
-
-/* pr_devel() should produce zero code unless DEBUG is defined */
-#ifdef DEBUG
-#define pr_devel(fmt, ...) \
-	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
-#else
-#define pr_devel(fmt, ...) \
-	({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
-#endif
-
-/* If you are writing a driver, please use dev_dbg instead */
-#if defined(DEBUG)
-#define pr_debug(fmt, ...) \
-	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
-#elif defined(CONFIG_DYNAMIC_DEBUG)
-/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */
-#define pr_debug(fmt, ...) \
-	dynamic_pr_debug(fmt, ##__VA_ARGS__)
-#else
-#define pr_debug(fmt, ...) \
-	({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
-#endif
-
-/*
- * ratelimited messages with local ratelimit_state,
- * no local ratelimit_state used in the !PRINTK case
- */
-#ifdef CONFIG_PRINTK
-#define printk_ratelimited(fmt, ...)  ({				\
-	static DEFINE_RATELIMIT_STATE(_rs,				\
-				      DEFAULT_RATELIMIT_INTERVAL,	\
-				      DEFAULT_RATELIMIT_BURST);		\
-									\
-	if (__ratelimit(&_rs))						\
-		printk(fmt, ##__VA_ARGS__);				\
-})
-#else
-/* No effect, but we still get type checking even in the !PRINTK case: */
-#define printk_ratelimited printk
-#endif
-
-#define pr_emerg_ratelimited(fmt, ...) \
-	printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert_ratelimited(fmt, ...) \
-	printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit_ratelimited(fmt, ...) \
-	printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err_ratelimited(fmt, ...) \
-	printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warning_ratelimited(fmt, ...) \
-	printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn_ratelimited pr_warning_ratelimited
-#define pr_notice_ratelimited(fmt, ...) \
-	printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info_ratelimited(fmt, ...) \
-	printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-/* no pr_cont_ratelimited, don't do that... */
-/* If you are writing a driver, please use dev_dbg instead */
-#if defined(DEBUG)
-#define pr_debug_ratelimited(fmt, ...) \
-	printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
-#else
-#define pr_debug_ratelimited(fmt, ...) \
-	({ if (0) printk_ratelimited(KERN_DEBUG pr_fmt(fmt), \
-				     ##__VA_ARGS__); 0; })
-#endif
-
 /*
  * General tracing related utility functions - trace_printk(),
  * tracing_on/tracing_off and tracing_start()/tracing_stop
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 15b77b8..d947b12 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -986,7 +986,7 @@
 			  unsigned long, struct ata_port_operations *);
 extern int ata_scsi_detect(struct scsi_host_template *sht);
 extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg);
-extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *));
+extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd);
 extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev,
 			    int cmd, void __user *arg);
 extern void ata_sas_port_destroy(struct ata_port *);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index a34dea4..2dee05e 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -43,6 +43,7 @@
 	struct sockaddr_storage	h_addr;		/* peer address */
 	size_t			h_addrlen;
 	struct sockaddr_storage	h_srcaddr;	/* our address (optional) */
+	size_t			h_srcaddrlen;
 	struct rpc_clnt		*h_rpcclnt;	/* RPC client to talk to peer */
 	char			*h_name;		/* remote hostname */
 	u32			h_version;	/* interface version */
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 1ff81b5..dd3c34e 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -11,6 +11,7 @@
 #define MARVELL_PHY_ID_88E1118		0x01410e10
 #define MARVELL_PHY_ID_88E1121R		0x01410cb0
 #define MARVELL_PHY_ID_88E1145		0x01410cd0
+#define MARVELL_PHY_ID_88E1149R		0x01410e50
 #define MARVELL_PHY_ID_88E1240		0x01410e30
 #define MARVELL_PHY_ID_88E1318S		0x01410e90
 
diff --git a/include/linux/mfd/wm8350/audio.h b/include/linux/mfd/wm8350/audio.h
index a95141e..bd581c6 100644
--- a/include/linux/mfd/wm8350/audio.h
+++ b/include/linux/mfd/wm8350/audio.h
@@ -522,9 +522,6 @@
 #define WM8350_MCLK_SEL_PLL_32K			3
 #define WM8350_MCLK_SEL_MCLK			5
 
-#define WM8350_MCLK_DIR_OUT			0
-#define WM8350_MCLK_DIR_IN			1
-
 /* clock divider id's */
 #define WM8350_ADC_CLKDIV			0
 #define WM8350_DAC_CLKDIV			1
diff --git a/include/linux/module.h b/include/linux/module.h
index b29e745..7575bbb 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -517,7 +517,7 @@
 #define symbol_put_addr(p) do { } while(0)
 
 #endif /* CONFIG_MODULE_UNLOAD */
-int use_module(struct module *a, struct module *b);
+int ref_module(struct module *a, struct module *b);
 
 /* This is a #define so the string doesn't get put in every .o file */
 #define module_name(mod)			\
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bba2668..c66fdb7 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -593,12 +593,6 @@
 	return ino;
 }
 
-#define nfs_wait_event(clnt, wq, condition)				\
-({									\
-	int __retval = wait_event_killable(wq, condition);		\
-	__retval;							\
-})
-
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 5bb13b3..b02195d 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -59,8 +59,6 @@
 static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)	\
 	{ return test_and_clear_bit(PCG_##lname, &pc->flags);  }
 
-TESTPCGFLAG(Locked, LOCK)
-
 /* Cache flag is set only once (at allocation) */
 TESTPCGFLAG(Cache, CACHE)
 CLEARPCGFLAG(Cache, CACHE)
@@ -104,6 +102,11 @@
 	bit_spin_unlock(PCG_LOCK, &pc->flags);
 }
 
+static inline int page_is_cgroup_locked(struct page_cgroup *pc)
+{
+	return bit_spin_is_locked(PCG_LOCK, &pc->flags);
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
diff --git a/include/linux/printk.h b/include/linux/printk.h
new file mode 100644
index 0000000..b772ca5
--- /dev/null
+++ b/include/linux/printk.h
@@ -0,0 +1,248 @@
+#ifndef __KERNEL_PRINTK__
+#define __KERNEL_PRINTK__
+
+extern const char linux_banner[];
+extern const char linux_proc_banner[];
+
+#define	KERN_EMERG	"<0>"	/* system is unusable			*/
+#define	KERN_ALERT	"<1>"	/* action must be taken immediately	*/
+#define	KERN_CRIT	"<2>"	/* critical conditions			*/
+#define	KERN_ERR	"<3>"	/* error conditions			*/
+#define	KERN_WARNING	"<4>"	/* warning conditions			*/
+#define	KERN_NOTICE	"<5>"	/* normal but significant condition	*/
+#define	KERN_INFO	"<6>"	/* informational			*/
+#define	KERN_DEBUG	"<7>"	/* debug-level messages			*/
+
+/* Use the default kernel loglevel */
+#define KERN_DEFAULT	"<d>"
+/*
+ * Annotation for a "continued" line of log printout (only done after a
+ * line that had no enclosing \n). Only to be used by core/arch code
+ * during early bootup (a continued line is not SMP-safe otherwise).
+ */
+#define	KERN_CONT	"<c>"
+
+extern int console_printk[];
+
+#define console_loglevel (console_printk[0])
+#define default_message_loglevel (console_printk[1])
+#define minimum_console_loglevel (console_printk[2])
+#define default_console_loglevel (console_printk[3])
+
+struct va_format {
+	const char *fmt;
+	va_list *va;
+};
+
+/*
+ * FW_BUG
+ * Add this to a message where you are sure the firmware is buggy or behaves
+ * really stupid or out of spec. Be aware that the responsible BIOS developer
+ * should be able to fix this issue or at least get a concrete idea of the
+ * problem by reading your message without the need of looking at the kernel
+ * code.
+ *
+ * Use it for definite and high priority BIOS bugs.
+ *
+ * FW_WARN
+ * Use it for not that clear (e.g. could the kernel messed up things already?)
+ * and medium priority BIOS bugs.
+ *
+ * FW_INFO
+ * Use this one if you want to tell the user or vendor about something
+ * suspicious, but generally harmless related to the firmware.
+ *
+ * Use it for information or very low priority BIOS bugs.
+ */
+#define FW_BUG		"[Firmware Bug]: "
+#define FW_WARN		"[Firmware Warn]: "
+#define FW_INFO		"[Firmware Info]: "
+
+/*
+ * HW_ERR
+ * Add this to a message for hardware errors, so that user can report
+ * it to hardware vendor instead of LKML or software vendor.
+ */
+#define HW_ERR		"[Hardware Error]: "
+
+#ifdef CONFIG_PRINTK
+asmlinkage int vprintk(const char *fmt, va_list args)
+	__attribute__ ((format (printf, 1, 0)));
+asmlinkage int printk(const char * fmt, ...)
+	__attribute__ ((format (printf, 1, 2))) __cold;
+
+/*
+ * Please don't use printk_ratelimit(), because it shares ratelimiting state
+ * with all other unrelated printk_ratelimit() callsites.  Instead use
+ * printk_ratelimited() or plain old __ratelimit().
+ */
+extern int __printk_ratelimit(const char *func);
+#define printk_ratelimit() __printk_ratelimit(__func__)
+extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+				   unsigned int interval_msec);
+
+extern int printk_delay_msec;
+extern int dmesg_restrict;
+
+/*
+ * Print a one-time message (analogous to WARN_ONCE() et al):
+ */
+#define printk_once(x...) ({			\
+	static bool __print_once;		\
+						\
+	if (!__print_once) {			\
+		__print_once = true;		\
+		printk(x);			\
+	}					\
+})
+
+void log_buf_kexec_setup(void);
+#else
+static inline int vprintk(const char *s, va_list args)
+	__attribute__ ((format (printf, 1, 0)));
+static inline int vprintk(const char *s, va_list args) { return 0; }
+static inline int printk(const char *s, ...)
+	__attribute__ ((format (printf, 1, 2)));
+static inline int __cold printk(const char *s, ...) { return 0; }
+static inline int printk_ratelimit(void) { return 0; }
+static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
+					  unsigned int interval_msec)	\
+		{ return false; }
+
+/* No effect, but we still get type checking even in the !PRINTK case: */
+#define printk_once(x...) printk(x)
+
+static inline void log_buf_kexec_setup(void)
+{
+}
+#endif
+
+/*
+ * Dummy printk for disabled debugging statements to use whilst maintaining
+ * gcc's format and side-effect checking.
+ */
+static inline __attribute__ ((format (printf, 1, 2)))
+int no_printk(const char *s, ...) { return 0; }
+
+extern int printk_needs_cpu(int cpu);
+extern void printk_tick(void);
+
+extern void asmlinkage __attribute__((format(printf, 1, 2)))
+	early_printk(const char *fmt, ...);
+
+static inline void console_silent(void)
+{
+	console_loglevel = 0;
+}
+
+static inline void console_verbose(void)
+{
+	if (console_loglevel)
+		console_loglevel = 15;
+}
+
+extern void dump_stack(void) __cold;
+
+enum {
+	DUMP_PREFIX_NONE,
+	DUMP_PREFIX_ADDRESS,
+	DUMP_PREFIX_OFFSET
+};
+extern void hex_dump_to_buffer(const void *buf, size_t len,
+				int rowsize, int groupsize,
+				char *linebuf, size_t linebuflen, bool ascii);
+extern void print_hex_dump(const char *level, const char *prefix_str,
+				int prefix_type, int rowsize, int groupsize,
+				const void *buf, size_t len, bool ascii);
+extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
+			const void *buf, size_t len);
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define pr_emerg(fmt, ...) \
+        printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert(fmt, ...) \
+        printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit(fmt, ...) \
+        printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+        printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+        printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn pr_warning
+#define pr_notice(fmt, ...) \
+        printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+        printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_cont(fmt, ...) \
+	printk(KERN_CONT fmt, ##__VA_ARGS__)
+
+/* pr_devel() should produce zero code unless DEBUG is defined */
+#ifdef DEBUG
+#define pr_devel(fmt, ...) \
+	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel(fmt, ...) \
+	({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
+#endif
+
+/* If you are writing a driver, please use dev_dbg instead */
+#if defined(DEBUG)
+#define pr_debug(fmt, ...) \
+	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#elif defined(CONFIG_DYNAMIC_DEBUG)
+/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */
+#define pr_debug(fmt, ...) \
+	dynamic_pr_debug(fmt, ##__VA_ARGS__)
+#else
+#define pr_debug(fmt, ...) \
+	({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
+#endif
+
+/*
+ * ratelimited messages with local ratelimit_state,
+ * no local ratelimit_state used in the !PRINTK case
+ */
+#ifdef CONFIG_PRINTK
+#define printk_ratelimited(fmt, ...)  ({				\
+	static DEFINE_RATELIMIT_STATE(_rs,				\
+				      DEFAULT_RATELIMIT_INTERVAL,	\
+				      DEFAULT_RATELIMIT_BURST);		\
+									\
+	if (__ratelimit(&_rs))						\
+		printk(fmt, ##__VA_ARGS__);				\
+})
+#else
+/* No effect, but we still get type checking even in the !PRINTK case: */
+#define printk_ratelimited printk
+#endif
+
+#define pr_emerg_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn_ratelimited pr_warning_ratelimited
+#define pr_notice_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+/* no pr_cont_ratelimited, don't do that... */
+/* If you are writing a driver, please use dev_dbg instead */
+#if defined(DEBUG)
+#define pr_debug_ratelimited(fmt, ...) \
+	printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_debug_ratelimited(fmt, ...) \
+	({ if (0) printk_ratelimited(KERN_DEBUG pr_fmt(fmt), \
+				     ##__VA_ARGS__); 0; })
+#endif
+
+#endif
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 5ca47e5..c21072a 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -22,7 +22,6 @@
 #include <asm/unaligned.h>
 #include <linux/bitops.h>
 #include <linux/proc_fs.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/reiserfs_fs_i.h>
 #include <linux/reiserfs_fs_sb.h>
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index d42f2744..bbad657 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -6,7 +6,6 @@
 #include <linux/if_link.h>
 #include <linux/if_addr.h>
 #include <linux/neighbour.h>
-#include <linux/netdevice.h>
 
 /* rtnetlink families. Values up to 127 are reserved for real address
  * families, values above 128 may be used arbitrarily.
@@ -606,6 +605,7 @@
 #ifdef __KERNEL__
 
 #include <linux/mutex.h>
+#include <linux/netdevice.h>
 
 static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0036e5..2c79e92 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -862,6 +862,7 @@
 	 * single CPU.
 	 */
 	unsigned int cpu_power, cpu_power_orig;
+	unsigned int group_weight;
 
 	/*
 	 * The CPUs this group covers.
diff --git a/include/linux/security.h b/include/linux/security.h
index b8246a8..fd4d55f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -77,7 +77,6 @@
 extern int cap_task_setscheduler(struct task_struct *p);
 extern int cap_task_setioprio(struct task_struct *p, int ioprio);
 extern int cap_task_setnice(struct task_struct *p, int nice);
-extern int cap_syslog(int type, bool from_file);
 extern int cap_vm_enough_memory(struct mm_struct *mm, long pages);
 
 struct msghdr;
@@ -1388,7 +1387,7 @@
 	int (*sysctl) (struct ctl_table *table, int op);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
 	int (*quota_on) (struct dentry *dentry);
-	int (*syslog) (int type, bool from_file);
+	int (*syslog) (int type);
 	int (*settime) (struct timespec *ts, struct timezone *tz);
 	int (*vm_enough_memory) (struct mm_struct *mm, long pages);
 
@@ -1671,7 +1670,7 @@
 int security_sysctl(struct ctl_table *table, int op);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
 int security_quota_on(struct dentry *dentry);
-int security_syslog(int type, bool from_file);
+int security_syslog(int type);
 int security_settime(struct timespec *ts, struct timezone *tz);
 int security_vm_enough_memory(long pages);
 int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
@@ -1901,9 +1900,9 @@
 	return 0;
 }
 
-static inline int security_syslog(int type, bool from_file)
+static inline int security_syslog(int type)
 {
-	return cap_syslog(type, from_file);
+	return 0;
 }
 
 static inline int security_settime(struct timespec *ts, struct timezone *tz)
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index cea0c38..9a52f72 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -19,11 +19,13 @@
 };
 
 struct clk_ops {
+#ifdef CONFIG_SH_CLK_CPG_LEGACY
 	void (*init)(struct clk *clk);
+#endif
 	int (*enable)(struct clk *clk);
 	void (*disable)(struct clk *clk);
 	unsigned long (*recalc)(struct clk *clk);
-	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
+	int (*set_rate)(struct clk *clk, unsigned long rate);
 	int (*set_parent)(struct clk *clk, struct clk *parent);
 	long (*round_rate)(struct clk *clk, unsigned long rate);
 };
@@ -67,36 +69,6 @@
 void clk_unregister(struct clk *);
 void clk_enable_init_clocks(void);
 
-/**
- * clk_set_rate_ex - set the clock rate for a clock source, with additional parameter
- * @clk: clock source
- * @rate: desired clock rate in Hz
- * @algo_id: algorithm id to be passed down to ops->set_rate
- *
- * Returns success (0) or negative errno.
- */
-int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id);
-
-enum clk_sh_algo_id {
-	NO_CHANGE = 0,
-
-	IUS_N1_N1,
-	IUS_322,
-	IUS_522,
-	IUS_N11,
-
-	SB_N1,
-
-	SB3_N1,
-	SB3_32,
-	SB3_43,
-	SB3_54,
-
-	BP_N1,
-
-	IP_N1,
-};
-
 struct clk_div_mult_table {
 	unsigned int *divisors;
 	unsigned int nr_divisors;
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index f656d1a..5812fef 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -79,7 +79,7 @@
 	unsigned int nr_subgroups;
 };
 
-#define _INTC_ARRAY(a) a, a == NULL ? 0 : sizeof(a)/sizeof(*a)
+#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a)
 
 #define INTC_HW_DESC(vectors, groups, mask_regs,	\
 		     prio_regs,	sense_regs, ack_regs)	\
diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index 291f721..3a19882 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -4,8 +4,6 @@
 #ifdef CONFIG_LOCK_KERNEL
 #include <linux/sched.h>
 
-#define kernel_locked()		(current->lock_depth >= 0)
-
 extern int __lockfunc __reacquire_kernel_lock(void);
 extern void __lockfunc __release_kernel_lock(void);
 
@@ -58,7 +56,6 @@
 #define lock_kernel()
 #define unlock_kernel()
 #define cycle_kernel_lock()			do { } while(0)
-#define kernel_locked()				1
 #endif /* CONFIG_BKL */
 
 #define release_kernel_lock(task)		do { } while(0)
diff --git a/include/linux/tty.h b/include/linux/tty.h
index c7ea9bc..032d79f 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -13,7 +13,6 @@
 #include <linux/tty_driver.h>
 #include <linux/tty_ldisc.h>
 #include <linux/mutex.h>
-#include <linux/smp_lock.h>
 
 #include <asm/system.h>
 
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2a7936d..97b8b7c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1355,7 +1355,7 @@
 	WIPHY_FLAG_4ADDR_AP			= BIT(5),
 	WIPHY_FLAG_4ADDR_STATION		= BIT(6),
 	WIPHY_FLAG_CONTROL_PORT_PROTOCOL	= BIT(7),
-	WIPHY_FLAG_IBSS_RSN			= BIT(7),
+	WIPHY_FLAG_IBSS_RSN			= BIT(8),
 };
 
 struct mac_address {
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 55590ab..6beb1ff 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -303,7 +303,7 @@
 
 static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 {
-	unsigned long now = ACCESS_ONCE(jiffies);
+	unsigned long now = jiffies;
 	
 	if (neigh->used != now)
 		neigh->used = now;
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index f986ab7..5c4c167 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -1006,8 +1006,7 @@
 /*
  * SCSI INTERACTION LAYER
  *****************************/
-int fc_queuecommand(struct scsi_cmnd *,
-		    void (*done)(struct scsi_cmnd *));
+int fc_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 int fc_eh_abort(struct scsi_cmnd *);
 int fc_eh_device_reset(struct scsi_cmnd *);
 int fc_eh_host_reset(struct scsi_cmnd *);
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index ae5196a..b81d969 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -341,8 +341,7 @@
 extern int iscsi_eh_recover_target(struct scsi_cmnd *sc);
 extern int iscsi_eh_session_reset(struct scsi_cmnd *sc);
 extern int iscsi_eh_device_reset(struct scsi_cmnd *sc);
-extern int iscsi_queuecommand(struct scsi_cmnd *sc,
-			      void (*done)(struct scsi_cmnd *));
+extern int iscsi_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *sc);
 
 /*
  * iSCSI host helpers.
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 3dec194..90ce527 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -621,8 +621,7 @@
 int sas_phy_enable(struct sas_phy *phy, int enabled);
 int sas_phy_reset(struct sas_phy *phy, int hard_reset);
 int sas_queue_up(struct sas_task *task);
-extern int sas_queuecommand(struct scsi_cmnd *,
-		     void (*scsi_done)(struct scsi_cmnd *));
+extern int sas_queuecommand(struct Scsi_Host * ,struct scsi_cmnd *);
 extern int sas_target_alloc(struct scsi_target *);
 extern int sas_slave_alloc(struct scsi_device *);
 extern int sas_slave_configure(struct scsi_device *);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index d0a6a84..e7e3858 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -127,8 +127,7 @@
 	 *
 	 * STATUS: REQUIRED
 	 */
-	int (* queuecommand)(struct scsi_cmnd *,
-			     void (*done)(struct scsi_cmnd *));
+	int (* queuecommand)(struct Scsi_Host *, struct scsi_cmnd *);
 
 	/*
 	 * The transfer functions are used to queue a scsi command to
@@ -505,6 +504,25 @@
 };
 
 /*
+ * Temporary #define for host lock push down. Can be removed when all
+ * drivers have been updated to take advantage of unlocked
+ * queuecommand.
+ *
+ */
+#define DEF_SCSI_QCMD(func_name) \
+	int func_name(struct Scsi_Host *shost, struct scsi_cmnd *cmd)	\
+	{								\
+		unsigned long irq_flags;				\
+		int rc;							\
+		spin_lock_irqsave(shost->host_lock, irq_flags);		\
+		scsi_cmd_get_serial(shost, cmd);			\
+		rc = func_name##_lck (cmd, cmd->scsi_done);			\
+		spin_unlock_irqrestore(shost->host_lock, irq_flags);	\
+		return rc;						\
+	}
+
+
+/*
  * shost state: If you alter this, you also need to alter scsi_sysfs.c
  * (for the ascii descriptions) and the state model enforcer:
  * scsi_host_set_state()
@@ -752,6 +770,7 @@
 extern void scsi_host_put(struct Scsi_Host *t);
 extern struct Scsi_Host *scsi_host_lookup(unsigned short);
 extern const char *scsi_host_state_name(enum scsi_host_state);
+extern void scsi_cmd_get_serial(struct Scsi_Host *, struct scsi_cmnd *);
 
 extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *);
 
diff --git a/include/video/da8xx-fb.h b/include/video/da8xx-fb.h
index 6316cda..89d43b3 100644
--- a/include/video/da8xx-fb.h
+++ b/include/video/da8xx-fb.h
@@ -99,7 +99,6 @@
 #define FBIPUT_COLOR		_IOW('F', 6, int)
 #define FBIPUT_HSYNC		_IOW('F', 9, int)
 #define FBIPUT_VSYNC		_IOW('F', 10, int)
-#define FBIO_WAITFORVSYNC	_IOW('F', 0x20, u_int32_t)
 
 #endif  /* ifndef DA8XX_FB_H */
 
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index d7a6c13..eac3ce1 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -141,6 +141,19 @@
 DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
 
 /*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+    unsigned long v_start, v_end; /* Start and end virtual addresses.   */
+    unsigned long max_mfn;        /* Maximum MFN that can be looked up. */
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
+
+/*
  * Sets the GPFN at which a particular page appears in the specified guest's
  * pseudophysical address space.
  * arg == addr of xen_add_to_physmap_t.
diff --git a/include/xen/page.h b/include/xen/page.h
index eaf85fa..0be36b9 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -1 +1,8 @@
+#ifndef _XEN_PAGE_H
+#define _XEN_PAGE_H
+
 #include <asm/xen/page.h>
+
+extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+
+#endif	/* _XEN_PAGE_H */
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index b42cdfd..17857fb 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -34,13 +34,10 @@
 #define __LINUX_PUBLIC_PRIVCMD_H__
 
 #include <linux/types.h>
+#include <linux/compiler.h>
 
 typedef unsigned long xen_pfn_t;
 
-#ifndef __user
-#define __user
-#endif
-
 struct privcmd_hypercall {
 	__u64 op;
 	__u64 arg[5];
diff --git a/init/Kconfig b/init/Kconfig
index 88c1046..c972899 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -613,6 +613,19 @@
 	  if boot option "noswapaccount" is set, swap will not be accounted.
 	  Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
 	  size is 4096bytes, 512k per 1Gbytes of swap.
+config CGROUP_MEM_RES_CTLR_SWAP_ENABLED
+	bool "Memory Resource Controller Swap Extension enabled by default"
+	depends on CGROUP_MEM_RES_CTLR_SWAP
+	default y
+	help
+	  Memory Resource Controller Swap Extension comes with its price in
+	  a bigger memory consumption. General purpose distribution kernels
+	  which want to enable the feautre but keep it disabled by default
+	  and let the user enable it by swapaccount boot command line
+	  parameter should have this option unselected.
+	  For those who want to have the feature enabled by default should
+	  select this option (if, for some reason, they need to disable it
+	  then noswapaccount does the trick).
 
 menuconfig CGROUP_SCHED
 	bool "Group CPU scheduler"
diff --git a/init/main.c b/init/main.c
index e59af24..8646401 100644
--- a/init/main.c
+++ b/init/main.c
@@ -20,7 +20,6 @@
 #include <linux/delay.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 37755d6..a6e7297 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -82,7 +82,7 @@
 #define for_each_kdbcmd(cmd, num)					\
 	for ((cmd) = kdb_base_commands, (num) = 0;			\
 	     num < kdb_max_commands;					\
-	     num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++)
+	     num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)
 
 typedef struct _kdbmsg {
 	int	km_diag;	/* kdb diagnostic */
@@ -646,7 +646,7 @@
 	}
 	if (!s->usable)
 		return KDB_NOTIMP;
-	s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
+	s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
 	if (!s->command) {
 		kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
 			   cmdstr);
@@ -2361,7 +2361,7 @@
  */
 static int kdb_ll(int argc, const char **argv)
 {
-	int diag;
+	int diag = 0;
 	unsigned long addr;
 	long offset = 0;
 	unsigned long va;
@@ -2400,20 +2400,21 @@
 		char buf[80];
 
 		if (KDB_FLAG(CMD_INTERRUPT))
-			return 0;
+			goto out;
 
 		sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
 		diag = kdb_parse(buf);
 		if (diag)
-			return diag;
+			goto out;
 
 		addr = va + linkoffset;
 		if (kdb_getword(&va, addr, sizeof(va)))
-			return 0;
+			goto out;
 	}
-	kfree(command);
 
-	return 0;
+out:
+	kfree(command);
+	return diag;
 }
 
 static int kdb_kgdb(int argc, const char **argv)
@@ -2739,13 +2740,13 @@
 		}
 		if (kdb_commands) {
 			memcpy(new, kdb_commands,
-			       kdb_max_commands * sizeof(*new));
+			  (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));
 			kfree(kdb_commands);
 		}
 		memset(new + kdb_max_commands, 0,
 		       kdb_command_extend * sizeof(*new));
 		kdb_commands = new;
-		kp = kdb_commands + kdb_max_commands;
+		kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;
 		kdb_max_commands += kdb_command_extend;
 	}
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 6c683b3..40a8777 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2489,7 +2489,8 @@
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	unsigned int uninitialized_var(next_pi);
 	unsigned long futex_offset;
 	int rc;
 
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 06da4df..a7934ac 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -49,7 +49,8 @@
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	unsigned int uninitialized_var(next_pi);
 	compat_uptr_t uentry, next_uentry, upending;
 	compat_long_t futex_offset;
 	int rc;
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index c7a8f45..aeaa7f8 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -121,10 +121,10 @@
 
 	switch (o->type) {
 	case PM_QOS_MIN:
-		return plist_last(&o->requests)->prio;
+		return plist_first(&o->requests)->prio;
 
 	case PM_QOS_MAX:
-		return plist_first(&o->requests)->prio;
+		return plist_last(&o->requests)->prio;
 
 	default:
 		/* runtime check for not using enum */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 29bff61..a5aff3e 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -246,9 +246,13 @@
 	depends on PM_SLEEP || PM_RUNTIME
 	default y
 
+config ARCH_HAS_OPP
+	bool
+
 config PM_OPP
 	bool "Operating Performance Point (OPP) Layer library"
 	depends on PM
+	depends on ARCH_HAS_OPP
 	---help---
 	  SOCs have a standard set of tuples consisting of frequency and
 	  voltage pairs that the device will support per voltage domain. This
diff --git a/kernel/printk.c b/kernel/printk.c
index 38e7d58..9a2264f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -274,7 +274,20 @@
 	char c;
 	int error = 0;
 
-	error = security_syslog(type, from_file);
+	/*
+	 * If this is from /proc/kmsg we only do the capabilities checks
+	 * at open time.
+	 */
+	if (type == SYSLOG_ACTION_OPEN || !from_file) {
+		if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if ((type != SYSLOG_ACTION_READ_ALL &&
+		     type != SYSLOG_ACTION_SIZE_BUFFER) &&
+		    !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
+	error = security_syslog(type);
 	if (error)
 		return error;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index aa14a56..dc91a4d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -560,18 +560,8 @@
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
-static inline
-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
-	rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 
-	/*
-	 * A queue event has occurred, and we're going to schedule.  In
-	 * this case, we can save a useless back to back clock update.
-	 */
-	if (test_tsk_need_resched(p))
-		rq->skip_clock_update = 1;
-}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 
 static inline int cpu_of(struct rq *rq)
 {
@@ -2118,6 +2108,31 @@
 		p->sched_class->prio_changed(rq, p, oldprio, running);
 }
 
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+	const struct sched_class *class;
+
+	if (p->sched_class == rq->curr->sched_class) {
+		rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+	} else {
+		for_each_class(class) {
+			if (class == rq->curr->sched_class)
+				break;
+			if (class == p->sched_class) {
+				resched_task(rq->curr);
+				break;
+			}
+		}
+	}
+
+	/*
+	 * A queue event has occurred, and we're going to schedule.  In
+	 * this case, we can save a useless back to back clock update.
+	 */
+	if (test_tsk_need_resched(rq->curr))
+		rq->skip_clock_update = 1;
+}
+
 #ifdef CONFIG_SMP
 /*
  * Is this task likely cache-hot:
@@ -6960,6 +6975,8 @@
 	if (cpu != group_first_cpu(sd->groups))
 		return;
 
+	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+
 	child = sd->child;
 
 	sd->groups->cpu_power = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4f6a83..52ab113 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,12 +1654,6 @@
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int scale = cfs_rq->nr_running >= sched_nr_latency;
 
-	if (unlikely(rt_prio(p->prio)))
-		goto preempt;
-
-	if (unlikely(p->sched_class != &fair_sched_class))
-		return;
-
 	if (unlikely(se == pse))
 		return;
 
@@ -2035,13 +2029,16 @@
 	unsigned long this_load_per_task;
 	unsigned long this_nr_running;
 	unsigned long this_has_capacity;
+	unsigned int  this_idle_cpus;
 
 	/* Statistics of the busiest group */
+	unsigned int  busiest_idle_cpus;
 	unsigned long max_load;
 	unsigned long busiest_load_per_task;
 	unsigned long busiest_nr_running;
 	unsigned long busiest_group_capacity;
 	unsigned long busiest_has_capacity;
+	unsigned int  busiest_group_weight;
 
 	int group_imb; /* Is there imbalance in this sd */
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2063,6 +2060,8 @@
 	unsigned long sum_nr_running; /* Nr tasks running in the group */
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long group_capacity;
+	unsigned long idle_cpus;
+	unsigned long group_weight;
 	int group_imb; /* Is there an imbalance in the group ? */
 	int group_has_capacity; /* Is there extra capacity in the group? */
 };
@@ -2431,7 +2430,8 @@
 		sgs->group_load += load;
 		sgs->sum_nr_running += rq->nr_running;
 		sgs->sum_weighted_load += weighted_cpuload(i);
-
+		if (idle_cpu(i))
+			sgs->idle_cpus++;
 	}
 
 	/*
@@ -2469,6 +2469,7 @@
 	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
 	if (!sgs->group_capacity)
 		sgs->group_capacity = fix_small_capacity(sd, group);
+	sgs->group_weight = group->group_weight;
 
 	if (sgs->group_capacity > sgs->sum_nr_running)
 		sgs->group_has_capacity = 1;
@@ -2576,13 +2577,16 @@
 			sds->this_nr_running = sgs.sum_nr_running;
 			sds->this_load_per_task = sgs.sum_weighted_load;
 			sds->this_has_capacity = sgs.group_has_capacity;
+			sds->this_idle_cpus = sgs.idle_cpus;
 		} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
 			sds->max_load = sgs.avg_load;
 			sds->busiest = sg;
 			sds->busiest_nr_running = sgs.sum_nr_running;
+			sds->busiest_idle_cpus = sgs.idle_cpus;
 			sds->busiest_group_capacity = sgs.group_capacity;
 			sds->busiest_load_per_task = sgs.sum_weighted_load;
 			sds->busiest_has_capacity = sgs.group_has_capacity;
+			sds->busiest_group_weight = sgs.group_weight;
 			sds->group_imb = sgs.group_imb;
 		}
 
@@ -2860,8 +2864,26 @@
 	if (sds.this_load >= sds.avg_load)
 		goto out_balanced;
 
-	if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
-		goto out_balanced;
+	/*
+	 * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
+	 * And to check for busy balance use !idle_cpu instead of
+	 * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
+	 * even when they are idle.
+	 */
+	if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
+		if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+			goto out_balanced;
+	} else {
+		/*
+		 * This cpu is idle. If the busiest group load doesn't
+		 * have more tasks than the number of available cpu's and
+		 * there is no imbalance between this and busiest group
+		 * wrt to idle cpu's, it is balanced.
+		 */
+		if ((sds.this_idle_cpus  <= sds.busiest_idle_cpus + 1) &&
+		    sds.busiest_nr_running <= sds.busiest_group_weight)
+			goto out_balanced;
+	}
 
 force_balance:
 	/* Looks like there is an imbalance. Compute it */
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 45bddc0..2bf6b47 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -19,14 +19,14 @@
 static void
 check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
 {
-	resched_task(rq->curr); /* we preempt everything */
+	/* we're never preempted */
 }
 
 static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->state == TASK_RUNNING)
+	if (stop && stop->se.on_rq)
 		return stop;
 
 	return NULL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b65bf63..5abfa15 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -702,7 +702,6 @@
 		.extra1		= &zero,
 		.extra2		= &ten_thousand,
 	},
-#endif
 	{
 		.procname	= "dmesg_restrict",
 		.data		= &dmesg_restrict,
@@ -712,6 +711,7 @@
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif
 	{
 		.procname	= "ngroups_max",
 		.data		= &ngroups_max,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e04b8bc..ea37e2f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -126,7 +126,7 @@
 config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
 	depends on HAVE_FUNCTION_TRACER
-	select FRAME_POINTER if (!ARM_UNWIND)
+	select FRAME_POINTER if !ARM_UNWIND && !S390
 	select KALLSYMS
 	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 82d9b81..0420841 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,7 +17,6 @@
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2efa8ea..7a22b41 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -61,7 +61,14 @@
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
 int do_swap_account __read_mostly;
-static int really_do_swap_account __initdata = 1; /* for remember boot option*/
+
+/* for remember boot option*/
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED
+static int really_do_swap_account __initdata = 1;
+#else
+static int really_do_swap_account __initdata = 0;
+#endif
+
 #else
 #define do_swap_account		(0)
 #endif
@@ -278,13 +285,14 @@
 
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
-	spinlock_t	  lock; /* for from, to, moving_task */
+	spinlock_t	  lock; /* for from, to */
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long precharge;
 	unsigned long moved_charge;
 	unsigned long moved_swap;
 	struct task_struct *moving_task;	/* a task moving charges */
+	struct mm_struct *mm;
 	wait_queue_head_t waitq;		/* a waitq for other context */
 } mc = {
 	.lock = __SPIN_LOCK_UNLOCKED(mc.lock),
@@ -2152,7 +2160,7 @@
 {
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
-	VM_BUG_ON(!PageCgroupLocked(pc));
+	VM_BUG_ON(!page_is_cgroup_locked(pc));
 	VM_BUG_ON(!PageCgroupUsed(pc));
 	VM_BUG_ON(pc->mem_cgroup != from);
 
@@ -4631,7 +4639,7 @@
 	unsigned long precharge;
 	struct vm_area_struct *vma;
 
-	down_read(&mm->mmap_sem);
+	/* We've already held the mmap_sem */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		struct mm_walk mem_cgroup_count_precharge_walk = {
 			.pmd_entry = mem_cgroup_count_precharge_pte_range,
@@ -4643,7 +4651,6 @@
 		walk_page_range(vma->vm_start, vma->vm_end,
 					&mem_cgroup_count_precharge_walk);
 	}
-	up_read(&mm->mmap_sem);
 
 	precharge = mc.precharge;
 	mc.precharge = 0;
@@ -4694,11 +4701,16 @@
 
 		mc.moved_swap = 0;
 	}
+	if (mc.mm) {
+		up_read(&mc.mm->mmap_sem);
+		mmput(mc.mm);
+	}
 	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
-	mc.moving_task = NULL;
 	spin_unlock(&mc.lock);
+	mc.moving_task = NULL;
+	mc.mm = NULL;
 	mem_cgroup_end_move(from);
 	memcg_oom_recover(from);
 	memcg_oom_recover(to);
@@ -4724,12 +4736,21 @@
 			return 0;
 		/* We move charges only when we move a owner of the mm */
 		if (mm->owner == p) {
+			/*
+			 * We do all the move charge works under one mmap_sem to
+			 * avoid deadlock with down_write(&mmap_sem)
+			 * -> try_charge() -> if (mc.moving_task) -> sleep.
+			 */
+			down_read(&mm->mmap_sem);
+
 			VM_BUG_ON(mc.from);
 			VM_BUG_ON(mc.to);
 			VM_BUG_ON(mc.precharge);
 			VM_BUG_ON(mc.moved_charge);
 			VM_BUG_ON(mc.moved_swap);
 			VM_BUG_ON(mc.moving_task);
+			VM_BUG_ON(mc.mm);
+
 			mem_cgroup_start_move(from);
 			spin_lock(&mc.lock);
 			mc.from = from;
@@ -4737,14 +4758,16 @@
 			mc.precharge = 0;
 			mc.moved_charge = 0;
 			mc.moved_swap = 0;
-			mc.moving_task = current;
 			spin_unlock(&mc.lock);
+			mc.moving_task = current;
+			mc.mm = mm;
 
 			ret = mem_cgroup_precharge_mc(mm);
 			if (ret)
 				mem_cgroup_clear_mc();
-		}
-		mmput(mm);
+			/* We call up_read() and mmput() in clear_mc(). */
+		} else
+			mmput(mm);
 	}
 	return ret;
 }
@@ -4832,7 +4855,7 @@
 	struct vm_area_struct *vma;
 
 	lru_add_drain_all();
-	down_read(&mm->mmap_sem);
+	/* We've already held the mmap_sem */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		int ret;
 		struct mm_walk mem_cgroup_move_charge_walk = {
@@ -4851,7 +4874,6 @@
 			 */
 			break;
 	}
-	up_read(&mm->mmap_sem);
 }
 
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
@@ -4860,17 +4882,11 @@
 				struct task_struct *p,
 				bool threadgroup)
 {
-	struct mm_struct *mm;
-
-	if (!mc.to)
+	if (!mc.mm)
 		/* no need to move charge */
 		return;
 
-	mm = get_task_mm(p);
-	if (mm) {
-		mem_cgroup_move_charge(mm);
-		mmput(mm);
-	}
+	mem_cgroup_move_charge(mc.mm);
 	mem_cgroup_clear_mc();
 }
 #else	/* !CONFIG_MMU */
@@ -4911,10 +4927,20 @@
 };
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+static int __init enable_swap_account(char *s)
+{
+	/* consider enabled if no parameter or 1 is given */
+	if (!s || !strcmp(s, "1"))
+		really_do_swap_account = 1;
+	else if (!strcmp(s, "0"))
+		really_do_swap_account = 0;
+	return 1;
+}
+__setup("swapaccount", enable_swap_account);
 
 static int __init disable_swap_account(char *s)
 {
-	really_do_swap_account = 0;
+	enable_swap_account("0");
 	return 1;
 }
 __setup("noswapaccount", disable_swap_account);
diff --git a/mm/nommu.c b/mm/nommu.c
index 3613517..27a9ac5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1717,6 +1717,7 @@
 		mm->mmap = vma->vm_next;
 		delete_vma_from_mm(vma);
 		delete_vma(mm, vma);
+		cond_resched();
 	}
 
 	kleave("");
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07a6544..e409270 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3008,14 +3008,6 @@
 		build_zonelist_cache(pgdat);
 	}
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-	/* Setup real pagesets for the new zone */
-	if (data) {
-		struct zone *zone = data;
-		setup_zone_pageset(zone);
-	}
-#endif
-
 	/*
 	 * Initialize the boot_pagesets that are going to be used
 	 * for bootstrapping processors. The real pagesets for
@@ -3064,7 +3056,11 @@
 	} else {
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
-		stop_machine(__build_all_zonelists, data, NULL);
+#ifdef CONFIG_MEMORY_HOTPLUG
+		if (data)
+			setup_zone_pageset((struct zone *)data);
+#endif
+		stop_machine(__build_all_zonelists, NULL, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 8b1a2ce..38cc58b 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -139,7 +139,6 @@
 	pgd_t *pgd;
 	unsigned long next;
 	int err = 0;
-	struct vm_area_struct *vma;
 
 	if (addr >= end)
 		return err;
@@ -149,15 +148,17 @@
 
 	pgd = pgd_offset(walk->mm, addr);
 	do {
+		struct vm_area_struct *uninitialized_var(vma);
+
 		next = pgd_addr_end(addr, end);
 
+#ifdef CONFIG_HUGETLB_PAGE
 		/*
 		 * handle hugetlb vma individually because pagetable walk for
 		 * the hugetlb page is dependent on the architecture and
 		 * we can't handled it in the same manner as non-huge pages.
 		 */
 		vma = find_vma(walk->mm, addr);
-#ifdef CONFIG_HUGETLB_PAGE
 		if (vma && is_vm_hugetlb_page(vma)) {
 			if (vma->vm_end < next)
 				next = vma->vm_end;
diff --git a/mm/slub.c b/mm/slub.c
index 8fd5401..981fb73 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3273,9 +3273,9 @@
 		kfree(n);
 		kfree(s);
 	}
+err:
 	up_write(&slub_lock);
 
-err:
 	if (flags & SLAB_PANIC)
 		panic("Cannot create slabcache %s\n", name);
 	else
@@ -3862,6 +3862,7 @@
 			x += sprintf(buf + x, " N%d=%lu",
 					node, nodes[node]);
 #endif
+	up_read(&slub_lock);
 	kfree(nodes);
 	return x + sprintf(buf + x, "\n");
 }
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 53d8abf..bf3e6a1 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -19,7 +19,7 @@
 	if (b->vec.iov_base) {
 		b->is_vmalloc = false;
 	} else {
-		b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL);
+		b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
 		if (!b->vec.iov_base) {
 			kfree(b);
 			return NULL;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0e8157e..1c7a2ec 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -540,8 +540,7 @@
 		/* initialize page iterator */
 		con->out_msg_pos.page = 0;
 		if (m->pages)
-			con->out_msg_pos.page_pos =
-				le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
+			con->out_msg_pos.page_pos = m->page_alignment;
 		else
 			con->out_msg_pos.page_pos = 0;
 		con->out_msg_pos.data_pos = 0;
@@ -1491,7 +1490,7 @@
 	struct ceph_msg *m = con->in_msg;
 	int ret;
 	int to, left;
-	unsigned front_len, middle_len, data_len, data_off;
+	unsigned front_len, middle_len, data_len;
 	int datacrc = con->msgr->nocrc;
 	int skip;
 	u64 seq;
@@ -1527,19 +1526,17 @@
 	data_len = le32_to_cpu(con->in_hdr.data_len);
 	if (data_len > CEPH_MSG_MAX_DATA_LEN)
 		return -EIO;
-	data_off = le16_to_cpu(con->in_hdr.data_off);
 
 	/* verify seq# */
 	seq = le64_to_cpu(con->in_hdr.seq);
 	if ((s64)seq - (s64)con->in_seq < 1) {
-		pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+		pr_info("skipping %s%lld %s seq %lld expected %lld\n",
 			ENTITY_NAME(con->peer_name),
 			ceph_pr_addr(&con->peer_addr.in_addr),
 			seq, con->in_seq + 1);
 		con->in_base_pos = -front_len - middle_len - data_len -
 			sizeof(m->footer);
 		con->in_tag = CEPH_MSGR_TAG_READY;
-		con->in_seq++;
 		return 0;
 	} else if ((s64)seq - (s64)con->in_seq > 1) {
 		pr_err("read_partial_message bad seq %lld expected %lld\n",
@@ -1576,7 +1573,7 @@
 
 		con->in_msg_pos.page = 0;
 		if (m->pages)
-			con->in_msg_pos.page_pos = data_off & ~PAGE_MASK;
+			con->in_msg_pos.page_pos = m->page_alignment;
 		else
 			con->in_msg_pos.page_pos = 0;
 		con->in_msg_pos.data_pos = 0;
@@ -2301,6 +2298,7 @@
 
 	/* data */
 	m->nr_pages = 0;
+	m->page_alignment = 0;
 	m->pages = NULL;
 	m->pagelist = NULL;
 	m->bio = NULL;
@@ -2370,6 +2368,7 @@
 			       type, front_len);
 			return NULL;
 		}
+		msg->page_alignment = le16_to_cpu(hdr->data_off);
 	}
 	memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 7939199..3e20a12 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -71,6 +71,7 @@
 		op->extent.length = objlen;
 	}
 	req->r_num_pages = calc_pages_for(off, *plen);
+	req->r_page_alignment = off & ~PAGE_MASK;
 	if (op->op == CEPH_OSD_OP_WRITE)
 		op->payload_len = *plen;
 
@@ -390,6 +391,8 @@
 		req->r_request->hdr.data_len = cpu_to_le32(data_len);
 	}
 
+	req->r_request->page_alignment = req->r_page_alignment;
+
 	BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
 	msg_size = p - msg->front.iov_base;
 	msg->front.iov_len = msg_size;
@@ -419,7 +422,8 @@
 					       u32 truncate_seq,
 					       u64 truncate_size,
 					       struct timespec *mtime,
-					       bool use_mempool, int num_reply)
+					       bool use_mempool, int num_reply,
+					       int page_align)
 {
 	struct ceph_osd_req_op ops[3];
 	struct ceph_osd_request *req;
@@ -447,6 +451,10 @@
 	calc_layout(osdc, vino, layout, off, plen, req, ops);
 	req->r_file_layout = *layout;  /* keep a copy */
 
+	/* in case it differs from natural alignment that calc_layout
+	   filled in for us */
+	req->r_page_alignment = page_align;
+
 	ceph_osdc_build_request(req, off, plen, ops,
 				snapc,
 				mtime,
@@ -1489,7 +1497,7 @@
 			struct ceph_vino vino, struct ceph_file_layout *layout,
 			u64 off, u64 *plen,
 			u32 truncate_seq, u64 truncate_size,
-			struct page **pages, int num_pages)
+			struct page **pages, int num_pages, int page_align)
 {
 	struct ceph_osd_request *req;
 	int rc = 0;
@@ -1499,15 +1507,15 @@
 	req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
 				    NULL, 0, truncate_seq, truncate_size, NULL,
-				    false, 1);
+				    false, 1, page_align);
 	if (!req)
 		return -ENOMEM;
 
 	/* it may be a short read due to an object boundary */
 	req->r_pages = pages;
 
-	dout("readpages  final extent is %llu~%llu (%d pages)\n",
-	     off, *plen, req->r_num_pages);
+	dout("readpages  final extent is %llu~%llu (%d pages align %d)\n",
+	     off, *plen, req->r_num_pages, page_align);
 
 	rc = ceph_osdc_start_request(osdc, req, false);
 	if (!rc)
@@ -1533,6 +1541,7 @@
 {
 	struct ceph_osd_request *req;
 	int rc = 0;
+	int page_align = off & ~PAGE_MASK;
 
 	BUG_ON(vino.snap != CEPH_NOSNAP);
 	req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
@@ -1541,7 +1550,7 @@
 					    CEPH_OSD_FLAG_WRITE,
 				    snapc, do_sync,
 				    truncate_seq, truncate_size, mtime,
-				    nofail, 1);
+				    nofail, 1, page_align);
 	if (!req)
 		return -ENOMEM;
 
@@ -1638,8 +1647,7 @@
 	m = ceph_msg_get(req->r_reply);
 
 	if (data_len > 0) {
-		unsigned data_off = le16_to_cpu(hdr->data_off);
-		int want = calc_pages_for(data_off & ~PAGE_MASK, data_len);
+		int want = calc_pages_for(req->r_page_alignment, data_len);
 
 		if (unlikely(req->r_num_pages < want)) {
 			pr_warning("tid %lld reply %d > expected %d pages\n",
@@ -1651,6 +1659,7 @@
 		}
 		m->pages = req->r_pages;
 		m->nr_pages = req->r_num_pages;
+		m->page_alignment = req->r_page_alignment;
 #ifdef CONFIG_BLOCK
 		m->bio = req->r_bio;
 #endif
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 54caf06..ac34fee 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -13,8 +13,7 @@
  * build a vector of user pages
  */
 struct page **ceph_get_direct_page_vector(const char __user *data,
-						 int num_pages,
-						 loff_t off, size_t len)
+					  int num_pages)
 {
 	struct page **pages;
 	int rc;
diff --git a/net/core/filter.c b/net/core/filter.c
index 23e9b2a..c1ee800 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -589,7 +589,7 @@
 EXPORT_SYMBOL(sk_chk_filter);
 
 /**
- * 	sk_filter_rcu_release: Release a socket filter by rcu_head
+ * 	sk_filter_rcu_release - Release a socket filter by rcu_head
  *	@rcu: rcu_head that contains the sk_filter to free
  */
 static void sk_filter_rcu_release(struct rcu_head *rcu)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a5ff5a8..7f902ca 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -712,15 +712,21 @@
 
 
 	map = rcu_dereference_raw(queue->rps_map);
-	if (map)
+	if (map) {
+		RCU_INIT_POINTER(queue->rps_map, NULL);
 		call_rcu(&map->rcu, rps_map_release);
+	}
 
 	flow_table = rcu_dereference_raw(queue->rps_flow_table);
-	if (flow_table)
+	if (flow_table) {
+		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+	}
 
 	if (atomic_dec_and_test(&first->count))
 		kfree(first);
+	else
+		memset(kobj, 0, sizeof(*kobj));
 }
 
 static struct kobj_type rx_queue_ktype = {
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495..fceeb37 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -45,9 +45,7 @@
 	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
 	lopt_size += nr_table_entries * sizeof(struct request_sock *);
 	if (lopt_size > PAGE_SIZE)
-		lopt = __vmalloc(lopt_size,
-			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
-			PAGE_KERNEL);
+		lopt = vzalloc(lopt_size);
 	else
 		lopt = kzalloc(lopt_size, GFP_KERNEL);
 	if (lopt == NULL)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 200eb53..0f28034 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -365,7 +365,7 @@
 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
 	else
-		return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+		return vzalloc(size);
 }
 
 static void __tnode_vfree(struct work_struct *arg)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 96bc7f9..e5d1a44 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -569,6 +569,9 @@
 		/* No need to clone since we're just using its address. */
 		rt2 = rt;
 
+		if (!fl.nl_u.ip4_u.saddr)
+			fl.nl_u.ip4_u.saddr = rt->rt_src;
+
 		err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
 		switch (err) {
 		case 0:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b41ce0f..23cc8e1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -98,7 +98,11 @@
 #endif
 
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
-#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
+
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
 
 #define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ? HZ/50 : 1)
 #define ADDRCONF_TIMER_FUZZ		(HZ / 4)
@@ -2754,13 +2758,13 @@
 			ifa->state = INET6_IFADDR_STATE_DEAD;
 			spin_unlock_bh(&ifa->state_lock);
 
-			if (state == INET6_IFADDR_STATE_DEAD) {
-				in6_ifa_put(ifa);
-			} else {
+			if (state != INET6_IFADDR_STATE_DEAD) {
 				__ipv6_ifa_notify(RTM_DELADDR, ifa);
 				atomic_notifier_call_chain(&inet6addr_chain,
 							   NETDEV_DOWN, ifa);
 			}
+
+			in6_ifa_put(ifa);
 			write_lock_bh(&idev->lock);
 		}
 	}
@@ -3444,10 +3448,8 @@
 {
 	struct ifa_cacheinfo ci;
 
-	ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
-			+ TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
-			+ TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.cstamp = cstamp_delta(cstamp);
+	ci.tstamp = cstamp_delta(tstamp);
 	ci.ifa_prefered = preferred;
 	ci.ifa_valid = valid;
 
@@ -3798,8 +3800,10 @@
 	array[DEVCONF_AUTOCONF] = cnf->autoconf;
 	array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
 	array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
-	array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
-	array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
+	array[DEVCONF_RTR_SOLICIT_INTERVAL] =
+		jiffies_to_msecs(cnf->rtr_solicit_interval);
+	array[DEVCONF_RTR_SOLICIT_DELAY] =
+		jiffies_to_msecs(cnf->rtr_solicit_delay);
 	array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
 #ifdef CONFIG_IPV6_PRIVACY
 	array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
@@ -3813,7 +3817,8 @@
 	array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
-	array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval;
+	array[DEVCONF_RTR_PROBE_INTERVAL] =
+		jiffies_to_msecs(cnf->rtr_probe_interval);
 #ifdef CONFIG_IPV6_ROUTE_INFO
 	array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
 #endif
@@ -3929,10 +3934,9 @@
 	NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
 
 	ci.max_reasm_len = IPV6_MAXPLEN;
-	ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.reachable_time = idev->nd_parms->reachable_time;
-	ci.retrans_time = idev->nd_parms->retrans_time;
+	ci.tstamp = cstamp_delta(idev->tstamp);
+	ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
+	ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
 	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
 
 	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 7f097989..a6de305 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -45,7 +45,6 @@
 #include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/smp_lock.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/slab.h>
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7fa8637..7c567b8 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -15,7 +15,6 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include "irnet_ppp.h"		/* Private header */
 /* Please put other headers in irnet.h - Thanks */
 
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 285761e..f6054f9 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -550,22 +550,30 @@
  */
 int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
 {
+	int ret;
+
 	IRDA_ASSERT(self != NULL, return -1;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
 	IRDA_ASSERT(skb != NULL, return -1;);
 
 	IRDA_DEBUG(4, "%s()\n", __func__);
 
+	/* Take shortcut on zero byte packets */
+	if (skb->len == 0) {
+		ret = 0;
+		goto err;
+	}
+
 	/* Check that nothing bad happens */
-	if ((skb->len == 0) || (!self->connected)) {
-		IRDA_DEBUG(1, "%s(), No data, or not connected\n",
-			   __func__);
+	if (!self->connected) {
+		IRDA_WARNING("%s(), Not connected\n", __func__);
+		ret = -ENOTCONN;
 		goto err;
 	}
 
 	if (skb->len > self->max_seg_size) {
-		IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
-			   __func__);
+		IRDA_ERROR("%s(), UData is too large for IrLAP!\n", __func__);
+		ret = -EMSGSIZE;
 		goto err;
 	}
 
@@ -576,7 +584,7 @@
 
 err:
 	dev_kfree_skb(skb);
-	return -1;
+	return ret;
 }
 EXPORT_SYMBOL(irttp_udata_request);
 
@@ -599,9 +607,15 @@
 	IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__,
 		   skb_queue_len(&self->tx_queue));
 
+	/* Take shortcut on zero byte packets */
+	if (skb->len == 0) {
+		ret = 0;
+		goto err;
+	}
+
 	/* Check that nothing bad happens */
-	if ((skb->len == 0) || (!self->connected)) {
-		IRDA_WARNING("%s: No data, or not connected\n", __func__);
+	if (!self->connected) {
+		IRDA_WARNING("%s: Not connected\n", __func__);
 		ret = -ENOTCONN;
 		goto err;
 	}
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index a22dac2..70bd1d0 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -4,6 +4,7 @@
 menuconfig IP_VS
 	tristate "IP virtual server support"
 	depends on NET && INET && NETFILTER
+	depends on (NF_CONNTRACK || NF_CONNTRACK=n)
 	---help---
 	  IP Virtual Server support will let you build a high-performance
 	  virtual server based on cluster of two or more real servers. This
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8920f2a..4e37c1c 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -567,7 +567,7 @@
 		goto out;
 	}
 
-	if (args->nr_local > (u64)UINT_MAX) {
+	if (args->nr_local > UIO_MAXIOV) {
 		ret = -EMSGSIZE;
 		goto out;
 	}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index f71a731..80df89d95 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -115,9 +115,7 @@
  */
 struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
 {
-	struct rpc_iostats *new;
-	new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
-	return new;
+	return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(rpc_alloc_iostats);
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c82fe73..ea2ff78 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -5,7 +5,6 @@
  */
 
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/errno.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index d0c92dd..17cd0c0 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -44,6 +44,38 @@
 	return chan;
 }
 
+static bool can_beacon_sec_chan(struct wiphy *wiphy,
+				struct ieee80211_channel *chan,
+				enum nl80211_channel_type channel_type)
+{
+	struct ieee80211_channel *sec_chan;
+	int diff;
+
+	switch (channel_type) {
+	case NL80211_CHAN_HT40PLUS:
+		diff = 20;
+		break;
+	case NL80211_CHAN_HT40MINUS:
+		diff = -20;
+		break;
+	default:
+		return false;
+	}
+
+	sec_chan = ieee80211_get_channel(wiphy, chan->center_freq + diff);
+	if (!sec_chan)
+		return false;
+
+	/* we'll need a DFS capability later */
+	if (sec_chan->flags & (IEEE80211_CHAN_DISABLED |
+			       IEEE80211_CHAN_PASSIVE_SCAN |
+			       IEEE80211_CHAN_NO_IBSS |
+			       IEEE80211_CHAN_RADAR))
+		return false;
+
+	return true;
+}
+
 int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
 		      struct wireless_dev *wdev, int freq,
 		      enum nl80211_channel_type channel_type)
@@ -68,6 +100,28 @@
 	if (!chan)
 		return -EINVAL;
 
+	/* Both channels should be able to initiate communication */
+	if (wdev && (wdev->iftype == NL80211_IFTYPE_ADHOC ||
+		     wdev->iftype == NL80211_IFTYPE_AP ||
+		     wdev->iftype == NL80211_IFTYPE_AP_VLAN ||
+		     wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
+		     wdev->iftype == NL80211_IFTYPE_P2P_GO)) {
+		switch (channel_type) {
+		case NL80211_CHAN_HT40PLUS:
+		case NL80211_CHAN_HT40MINUS:
+			if (!can_beacon_sec_chan(&rdev->wiphy, chan,
+						 channel_type)) {
+				printk(KERN_DEBUG
+				       "cfg80211: Secondary channel not "
+				       "allowed to initiate communication\n");
+				return -EINVAL;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
 	result = rdev->ops->set_channel(&rdev->wiphy,
 					wdev ? wdev->netdev : NULL,
 					chan, channel_type);
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index a2023ec..1e98bc0 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -19,7 +19,7 @@
 	if (sz <= PAGE_SIZE)
 		n = kzalloc(sz, GFP_KERNEL);
 	else if (hashdist)
-		n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+		n = vzalloc(sz);
 	else
 		n = (struct hlist_head *)
 			__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
diff --git a/scripts/gfp-translate b/scripts/gfp-translate
index d81b968..c9230e1 100644
--- a/scripts/gfp-translate
+++ b/scripts/gfp-translate
@@ -63,7 +63,12 @@
 
 # Extract GFP flags from the kernel source
 TMPFILE=`mktemp -t gfptranslate-XXXXXX` || exit 1
-grep "^#define __GFP" $SOURCE/include/linux/gfp.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
+grep -q ___GFP $SOURCE/include/linux/gfp.h
+if [ $? -eq 0 ]; then
+	grep "^#define ___GFP" $SOURCE/include/linux/gfp.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
+else
+	grep "^#define __GFP" $SOURCE/include/linux/gfp.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
+fi
 
 # Parse the flags
 IFS="
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index cdb6dc1..39580a5 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -5,7 +5,7 @@
 ## Copyright (c) 1998 Michael Zucchi, All Rights Reserved        ##
 ## Copyright (C) 2000, 1  Tim Waugh <twaugh@redhat.com>          ##
 ## Copyright (C) 2001  Simon Huggins                             ##
-## Copyright (C) 2005-2009  Randy Dunlap                         ##
+## Copyright (C) 2005-2010  Randy Dunlap                         ##
 ## 								 ##
 ## #define enhancements by Armin Kuster <akuster@mvista.com>	 ##
 ## Copyright (c) 2000 MontaVista Software, Inc.			 ##
@@ -453,7 +453,7 @@
     if ($output_mode eq "html" || $output_mode eq "xml") {
 	$contents = local_unescape($contents);
 	# convert data read & converted thru xml_escape() into &xyz; format:
-	$contents =~ s/\\\\\\/&/g;
+	$contents =~ s/\\\\\\/\&/g;
     }
 #   print STDERR "contents b4:$contents\n";
     eval $dohighlight;
@@ -770,7 +770,11 @@
     print $args{'type'} . " " . $args{'struct'} . " {\n";
     foreach $parameter (@{$args{'parameterlist'}}) {
 	if ($parameter =~ /^#/) {
-	    print "$parameter\n";
+	    my $prm = $parameter;
+	    # convert data read & converted thru xml_escape() into &xyz; format:
+	    # This allows us to have #define macros interspersed in a struct.
+	    $prm =~ s/\\\\\\/\&/g;
+	    print "$prm\n";
 	    next;
 	}
 
@@ -1701,6 +1705,8 @@
 	}
 	}
 
+	$param = xml_escape($param);
+
 	# strip spaces from $param so that it is one continous string
 	# on @parameterlist;
 	# this fixes a problem where check_sections() cannot find
diff --git a/security/capability.c b/security/capability.c
index 30ae00f..c773635 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -17,6 +17,11 @@
 	return 0;
 }
 
+static int cap_syslog(int type)
+{
+	return 0;
+}
+
 static int cap_quotactl(int cmds, int type, int id, struct super_block *sb)
 {
 	return 0;
diff --git a/security/commoncap.c b/security/commoncap.c
index 04b80f9..64c2ed9c 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -27,7 +27,6 @@
 #include <linux/sched.h>
 #include <linux/prctl.h>
 #include <linux/securebits.h>
-#include <linux/syslog.h>
 
 /*
  * If a non-root user executes a setuid-root binary in
@@ -884,26 +883,6 @@
 }
 
 /**
- * cap_syslog - Determine whether syslog function is permitted
- * @type: Function requested
- * @from_file: Whether this request came from an open file (i.e. /proc)
- *
- * Determine whether the current process is permitted to use a particular
- * syslog function, returning 0 if permission is granted, -ve if not.
- */
-int cap_syslog(int type, bool from_file)
-{
-	if (type != SYSLOG_ACTION_OPEN && from_file)
-		return 0;
-	if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	if ((type != SYSLOG_ACTION_READ_ALL &&
-	     type != SYSLOG_ACTION_SIZE_BUFFER) && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	return 0;
-}
-
-/**
  * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted
  * @mm: The VM space in which the new mapping is to be made
  * @pages: The size of the mapping
diff --git a/security/security.c b/security/security.c
index 3ef5e2a..1b798d3 100644
--- a/security/security.c
+++ b/security/security.c
@@ -197,9 +197,9 @@
 	return security_ops->quota_on(dentry);
 }
 
-int security_syslog(int type, bool from_file)
+int security_syslog(int type)
 {
-	return security_ops->syslog(type, from_file);
+	return security_ops->syslog(type);
 }
 
 int security_settime(struct timespec *ts, struct timezone *tz)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d9154cf..65fa8bf 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1973,14 +1973,10 @@
 	return dentry_has_perm(cred, NULL, dentry, FILE__QUOTAON);
 }
 
-static int selinux_syslog(int type, bool from_file)
+static int selinux_syslog(int type)
 {
 	int rc;
 
-	rc = cap_syslog(type, from_file);
-	if (rc)
-		return rc;
-
 	switch (type) {
 	case SYSLOG_ACTION_READ_ALL:	/* Read last kernel messages */
 	case SYSLOG_ACTION_SIZE_BUFFER:	/* Return size of the log buffer */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index bc39f40..489a85a 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -157,15 +157,11 @@
  *
  * Returns 0 on success, error code otherwise.
  */
-static int smack_syslog(int type, bool from_file)
+static int smack_syslog(int typefrom_file)
 {
-	int rc;
+	int rc = 0;
 	char *sp = current_security();
 
-	rc = cap_syslog(type, from_file);
-	if (rc != 0)
-		return rc;
-
 	if (capable(CAP_MAC_OVERRIDE))
 		return 0;
 
diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c
index f2f41c8..6e24091 100644
--- a/sound/atmel/abdac.c
+++ b/sound/atmel/abdac.c
@@ -420,9 +420,9 @@
 		return PTR_ERR(pclk);
 	}
 	sample_clk = clk_get(&pdev->dev, "sample_clk");
-	if (IS_ERR(pclk)) {
+	if (IS_ERR(sample_clk)) {
 		dev_dbg(&pdev->dev, "no sample clock\n");
-		retval = PTR_ERR(pclk);
+		retval = PTR_ERR(sample_clk);
 		goto out_put_pclk;
 	}
 	clk_enable(pclk);
diff --git a/sound/core/info.c b/sound/core/info.c
index b70564e..7077f60 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -23,7 +23,6 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <sound/core.h>
 #include <sound/minors.h>
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index a1707cc..b75db8e 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -223,7 +223,7 @@
 	entry->jiffies = jiffies;
 	entry->pos = pos;
 	entry->period_size = runtime->period_size;
-	entry->buffer_size = runtime->buffer_size;;
+	entry->buffer_size = runtime->buffer_size;
 	entry->old_hw_ptr = runtime->status->hw_ptr;
 	entry->hw_ptr_base = runtime->hw_ptr_base;
 	log->idx = (log->idx + 1) % XRUN_LOG_CNT;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 8bc7cb3..e82c1f9 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -22,7 +22,6 @@
 #include <linux/mm.h>
 #include <linux/file.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/time.h>
 #include <linux/pm_qos_params.h>
 #include <linux/uio.h>
diff --git a/sound/core/sound.c b/sound/core/sound.c
index 62a093e..66691fe 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -21,7 +21,6 @@
 
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/time.h>
 #include <linux/device.h>
 #include <linux/moduleparam.h>
diff --git a/sound/oss/dev_table.c b/sound/oss/dev_table.c
index 727bdb9..d8cf3e5 100644
--- a/sound/oss/dev_table.c
+++ b/sound/oss/dev_table.c
@@ -71,7 +71,7 @@
 	if (sound_nblocks >= MAX_MEM_BLOCKS)
 		sound_nblocks = MAX_MEM_BLOCKS - 1;
 
-	op = (struct audio_operations *) (sound_mem_blocks[sound_nblocks] = vmalloc(sizeof(struct audio_operations)));
+	op = (struct audio_operations *) (sound_mem_blocks[sound_nblocks] = vzalloc(sizeof(struct audio_operations)));
 	sound_nblocks++;
 	if (sound_nblocks >= MAX_MEM_BLOCKS)
 		sound_nblocks = MAX_MEM_BLOCKS - 1;
@@ -81,7 +81,6 @@
 		sound_unload_audiodev(num);
 		return -(ENOMEM);
 	}
-	memset((char *) op, 0, sizeof(struct audio_operations));
 	init_waitqueue_head(&op->in_sleeper);
 	init_waitqueue_head(&op->out_sleeper);	
 	init_waitqueue_head(&op->poll_sleeper);
@@ -128,7 +127,7 @@
 	/* FIXME: This leaks a mixer_operations struct every time its called
 	   until you unload sound! */
 	   
-	op = (struct mixer_operations *) (sound_mem_blocks[sound_nblocks] = vmalloc(sizeof(struct mixer_operations)));
+	op = (struct mixer_operations *) (sound_mem_blocks[sound_nblocks] = vzalloc(sizeof(struct mixer_operations)));
 	sound_nblocks++;
 	if (sound_nblocks >= MAX_MEM_BLOCKS)
 		sound_nblocks = MAX_MEM_BLOCKS - 1;
@@ -137,7 +136,6 @@
 		printk(KERN_ERR "Sound: Can't allocate mixer driver for (%s)\n", name);
 		return -ENOMEM;
 	}
-	memset((char *) op, 0, sizeof(struct mixer_operations));
 	memcpy((char *) op, (char *) driver, driver_size);
 
 	strlcpy(op->name, name, sizeof(op->name));
diff --git a/sound/oss/midibuf.c b/sound/oss/midibuf.c
index 782b3b8..ceedb1e 100644
--- a/sound/oss/midibuf.c
+++ b/sound/oss/midibuf.c
@@ -178,7 +178,7 @@
 		return err;
 
 	parms[dev].prech_timeout = MAX_SCHEDULE_TIMEOUT;
-	midi_in_buf[dev] = (struct midi_buf *) vmalloc(sizeof(struct midi_buf));
+	midi_in_buf[dev] = vmalloc(sizeof(struct midi_buf));
 
 	if (midi_in_buf[dev] == NULL)
 	{
@@ -188,7 +188,7 @@
 	}
 	midi_in_buf[dev]->len = midi_in_buf[dev]->head = midi_in_buf[dev]->tail = 0;
 
-	midi_out_buf[dev] = (struct midi_buf *) vmalloc(sizeof(struct midi_buf));
+	midi_out_buf[dev] = vmalloc(sizeof(struct midi_buf));
 
 	if (midi_out_buf[dev] == NULL)
 	{
diff --git a/sound/oss/pss.c b/sound/oss/pss.c
index e19dd5d..9b800ce 100644
--- a/sound/oss/pss.c
+++ b/sound/oss/pss.c
@@ -859,7 +859,7 @@
 			return 0;
 
 		case SNDCTL_COPR_LOAD:
-			buf = (copr_buffer *) vmalloc(sizeof(copr_buffer));
+			buf = vmalloc(sizeof(copr_buffer));
 			if (buf == NULL)
 				return -ENOSPC;
 			if (copy_from_user(buf, arg, sizeof(copr_buffer))) {
@@ -871,7 +871,7 @@
 			return err;
 		
 		case SNDCTL_COPR_SENDMSG:
-			mbuf = (copr_msg *)vmalloc(sizeof(copr_msg));
+			mbuf = vmalloc(sizeof(copr_msg));
 			if (mbuf == NULL)
 				return -ENOSPC;
 			if (copy_from_user(mbuf, arg, sizeof(copr_msg))) {
@@ -895,7 +895,7 @@
 
 		case SNDCTL_COPR_RCVMSG:
 			err = 0;
-			mbuf = (copr_msg *)vmalloc(sizeof(copr_msg));
+			mbuf = vmalloc(sizeof(copr_msg));
 			if (mbuf == NULL)
 				return -ENOSPC;
 			data = (unsigned short *)mbuf->data;
diff --git a/sound/oss/sequencer.c b/sound/oss/sequencer.c
index e85789e..5ea1098 100644
--- a/sound/oss/sequencer.c
+++ b/sound/oss/sequencer.c
@@ -1646,13 +1646,13 @@
 {
 	if (sequencer_ok)
 		return;
-	queue = (unsigned char *)vmalloc(SEQ_MAX_QUEUE * EV_SZ);
+	queue = vmalloc(SEQ_MAX_QUEUE * EV_SZ);
 	if (queue == NULL)
 	{
 		printk(KERN_ERR "sequencer: Can't allocate memory for sequencer output queue\n");
 		return;
 	}
-	iqueue = (unsigned char *)vmalloc(SEQ_MAX_QUEUE * IEV_SZ);
+	iqueue = vmalloc(SEQ_MAX_QUEUE * IEV_SZ);
 	if (iqueue == NULL)
 	{
 		printk(KERN_ERR "sequencer: Can't allocate memory for sequencer input queue\n");
diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c
index 62895a7..22dbd91 100644
--- a/sound/pci/asihpi/hpioctl.c
+++ b/sound/pci/asihpi/hpioctl.c
@@ -435,7 +435,7 @@
 	struct hpi_message hm;
 	struct hpi_response hr;
 	struct hpi_adapter *pa;
-	pa = (struct hpi_adapter *)pci_get_drvdata(pci_dev);
+	pa = pci_get_drvdata(pci_dev);
 
 	hpi_init_message_response(&hm, &hr, HPI_OBJ_SUBSYSTEM,
 		HPI_SUBSYS_DELETE_ADAPTER);
diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c
index 4679ed8..2f3cacb 100644
--- a/sound/pci/azt3328.c
+++ b/sound/pci/azt3328.c
@@ -1129,10 +1129,11 @@
 
 		count_areas = size/2;
 		addr_area2 = addr+count_areas;
-		count_areas--; /* max. index */
 		snd_azf3328_dbgcodec("setdma: buffers %08lx[%u] / %08lx[%u]\n",
 				addr, count_areas, addr_area2, count_areas);
 
+		count_areas--; /* max. index */
+
 		/* build combined I/O buffer length word */
 		lengths = (count_areas << 16) | (count_areas);
 		spin_lock_irqsave(&chip->reg_lock, flags);
@@ -1740,11 +1741,15 @@
 	.rate_max =		AZF_FREQ_66200,
 	.channels_min =		1,
 	.channels_max =		2,
-	.buffer_bytes_max =	65536,
-	.period_bytes_min =	64,
-	.period_bytes_max =	65536,
-	.periods_min =		1,
-	.periods_max =		1024,
+	.buffer_bytes_max =	(64*1024),
+	.period_bytes_min =	1024,
+	.period_bytes_max =	(32*1024),
+	/* We simply have two DMA areas (instead of a list of descriptors
+	   such as other cards); I believe that this is a fixed hardware
+	   attribute and there isn't much driver magic to be done to expand it.
+	   Thus indicate that we have at least and at most 2 periods. */
+	.periods_min =		2,
+	.periods_max =		2,
 	/* FIXME: maybe that card actually has a FIFO?
 	 * Hmm, it seems newer revisions do have one, but we still don't know
 	 * its size... */
@@ -1980,8 +1985,13 @@
 	chip = snd_timer_chip(timer);
 	spin_lock_irqsave(&chip->reg_lock, flags);
 	/* disable timer countdown and interrupt */
-	/* FIXME: should we write TIMER_IRQ_ACK here? */
-	snd_azf3328_ctrl_outb(chip, IDX_IO_TIMER_VALUE + 3, 0);
+	/* Hmm, should we write TIMER_IRQ_ACK here?
+	   YES indeed, otherwise a rogue timer operation - which prompts
+	   ALSA(?) to call repeated stop() in vain, but NOT start() -
+	   will never end (value 0x03 is kept shown in control byte).
+	   Simply manually poking 0x04 _once_ immediately successfully stops
+	   the hardware/ALSA interrupt activity. */
+	snd_azf3328_ctrl_outb(chip, IDX_IO_TIMER_VALUE + 3, 0x04);
 	spin_unlock_irqrestore(&chip->reg_lock, flags);
 	snd_azf3328_dbgcallleave();
 	return 0;
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index 85ab43e..457d211 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -129,8 +129,6 @@
 
 	apcm->substream = substream;
 	apcm->interrupt = ct_atc_pcm_interrupt;
-	runtime->private_data = apcm;
-	runtime->private_free = ct_atc_pcm_free_substream;
 	if (IEC958 == substream->pcm->device) {
 		runtime->hw = ct_spdif_passthru_playback_hw;
 		atc->spdif_out_passthru(atc, 1);
@@ -155,8 +153,12 @@
 	}
 
 	apcm->timer = ct_timer_instance_new(atc->timer, apcm);
-	if (!apcm->timer)
+	if (!apcm->timer) {
+		kfree(apcm);
 		return -ENOMEM;
+	}
+	runtime->private_data = apcm;
+	runtime->private_free = ct_atc_pcm_free_substream;
 
 	return 0;
 }
@@ -278,8 +280,6 @@
 	apcm->started = 0;
 	apcm->substream = substream;
 	apcm->interrupt = ct_atc_pcm_interrupt;
-	runtime->private_data = apcm;
-	runtime->private_free = ct_atc_pcm_free_substream;
 	runtime->hw = ct_pcm_capture_hw;
 	runtime->hw.rate_max = atc->rsr * atc->msr;
 
@@ -298,8 +298,12 @@
 	}
 
 	apcm->timer = ct_timer_instance_new(atc->timer, apcm);
-	if (!apcm->timer)
+	if (!apcm->timer) {
+		kfree(apcm);
 		return -ENOMEM;
+	}
+	runtime->private_data = apcm;
+	runtime->private_free = ct_atc_pcm_free_substream;
 
 	return 0;
 }
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 6361f75..846d1ea 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -3100,6 +3100,7 @@
 	SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTRO),
 	SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD),
 	SND_PCI_QUIRK(0x103c, 0x360b, "HP G60", CXT5066_HP_LAPTOP),
+	SND_PCI_QUIRK(0x1043, 0x13f3, "Asus A52J", CXT5066_HP_LAPTOP),
 	SND_PCI_QUIRK(0x1179, 0xff1e, "Toshiba Satellite C650D", CXT5066_IDEAPAD),
 	SND_PCI_QUIRK(0x1179, 0xff50, "Toshiba Satellite P500-PSPGSC-01800T", CXT5066_OLPC_XO_1_5),
 	SND_PCI_QUIRK(0x1179, 0xffe0, "Toshiba Satellite Pro T130-15F", CXT5066_OLPC_XO_1_5),
@@ -3110,6 +3111,7 @@
 	SND_PCI_QUIRK(0x17aa, 0x21b2, "Thinkpad X100e", CXT5066_IDEAPAD),
 	SND_PCI_QUIRK(0x17aa, 0x21b3, "Thinkpad Edge 13 (197)", CXT5066_IDEAPAD),
 	SND_PCI_QUIRK(0x17aa, 0x21b4, "Thinkpad Edge", CXT5066_IDEAPAD),
+	SND_PCI_QUIRK(0x17aa, 0x21c8, "Thinkpad Edge 11", CXT5066_IDEAPAD),
  	SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo Thinkpad", CXT5066_THINKPAD),
  	SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo G series", CXT5066_IDEAPAD),
 	SND_PCI_QUIRK(0x17aa, 0x390a, "Lenovo S10-3t", CXT5066_IDEAPAD),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5f00589..0ac6aed 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -14623,7 +14623,10 @@
 /* different alc269-variants */
 enum {
 	ALC269_TYPE_NORMAL,
+	ALC269_TYPE_ALC258,
 	ALC269_TYPE_ALC259,
+	ALC269_TYPE_ALC269VB,
+	ALC269_TYPE_ALC270,
 	ALC269_TYPE_ALC271X,
 };
 
@@ -15023,7 +15026,7 @@
 static int patch_alc269(struct hda_codec *codec)
 {
 	struct alc_spec *spec;
-	int board_config;
+	int board_config, coef;
 	int err;
 
 	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
@@ -15034,14 +15037,23 @@
 
 	alc_auto_parse_customize_define(codec);
 
-	if ((alc_read_coef_idx(codec, 0) & 0x00f0) == 0x0010){
+	coef = alc_read_coef_idx(codec, 0);
+	if ((coef & 0x00f0) == 0x0010) {
 		if (codec->bus->pci->subsystem_vendor == 0x1025 &&
 		    spec->cdefine.platform_type == 1) {
 			alc_codec_rename(codec, "ALC271X");
 			spec->codec_variant = ALC269_TYPE_ALC271X;
-		} else {
+		} else if ((coef & 0xf000) == 0x1000) {
+			spec->codec_variant = ALC269_TYPE_ALC270;
+		} else if ((coef & 0xf000) == 0x2000) {
 			alc_codec_rename(codec, "ALC259");
 			spec->codec_variant = ALC269_TYPE_ALC259;
+		} else if ((coef & 0xf000) == 0x3000) {
+			alc_codec_rename(codec, "ALC258");
+			spec->codec_variant = ALC269_TYPE_ALC258;
+		} else {
+			alc_codec_rename(codec, "ALC269VB");
+			spec->codec_variant = ALC269_TYPE_ALC269VB;
 		}
 	} else
 		alc_fix_pll_init(codec, 0x20, 0x04, 15);
@@ -15104,7 +15116,7 @@
 	spec->stream_digital_capture = &alc269_pcm_digital_capture;
 
 	if (!spec->adc_nids) { /* wasn't filled automatically? use default */
-		if (spec->codec_variant != ALC269_TYPE_NORMAL) {
+		if (spec->codec_variant == ALC269_TYPE_NORMAL) {
 			spec->adc_nids = alc269_adc_nids;
 			spec->num_adc_nids = ARRAY_SIZE(alc269_adc_nids);
 			spec->capsrc_nids = alc269_capsrc_nids;
@@ -19298,6 +19310,7 @@
 
 static struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE),
+	SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD),
 	SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD),
 	SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD),
 	{}
@@ -19419,7 +19432,10 @@
 {
 	if ((alc_read_coef_idx(codec, 0) & 0x00f0)==0x0030){
 		kfree(codec->chip_name);
-		codec->chip_name = kstrdup("ALC888-VD", GFP_KERNEL);
+		if (codec->vendor_id == 0x10ec0887)
+			codec->chip_name = kstrdup("ALC887-VD", GFP_KERNEL);
+		else
+			codec->chip_name = kstrdup("ALC888-VD", GFP_KERNEL);
 		if (!codec->chip_name) {
 			alc_free(codec);
 			return -ENOMEM;
@@ -19909,7 +19925,7 @@
 	{ .id = 0x10ec0885, .rev = 0x100103, .name = "ALC889A",
 	  .patch = patch_alc882 },
 	{ .id = 0x10ec0885, .name = "ALC885", .patch = patch_alc882 },
-	{ .id = 0x10ec0887, .name = "ALC887", .patch = patch_alc882 },
+	{ .id = 0x10ec0887, .name = "ALC887", .patch = patch_alc888 },
 	{ .id = 0x10ec0888, .rev = 0x100101, .name = "ALC1200",
 	  .patch = patch_alc882 },
 	{ .id = 0x10ec0888, .name = "ALC888", .patch = patch_alc888 },
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 93fa59c..5c71080 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -389,6 +389,11 @@
 	0x11, 0x20, 0
 };
 
+#define STAC92HD87B_NUM_DMICS	 1
+static hda_nid_t stac92hd87b_dmic_nids[STAC92HD87B_NUM_DMICS + 1] = {
+	0x11, 0
+};
+
 #define STAC92HD83XXX_NUM_CAPS	2
 static unsigned long stac92hd83xxx_capvols[] = {
 	HDA_COMPOSE_AMP_VAL(0x17, 3, 0, HDA_OUTPUT),
@@ -3486,10 +3491,8 @@
 				return err;
 		}
 
-		if (snd_hda_get_bool_hint(codec, "separate_dmux") != 1) {
+		if (snd_hda_get_bool_hint(codec, "separate_dmux") != 1)
 			snd_hda_add_imux_item(imux, label, index, NULL);
-			spec->num_analog_muxes++;
-		}
 	}
 
 	return 0;
@@ -5452,12 +5455,17 @@
 				stac92hd83xxx_brd_tbl[spec->board_config]);
 
 	switch (codec->vendor_id) {
+	case 0x111d76d1:
+	case 0x111d76d9:
+		spec->dmic_nids = stac92hd87b_dmic_nids;
+		spec->num_dmics = stac92xx_connected_ports(codec,
+				stac92hd87b_dmic_nids,
+				STAC92HD87B_NUM_DMICS);
+		/* Fall through */
 	case 0x111d7666:
 	case 0x111d7667:
 	case 0x111d7668:
 	case 0x111d7669:
-	case 0x111d76d1:
-	case 0x111d76d9:
 		spec->num_pins = ARRAY_SIZE(stac92hd88xxx_pin_nids);
 		spec->pin_nids = stac92hd88xxx_pin_nids;
 		spec->mono_nid = 0;
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 400f9eb..629a549 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -1866,6 +1866,12 @@
 	},
 	{
 		.subvendor = 0x1028,
+		.subdevice = 0x0182,
+		.name = "Dell Latitude D610",	/* STAC9750/51 */
+		.type = AC97_TUNE_HP_ONLY
+	},
+	{
+		.subvendor = 0x1028,
 		.subdevice = 0x0186,
 		.name = "Dell Latitude D810", /* cf. Malone #41015 */
 		.type = AC97_TUNE_HP_MUTE_LED
diff --git a/sound/pci/mixart/mixart_hwdep.h b/sound/pci/mixart/mixart_hwdep.h
index a46f508..812e288 100644
--- a/sound/pci/mixart/mixart_hwdep.h
+++ b/sound/pci/mixart/mixart_hwdep.h
@@ -25,11 +25,21 @@
 
 #include <sound/hwdep.h>
 
+#ifndef readl_be
 #define readl_be(x) be32_to_cpu(__raw_readl(x))
-#define writel_be(data,addr) __raw_writel(cpu_to_be32(data),addr)
+#endif
 
+#ifndef writel_be
+#define writel_be(data,addr) __raw_writel(cpu_to_be32(data),addr)
+#endif
+
+#ifndef readl_le
 #define readl_le(x) le32_to_cpu(__raw_readl(x))
+#endif
+
+#ifndef writel_le
 #define writel_le(data,addr) __raw_writel(cpu_to_le32(data),addr)
+#endif
 
 #define MIXART_MEM(mgr,x)	((mgr)->mem[0].virt + (x))
 #define MIXART_REG(mgr,x)	((mgr)->mem[1].virt + (x))
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 8508117..b47cfd4 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -1228,10 +1228,8 @@
 					       chip->rsrc[i].start + 1,
 					       rnames[i]) == NULL) {
 				printk(KERN_ERR "snd: can't request rsrc "
-				       " %d (%s: 0x%016llx:%016llx)\n",
-				       i, rnames[i],
-				       (unsigned long long)chip->rsrc[i].start,
-				       (unsigned long long)chip->rsrc[i].end);
+				       " %d (%s: %pR)\n",
+				       i, rnames[i], &chip->rsrc[i]);
 				err = -ENODEV;
 				goto __error;
 			}
@@ -1256,10 +1254,8 @@
 					       chip->rsrc[i].start + 1,
 					       rnames[i]) == NULL) {
 				printk(KERN_ERR "snd: can't request rsrc "
-				       " %d (%s: 0x%016llx:%016llx)\n",
-				       i, rnames[i],
-				       (unsigned long long)chip->rsrc[i].start,
-				       (unsigned long long)chip->rsrc[i].end);
+				       " %d (%s: %pR)\n",
+				       i, rnames[i], &chip->rsrc[i]);
 				err = -ENODEV;
 				goto __error;
 			}
diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
index e720d5e..bee3c94 100644
--- a/sound/soc/atmel/Kconfig
+++ b/sound/soc/atmel/Kconfig
@@ -16,7 +16,8 @@
 
 config SND_AT91_SOC_SAM9G20_WM8731
 	tristate "SoC Audio support for WM8731-based At91sam9g20 evaluation board"
-	depends on ATMEL_SSC && ARCH_AT91SAM9G20 && SND_ATMEL_SOC
+	depends on ATMEL_SSC && ARCH_AT91SAM9G20 && SND_ATMEL_SOC && \
+                   AT91_PROGRAMMABLE_CLOCKS
 	select SND_ATMEL_SOC_SSC
 	select SND_SOC_WM8731
 	help
@@ -25,7 +26,7 @@
 
 config SND_AT32_SOC_PLAYPAQ
         tristate "SoC Audio support for PlayPaq with WM8510"
-        depends on SND_ATMEL_SOC && BOARD_PLAYPAQ
+        depends on SND_ATMEL_SOC && BOARD_PLAYPAQ && AT91_PROGRAMMABLE_CLOCKS
         select SND_ATMEL_SOC_SSC
         select SND_SOC_WM8510
         help
diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c
index bc22ee9..470cb93 100644
--- a/sound/soc/codecs/max98088.c
+++ b/sound/soc/codecs/max98088.c
@@ -28,6 +28,11 @@
 #include <sound/max98088.h>
 #include "max98088.h"
 
+enum max98088_type {
+       MAX98088,
+       MAX98089,
+};
+
 struct max98088_cdata {
        unsigned int rate;
        unsigned int fmt;
@@ -36,6 +41,7 @@
 
 struct max98088_priv {
        u8 reg_cache[M98088_REG_CNT];
+       enum max98088_type devtype;
        void *control_data;
        struct max98088_pdata *pdata;
        unsigned int sysclk;
@@ -2040,6 +2046,8 @@
        if (max98088 == NULL)
                return -ENOMEM;
 
+       max98088->devtype = id->driver_data;
+
        i2c_set_clientdata(i2c, max98088);
        max98088->control_data = i2c;
        max98088->pdata = i2c->dev.platform_data;
@@ -2059,7 +2067,8 @@
 }
 
 static const struct i2c_device_id max98088_i2c_id[] = {
-       { "max98088", 0 },
+       { "max98088", MAX98088 },
+       { "max98089", MAX98089 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, max98088_i2c_id);
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
index 7540a50..464f0cf 100644
--- a/sound/soc/codecs/uda134x.c
+++ b/sound/soc/codecs/uda134x.c
@@ -597,6 +597,7 @@
 	.resume =       uda134x_soc_resume,
 	.reg_cache_size = sizeof(uda134x_reg),
 	.reg_word_size = sizeof(u8),
+	.reg_cache_default = uda134x_reg,
 	.reg_cache_step = 1,
 	.read = uda134x_read_reg_cache,
 	.write = uda134x_write,
diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
index f4f1fba..7611add 100644
--- a/sound/soc/codecs/wm8350.c
+++ b/sound/soc/codecs/wm8350.c
@@ -831,7 +831,7 @@
 	}
 
 	/* MCLK direction */
-	if (dir == WM8350_MCLK_DIR_OUT)
+	if (dir == SND_SOC_CLOCK_OUT)
 		wm8350_set_bits(wm8350, WM8350_CLOCK_CONTROL_2,
 				WM8350_MCLK_DIR);
 	else
@@ -1586,6 +1586,13 @@
 	wm8350_set_bits(wm8350, WM8350_ROUT2_VOLUME,
 			WM8350_OUT2_VU | WM8350_OUT2R_MUTE);
 
+	/* Make sure AIF tristating is disabled by default */
+	wm8350_clear_bits(wm8350, WM8350_AI_FORMATING, WM8350_AIF_TRI);
+
+	/* Make sure we've got a sane companding setup too */
+	wm8350_clear_bits(wm8350, WM8350_ADC_DAC_COMP,
+			  WM8350_DAC_COMP | WM8350_LOOPBACK);
+
 	/* Make sure jack detect is disabled to start off with */
 	wm8350_clear_bits(wm8350, WM8350_JACK_DETECT,
 			  WM8350_JDL_ENA | WM8350_JDR_ENA);
diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c
index 04182c4..0132a27 100644
--- a/sound/soc/codecs/wm8776.c
+++ b/sound/soc/codecs/wm8776.c
@@ -34,7 +34,6 @@
 /* codec private data */
 struct wm8776_priv {
 	enum snd_soc_control_type control_type;
-	u16 reg_cache[WM8776_CACHEREGNUM];
 	int sysclk[2];
 };
 
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 894d0cd..e809274 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -3500,8 +3500,11 @@
 {
 	struct wm8962_priv *wm8962 = dev_get_drvdata(dev);
 	long int time;
+	int ret;
 
-	strict_strtol(buf, 10, &time);
+	ret = strict_strtol(buf, 10, &time);
+	if (ret != 0)
+		return ret;
 
 	input_event(wm8962->beep, EV_SND, SND_TONE, time);
 
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 0db59c3..830dfdd66 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -3903,6 +3903,8 @@
 		return -ENOMEM;
 	snd_soc_codec_set_drvdata(codec, wm8994);
 
+	codec->reg_cache = &wm8994->reg_cache;
+
 	wm8994->pdata = dev_get_platdata(codec->dev->parent);
 	wm8994->codec = codec;
 
diff --git a/sound/soc/davinci/davinci-evm.c b/sound/soc/davinci/davinci-evm.c
index 2b07b17..bc9e6b0b 100644
--- a/sound/soc/davinci/davinci-evm.c
+++ b/sound/soc/davinci/davinci-evm.c
@@ -157,12 +157,23 @@
 }
 
 /* davinci-evm digital audio interface glue - connects codec <--> CPU */
-static struct snd_soc_dai_link evm_dai = {
+static struct snd_soc_dai_link dm6446_evm_dai = {
 	.name = "TLV320AIC3X",
 	.stream_name = "AIC3X",
-	.cpu_dai_name = "davinci-mcasp.0",
+	.cpu_dai_name = "davinci-mcbsp",
 	.codec_dai_name = "tlv320aic3x-hifi",
-	.codec_name = "tlv320aic3x-codec.0-001a",
+	.codec_name = "tlv320aic3x-codec.1-001b",
+	.platform_name = "davinci-pcm-audio",
+	.init = evm_aic3x_init,
+	.ops = &evm_ops,
+};
+
+static struct snd_soc_dai_link dm355_evm_dai = {
+	.name = "TLV320AIC3X",
+	.stream_name = "AIC3X",
+	.cpu_dai_name = "davinci-mcbsp.1",
+	.codec_dai_name = "tlv320aic3x-hifi",
+	.codec_name = "tlv320aic3x-codec.1-001b",
 	.platform_name = "davinci-pcm-audio",
 	.init = evm_aic3x_init,
 	.ops = &evm_ops,
@@ -172,10 +183,10 @@
 #ifdef CONFIG_SND_DM365_AIC3X_CODEC
 	.name = "TLV320AIC3X",
 	.stream_name = "AIC3X",
-	.cpu_dai_name = "davinci-i2s",
+	.cpu_dai_name = "davinci-mcbsp",
 	.codec_dai_name = "tlv320aic3x-hifi",
 	.init = evm_aic3x_init,
-	.codec_name = "tlv320aic3x-codec.0-001a",
+	.codec_name = "tlv320aic3x-codec.1-0018",
 	.ops = &evm_ops,
 #elif defined(CONFIG_SND_DM365_VOICE_CODEC)
 	.name = "Voice Codec - CQ93VC",
@@ -219,10 +230,17 @@
 	.ops = &evm_ops,
 };
 
-/* davinci dm6446, dm355 evm audio machine driver */
-static struct snd_soc_card snd_soc_card_evm = {
-	.name = "DaVinci EVM",
-	.dai_link = &evm_dai,
+/* davinci dm6446 evm audio machine driver */
+static struct snd_soc_card dm6446_snd_soc_card_evm = {
+	.name = "DaVinci DM6446 EVM",
+	.dai_link = &dm6446_evm_dai,
+	.num_links = 1,
+};
+
+/* davinci dm355 evm audio machine driver */
+static struct snd_soc_card dm355_snd_soc_card_evm = {
+	.name = "DaVinci DM355 EVM",
+	.dai_link = &dm355_evm_dai,
 	.num_links = 1,
 };
 
@@ -261,10 +279,10 @@
 	int ret;
 
 	if (machine_is_davinci_evm()) {
-		evm_snd_dev_data = &snd_soc_card_evm;
+		evm_snd_dev_data = &dm6446_snd_soc_card_evm;
 		index = 0;
 	} else if (machine_is_davinci_dm355_evm()) {
-		evm_snd_dev_data = &snd_soc_card_evm;
+		evm_snd_dev_data = &dm355_snd_soc_card_evm;
 		index = 1;
 	} else if (machine_is_davinci_dm365_evm()) {
 		evm_snd_dev_data = &dm365_snd_soc_card_evm;
diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c
index d46b545..9e0e565 100644
--- a/sound/soc/davinci/davinci-i2s.c
+++ b/sound/soc/davinci/davinci-i2s.c
@@ -426,9 +426,6 @@
 	snd_pcm_format_t fmt;
 	unsigned element_cnt = 1;
 
-	dai->capture_dma_data = dev->dma_params;
-	dai->playback_dma_data = dev->dma_params;
-
 	/* general line settings */
 	spcr = davinci_mcbsp_read_reg(dev, DAVINCI_MCBSP_SPCR_REG);
 	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
@@ -601,6 +598,15 @@
 	return ret;
 }
 
+static int davinci_i2s_startup(struct snd_pcm_substream *substream,
+			       struct snd_soc_dai *dai)
+{
+	struct davinci_mcbsp_dev *dev = snd_soc_dai_get_drvdata(dai);
+
+	snd_soc_dai_set_dma_data(dai, substream, dev->dma_params);
+	return 0;
+}
+
 static void davinci_i2s_shutdown(struct snd_pcm_substream *substream,
 		struct snd_soc_dai *dai)
 {
@@ -612,6 +618,7 @@
 #define DAVINCI_I2S_RATES	SNDRV_PCM_RATE_8000_96000
 
 static struct snd_soc_dai_ops davinci_i2s_dai_ops = {
+	.startup	= davinci_i2s_startup,
 	.shutdown	= davinci_i2s_shutdown,
 	.prepare	= davinci_i2s_prepare,
 	.trigger	= davinci_i2s_trigger,
@@ -749,7 +756,7 @@
 	.probe		= davinci_i2s_probe,
 	.remove		= davinci_i2s_remove,
 	.driver		= {
-		.name	= "davinci-i2s",
+		.name	= "davinci-mcbsp",
 		.owner	= THIS_MODULE,
 	},
 };
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 86918ee..fb55d2c 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -715,9 +715,6 @@
 	int word_length;
 	u8 fifo_level;
 
-	cpu_dai->capture_dma_data = dev->dma_params;
-	cpu_dai->playback_dma_data = dev->dma_params;
-
 	davinci_hw_common_param(dev, substream->stream);
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
 		fifo_level = dev->txnumevt;
@@ -799,7 +796,17 @@
 	return ret;
 }
 
+static int davinci_mcasp_startup(struct snd_pcm_substream *substream,
+				 struct snd_soc_dai *dai)
+{
+	struct davinci_audio_dev *dev = snd_soc_dai_get_drvdata(dai);
+
+	snd_soc_dai_set_dma_data(dai, substream, dev->dma_params);
+	return 0;
+}
+
 static struct snd_soc_dai_ops davinci_mcasp_dai_ops = {
+	.startup	= davinci_mcasp_startup,
 	.trigger	= davinci_mcasp_trigger,
 	.hw_params	= davinci_mcasp_hw_params,
 	.set_fmt	= davinci_mcasp_set_dai_fmt,
diff --git a/sound/soc/davinci/davinci-sffsdr.c b/sound/soc/davinci/davinci-sffsdr.c
index 009b652..6c6666a 100644
--- a/sound/soc/davinci/davinci-sffsdr.c
+++ b/sound/soc/davinci/davinci-sffsdr.c
@@ -84,7 +84,7 @@
 static struct snd_soc_dai_link sffsdr_dai = {
 	.name = "PCM3008", /* Codec name */
 	.stream_name = "PCM3008 HiFi",
-	.cpu_dai_name = "davinci-asp.0",
+	.cpu_dai_name = "davinci-mcbsp",
 	.codec_dai_name = "pcm3008-hifi",
 	.codec_name = "pcm3008-codec",
 	.platform_name = "davinci-pcm-audio",
diff --git a/sound/soc/davinci/davinci-vcif.c b/sound/soc/davinci/davinci-vcif.c
index ea232f6..fb4cc1e 100644
--- a/sound/soc/davinci/davinci-vcif.c
+++ b/sound/soc/davinci/davinci-vcif.c
@@ -97,9 +97,6 @@
 			&davinci_vcif_dev->dma_params[substream->stream];
 	u32 w;
 
-	dai->capture_dma_data = davinci_vcif_dev->dma_params;
-	dai->playback_dma_data = davinci_vcif_dev->dma_params;
-
 	/* Restart the codec before setup */
 	davinci_vcif_stop(substream);
 	davinci_vcif_start(substream);
@@ -174,9 +171,19 @@
 	return ret;
 }
 
+static int davinci_vcif_startup(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai)
+{
+	struct davinci_vcif_dev *dev = snd_soc_dai_get_drvdata(dai);
+
+	snd_soc_dai_set_dma_data(dai, substream, dev->dma_params);
+	return 0;
+}
+
 #define DAVINCI_VCIF_RATES	SNDRV_PCM_RATE_8000_48000
 
 static struct snd_soc_dai_ops davinci_vcif_dai_ops = {
+	.startup	= davinci_vcif_startup,
 	.trigger	= davinci_vcif_trigger,
 	.hw_params	= davinci_vcif_hw_params,
 };
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c
index 74ffed4..9018fa5 100644
--- a/sound/soc/fsl/mpc5200_psc_i2s.c
+++ b/sound/soc/fsl/mpc5200_psc_i2s.c
@@ -160,7 +160,7 @@
 	rc = snd_soc_register_dais(&op->dev, psc_i2s_dai, ARRAY_SIZE(psc_i2s_dai));
 	if (rc != 0) {
 		pr_err("Failed to register DAI\n");
-		return 0;
+		return rc;
 	}
 
 	psc_dma = dev_get_drvdata(&op->dev);
diff --git a/sound/soc/imx/eukrea-tlv320.c b/sound/soc/imx/eukrea-tlv320.c
index b596752..dd4fffd 100644
--- a/sound/soc/imx/eukrea-tlv320.c
+++ b/sound/soc/imx/eukrea-tlv320.c
@@ -34,8 +34,8 @@
 			    struct snd_pcm_hw_params *params)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
-	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	struct snd_soc_dai *codec_dai = rtd->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	int ret;
 
 	ret = snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_I2S |
@@ -79,10 +79,10 @@
 static struct snd_soc_dai_link eukrea_tlv320_dai = {
 	.name		= "tlv320aic23",
 	.stream_name	= "TLV320AIC23",
-	.codec_dai	= "tlv320aic23-hifi",
+	.codec_dai_name	= "tlv320aic23-hifi",
 	.platform_name	= "imx-pcm-audio.0",
 	.codec_name	= "tlv320aic23-codec.0-001a",
-	.cpu_dai = "imx-ssi.0",
+	.cpu_dai_name	= "imx-ssi.0",
 	.ops		= &eukrea_tlv320_snd_ops,
 };
 
diff --git a/sound/soc/imx/imx-pcm-dma-mx2.c b/sound/soc/imx/imx-pcm-dma-mx2.c
index fd493ee..671ef8d 100644
--- a/sound/soc/imx/imx-pcm-dma-mx2.c
+++ b/sound/soc/imx/imx-pcm-dma-mx2.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/dmaengine.h>
 
 #include <sound/core.h>
 #include <sound/initval.h>
@@ -27,165 +28,146 @@
 #include <sound/pcm_params.h>
 #include <sound/soc.h>
 
-#include <mach/dma-mx1-mx2.h>
+#include <mach/dma.h>
 
 #include "imx-ssi.h"
 
 struct imx_pcm_runtime_data {
-	int sg_count;
-	struct scatterlist *sg_list;
-	int period;
+	int period_bytes;
 	int periods;
-	unsigned long dma_addr;
 	int dma;
-	struct snd_pcm_substream *substream;
 	unsigned long offset;
 	unsigned long size;
-	unsigned long period_cnt;
 	void *buf;
 	int period_time;
+	struct dma_async_tx_descriptor *desc;
+	struct dma_chan *dma_chan;
+	struct imx_dma_data dma_data;
 };
 
-/* Called by the DMA framework when a period has elapsed */
-static void imx_ssi_dma_progression(int channel, void *data,
-					struct scatterlist *sg)
+static void audio_dma_irq(void *data)
 {
-	struct snd_pcm_substream *substream = data;
+	struct snd_pcm_substream *substream = (struct snd_pcm_substream *)data;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct imx_pcm_runtime_data *iprtd = runtime->private_data;
 
-	if (!sg)
-		return;
+	iprtd->offset += iprtd->period_bytes;
+	iprtd->offset %= iprtd->period_bytes * iprtd->periods;
 
-	runtime = iprtd->substream->runtime;
-
-	iprtd->offset = sg->dma_address - runtime->dma_addr;
-
-	snd_pcm_period_elapsed(iprtd->substream);
+	snd_pcm_period_elapsed(substream);
 }
 
-static void imx_ssi_dma_callback(int channel, void *data)
+static bool filter(struct dma_chan *chan, void *param)
 {
-	pr_err("%s shouldn't be called\n", __func__);
+	struct imx_pcm_runtime_data *iprtd = param;
+
+	if (!imx_dma_is_general_purpose(chan))
+		return false;
+
+        chan->private = &iprtd->dma_data;
+
+        return true;
 }
 
-static void snd_imx_dma_err_callback(int channel, void *data, int err)
-{
-	struct snd_pcm_substream *substream = data;
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct imx_pcm_dma_params *dma_params = 
-		snd_soc_dai_get_dma_data(rtd->dai->cpu_dai, substream);
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct imx_pcm_runtime_data *iprtd = runtime->private_data;
-	int ret;
-
-	pr_err("DMA timeout on channel %d -%s%s%s%s\n",
-		 channel,
-		 err & IMX_DMA_ERR_BURST ?    " burst" : "",
-		 err & IMX_DMA_ERR_REQUEST ?  " request" : "",
-		 err & IMX_DMA_ERR_TRANSFER ? " transfer" : "",
-		 err & IMX_DMA_ERR_BUFFER ?   " buffer" : "");
-
-	imx_dma_disable(iprtd->dma);
-	ret = imx_dma_setup_sg(iprtd->dma, iprtd->sg_list, iprtd->sg_count,
-			IMX_DMA_LENGTH_LOOP, dma_params->dma_addr,
-			substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
-			DMA_MODE_WRITE : DMA_MODE_READ);
-	if (!ret)
-		imx_dma_enable(iprtd->dma);
-}
-
-static int imx_ssi_dma_alloc(struct snd_pcm_substream *substream)
+static int imx_ssi_dma_alloc(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct imx_pcm_dma_params *dma_params;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct imx_pcm_runtime_data *iprtd = runtime->private_data;
+	struct dma_slave_config slave_config;
+	dma_cap_mask_t mask;
+	enum dma_slave_buswidth buswidth;
 	int ret;
 
 	dma_params = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream);
 
-	iprtd->dma = imx_dma_request_by_prio(DRV_NAME, DMA_PRIO_HIGH);
-	if (iprtd->dma < 0) {
-		pr_err("Failed to claim the audio DMA\n");
-		return -ENODEV;
+	iprtd->dma_data.peripheral_type = IMX_DMATYPE_SSI;
+	iprtd->dma_data.priority = DMA_PRIO_HIGH;
+	iprtd->dma_data.dma_request = dma_params->dma;
+
+	/* Try to grab a DMA channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	iprtd->dma_chan = dma_request_channel(mask, filter, iprtd);
+	if (!iprtd->dma_chan)
+		return -EINVAL;
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES;
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+	case SNDRV_PCM_FORMAT_S24_LE:
+		buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		break;
+	default:
+		return 0;
 	}
 
-	ret = imx_dma_setup_handlers(iprtd->dma,
-				imx_ssi_dma_callback,
-				snd_imx_dma_err_callback, substream);
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		slave_config.direction = DMA_TO_DEVICE;
+		slave_config.dst_addr = dma_params->dma_addr;
+		slave_config.dst_addr_width = buswidth;
+		slave_config.dst_maxburst = dma_params->burstsize;
+	} else {
+		slave_config.direction = DMA_FROM_DEVICE;
+		slave_config.src_addr = dma_params->dma_addr;
+		slave_config.src_addr_width = buswidth;
+		slave_config.src_maxburst = dma_params->burstsize;
+	}
+
+	ret = dmaengine_slave_config(iprtd->dma_chan, &slave_config);
 	if (ret)
-		goto out;
-
-	ret = imx_dma_setup_progression_handler(iprtd->dma,
-			imx_ssi_dma_progression);
-	if (ret) {
-		pr_err("Failed to setup the DMA handler\n");
-		goto out;
-	}
-
-	ret = imx_dma_config_channel(iprtd->dma,
-			IMX_DMA_MEMSIZE_16 | IMX_DMA_TYPE_FIFO,
-			IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR,
-			dma_params->dma, 1);
-	if (ret < 0) {
-		pr_err("Cannot configure DMA channel: %d\n", ret);
-		goto out;
-	}
-
-	imx_dma_config_burstlen(iprtd->dma, dma_params->burstsize * 2);
+		return ret;
 
 	return 0;
-out:
-	imx_dma_free(iprtd->dma);
-	return ret;
 }
 
 static int snd_imx_pcm_hw_params(struct snd_pcm_substream *substream,
 				struct snd_pcm_hw_params *params)
 {
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct imx_pcm_runtime_data *iprtd = runtime->private_data;
-	int i;
 	unsigned long dma_addr;
+	struct dma_chan *chan;
+	struct imx_pcm_dma_params *dma_params;
+	int ret;
 
-	imx_ssi_dma_alloc(substream);
+	dma_params = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream);
+	ret = imx_ssi_dma_alloc(substream, params);
+	if (ret)
+		return ret;
+	chan = iprtd->dma_chan;
 
 	iprtd->size = params_buffer_bytes(params);
 	iprtd->periods = params_periods(params);
-	iprtd->period = params_period_bytes(params);
+	iprtd->period_bytes = params_period_bytes(params);
 	iprtd->offset = 0;
 	iprtd->period_time = HZ / (params_rate(params) /
 			params_period_size(params));
 
 	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
 
-	if (iprtd->sg_count != iprtd->periods) {
-		kfree(iprtd->sg_list);
-
-		iprtd->sg_list = kcalloc(iprtd->periods + 1,
-				sizeof(struct scatterlist), GFP_KERNEL);
-		if (!iprtd->sg_list)
-			return -ENOMEM;
-		iprtd->sg_count = iprtd->periods + 1;
-	}
-
-	sg_init_table(iprtd->sg_list, iprtd->sg_count);
 	dma_addr = runtime->dma_addr;
 
-	for (i = 0; i < iprtd->periods; i++) {
-		iprtd->sg_list[i].page_link = 0;
-		iprtd->sg_list[i].offset = 0;
-		iprtd->sg_list[i].dma_address = dma_addr;
-		iprtd->sg_list[i].length = iprtd->period;
-		dma_addr += iprtd->period;
+	iprtd->buf = (unsigned int *)substream->dma_buffer.area;
+
+	iprtd->desc = chan->device->device_prep_dma_cyclic(chan, dma_addr,
+			iprtd->period_bytes * iprtd->periods,
+			iprtd->period_bytes,
+			substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	if (!iprtd->desc) {
+		dev_err(&chan->dev->device, "cannot prepare slave dma\n");
+		return -EINVAL;
 	}
 
-	/* close the loop */
-	iprtd->sg_list[iprtd->sg_count - 1].offset = 0;
-	iprtd->sg_list[iprtd->sg_count - 1].length = 0;
-	iprtd->sg_list[iprtd->sg_count - 1].page_link =
-			((unsigned long) iprtd->sg_list | 0x01) & ~0x02;
+	iprtd->desc->callback = audio_dma_irq;
+	iprtd->desc->callback_param = substream;
+
 	return 0;
 }
 
@@ -194,41 +176,21 @@
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct imx_pcm_runtime_data *iprtd = runtime->private_data;
 
-	if (iprtd->dma >= 0) {
-		imx_dma_free(iprtd->dma);
-		iprtd->dma = -EINVAL;
+	if (iprtd->dma_chan) {
+		dma_release_channel(iprtd->dma_chan);
+		iprtd->dma_chan = NULL;
 	}
 
-	kfree(iprtd->sg_list);
-	iprtd->sg_list = NULL;
-
 	return 0;
 }
 
 static int snd_imx_pcm_prepare(struct snd_pcm_substream *substream)
 {
-	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct imx_pcm_dma_params *dma_params;
-	struct imx_pcm_runtime_data *iprtd = runtime->private_data;
-	int err;
 
 	dma_params = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream);
 
-	iprtd->substream = substream;
-	iprtd->buf = (unsigned int *)substream->dma_buffer.area;
-	iprtd->period_cnt = 0;
-
-	pr_debug("%s: buf: %p period: %d periods: %d\n",
-			__func__, iprtd->buf, iprtd->period, iprtd->periods);
-
-	err = imx_dma_setup_sg(iprtd->dma, iprtd->sg_list, iprtd->sg_count,
-			IMX_DMA_LENGTH_LOOP, dma_params->dma_addr,
-			substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
-			DMA_MODE_WRITE : DMA_MODE_READ);
-	if (err)
-		return err;
-
 	return 0;
 }
 
@@ -241,14 +203,14 @@
 	case SNDRV_PCM_TRIGGER_START:
 	case SNDRV_PCM_TRIGGER_RESUME:
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
-		imx_dma_enable(iprtd->dma);
+		dmaengine_submit(iprtd->desc);
 
 		break;
 
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-		imx_dma_disable(iprtd->dma);
+		dmaengine_terminate_all(iprtd->dma_chan);
 
 		break;
 	default:
@@ -263,6 +225,9 @@
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct imx_pcm_runtime_data *iprtd = runtime->private_data;
 
+	pr_debug("%s: %ld %ld\n", __func__, iprtd->offset,
+			bytes_to_frames(substream->runtime, iprtd->offset));
+
 	return bytes_to_frames(substream->runtime, iprtd->offset);
 }
 
@@ -279,7 +244,7 @@
 	.channels_max = 2,
 	.buffer_bytes_max = IMX_SSI_DMABUF_SIZE,
 	.period_bytes_min = 128,
-	.period_bytes_max = 16 * 1024,
+	.period_bytes_max = 65535, /* Limited by SDMA engine */
 	.periods_min = 2,
 	.periods_max = 255,
 	.fifo_size = 0,
@@ -304,11 +269,23 @@
 	}
 
 	snd_soc_set_runtime_hwparams(substream, &snd_imx_hardware);
+
+	return 0;
+}
+
+static int snd_imx_close(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct imx_pcm_runtime_data *iprtd = runtime->private_data;
+
+	kfree(iprtd);
+
 	return 0;
 }
 
 static struct snd_pcm_ops imx_pcm_ops = {
 	.open		= snd_imx_open,
+	.close		= snd_imx_close,
 	.ioctl		= snd_pcm_lib_ioctl,
 	.hw_params	= snd_imx_pcm_hw_params,
 	.hw_free	= snd_imx_pcm_hw_free,
@@ -340,7 +317,6 @@
 			.name = "imx-pcm-audio",
 			.owner = THIS_MODULE,
 	},
-
 	.probe = imx_soc_platform_probe,
 	.remove = __devexit_p(imx_soc_platform_remove),
 };
@@ -356,4 +332,3 @@
 	platform_driver_unregister(&imx_pcm_driver);
 }
 module_exit(snd_imx_pcm_exit);
-
diff --git a/sound/soc/imx/imx-ssi.c b/sound/soc/imx/imx-ssi.c
index d4bd345..d2d98c7 100644
--- a/sound/soc/imx/imx-ssi.c
+++ b/sound/soc/imx/imx-ssi.c
@@ -439,7 +439,22 @@
 }
 EXPORT_SYMBOL_GPL(imx_pcm_free);
 
+static int imx_ssi_dai_probe(struct snd_soc_dai *dai)
+{
+	struct imx_ssi *ssi = dev_get_drvdata(dai->dev);
+	uint32_t val;
+
+	snd_soc_dai_set_drvdata(dai, ssi);
+
+	val = SSI_SFCSR_TFWM0(ssi->dma_params_tx.burstsize) |
+		SSI_SFCSR_RFWM0(ssi->dma_params_rx.burstsize);
+	writel(val, ssi->base + SSI_SFCSR);
+
+	return 0;
+}
+
 static struct snd_soc_dai_driver imx_ssi_dai = {
+	.probe = imx_ssi_dai_probe,
 	.playback = {
 		.channels_min = 2,
 		.channels_max = 2,
@@ -455,20 +470,6 @@
 	.ops = &imx_ssi_pcm_dai_ops,
 };
 
-static int imx_ssi_dai_probe(struct snd_soc_dai *dai)
-{
-	struct imx_ssi *ssi = dev_get_drvdata(dai->dev);
-	uint32_t val;
-
-	snd_soc_dai_set_drvdata(dai, ssi);
-
-	val = SSI_SFCSR_TFWM0(ssi->dma_params_tx.burstsize) |
-		SSI_SFCSR_RFWM0(ssi->dma_params_rx.burstsize);
-	writel(val, ssi->base + SSI_SFCSR);
-
-	return 0;
-}
-
 static struct snd_soc_dai_driver imx_ac97_dai = {
 	.probe = imx_ssi_dai_probe,
 	.ac97_control = 1,
@@ -677,7 +678,17 @@
 		goto failed_register;
 	}
 
-	ssi->soc_platform_pdev = platform_device_alloc("imx-fiq-pcm-audio", pdev->id);
+	ssi->soc_platform_pdev_fiq = platform_device_alloc("imx-fiq-pcm-audio", pdev->id);
+	if (!ssi->soc_platform_pdev_fiq)
+		goto failed_pdev_fiq_alloc;
+	platform_set_drvdata(ssi->soc_platform_pdev_fiq, ssi);
+	ret = platform_device_add(ssi->soc_platform_pdev_fiq);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to add platform device\n");
+		goto failed_pdev_fiq_add;
+	}
+
+	ssi->soc_platform_pdev = platform_device_alloc("imx-pcm-audio", pdev->id);
 	if (!ssi->soc_platform_pdev)
 		goto failed_pdev_alloc;
 	platform_set_drvdata(ssi->soc_platform_pdev, ssi);
@@ -692,6 +703,9 @@
 failed_pdev_add:
 	platform_device_put(ssi->soc_platform_pdev);
 failed_pdev_alloc:
+failed_pdev_fiq_add:
+	platform_device_put(ssi->soc_platform_pdev_fiq);
+failed_pdev_fiq_alloc:
 	snd_soc_unregister_dai(&pdev->dev);
 failed_register:
 failed_ac97:
diff --git a/sound/soc/imx/imx-ssi.h b/sound/soc/imx/imx-ssi.h
index 53b780d..a4406a1 100644
--- a/sound/soc/imx/imx-ssi.h
+++ b/sound/soc/imx/imx-ssi.h
@@ -185,6 +185,9 @@
 
 #define DRV_NAME "imx-ssi"
 
+#include <linux/dmaengine.h>
+#include <mach/dma.h>
+
 struct imx_pcm_dma_params {
 	int dma;
 	unsigned long dma_addr;
@@ -212,6 +215,7 @@
 	int enabled;
 
 	struct platform_device *soc_platform_pdev;
+	struct platform_device *soc_platform_pdev_fiq;
 };
 
 struct snd_soc_platform *imx_ssi_fiq_init(struct platform_device *pdev,
diff --git a/sound/soc/imx/phycore-ac97.c b/sound/soc/imx/phycore-ac97.c
index 6a65dd7..39f2373 100644
--- a/sound/soc/imx/phycore-ac97.c
+++ b/sound/soc/imx/phycore-ac97.c
@@ -20,9 +20,6 @@
 #include <sound/soc-dapm.h>
 #include <asm/mach-types.h>
 
-#include "../codecs/wm9712.h"
-#include "imx-ssi.h"
-
 static struct snd_soc_card imx_phycore;
 
 static struct snd_soc_ops imx_phycore_hifi_ops = {
@@ -41,7 +38,7 @@
 };
 
 static struct snd_soc_card imx_phycore = {
-	.name		= "PhyCORE-audio",
+	.name		= "PhyCORE-ac97-audio",
 	.dai_link	= imx_phycore_dai_ac97,
 	.num_links	= ARRAY_SIZE(imx_phycore_dai_ac97),
 };
diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index d211c9f..7e84f24 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -644,15 +644,23 @@
 
 
 	case OMAP_MCBSP_CLKR_SRC_CLKR:
+		if (cpu_class_is_omap1())
+			break;
 		omap2_mcbsp1_mux_clkr_src(CLKR_SRC_CLKR);
 		break;
 	case OMAP_MCBSP_CLKR_SRC_CLKX:
+		if (cpu_class_is_omap1())
+			break;
 		omap2_mcbsp1_mux_clkr_src(CLKR_SRC_CLKX);
 		break;
 	case OMAP_MCBSP_FSR_SRC_FSR:
+		if (cpu_class_is_omap1())
+			break;
 		omap2_mcbsp1_mux_fsr_src(FSR_SRC_FSR);
 		break;
 	case OMAP_MCBSP_FSR_SRC_FSX:
+		if (cpu_class_is_omap1())
+			break;
 		omap2_mcbsp1_mux_fsr_src(FSR_SRC_FSX);
 		break;
 	default:
diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c
index 97e9423..f451acd 100644
--- a/sound/soc/pxa/corgi.c
+++ b/sound/soc/pxa/corgi.c
@@ -100,8 +100,13 @@
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_codec *codec = rtd->codec;
 
+	mutex_lock(&codec->mutex);
+
 	/* check the jack status at stream startup */
 	corgi_ext_control(codec);
+
+	mutex_unlock(&codec->mutex);
+
 	return 0;
 }
 
diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c
index b8207ce..5ef0526 100644
--- a/sound/soc/pxa/magician.c
+++ b/sound/soc/pxa/magician.c
@@ -72,9 +72,13 @@
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_codec *codec = rtd->codec;
 
+	mutex_lock(&codec->mutex);
+
 	/* check the jack status at stream startup */
 	magician_ext_control(codec);
 
+	mutex_unlock(&codec->mutex);
+
 	return 0;
 }
 
diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c
index af84ee9..84edd03 100644
--- a/sound/soc/pxa/poodle.c
+++ b/sound/soc/pxa/poodle.c
@@ -77,8 +77,13 @@
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_codec *codec = rtd->codec;
 
+	mutex_lock(&codec->mutex);
+
 	/* check the jack status at stream startup */
 	poodle_ext_control(codec);
+
+	mutex_unlock(&codec->mutex);
+
 	return 0;
 }
 
diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c
index f470f36..0b30d7d 100644
--- a/sound/soc/pxa/spitz.c
+++ b/sound/soc/pxa/spitz.c
@@ -108,8 +108,13 @@
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_codec *codec = rtd->codec;
 
+	mutex_lock(&codec->mutex);
+
 	/* check the jack status at stream startup */
 	spitz_ext_control(codec);
+
+	mutex_unlock(&codec->mutex);
+
 	return 0;
 }
 
diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c
index 73d0edd..7b983f9 100644
--- a/sound/soc/pxa/tosa.c
+++ b/sound/soc/pxa/tosa.c
@@ -81,8 +81,13 @@
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_codec *codec = rtd->codec;
 
+	mutex_lock(&codec->mutex);
+
 	/* check the jack status at stream startup */
 	tosa_ext_control(codec);
+
+	mutex_unlock(&codec->mutex);
+
 	return 0;
 }
 
diff --git a/sound/soc/s3c24xx/Kconfig b/sound/soc/s3c24xx/Kconfig
index 8a6b53c..d85bf8a 100644
--- a/sound/soc/s3c24xx/Kconfig
+++ b/sound/soc/s3c24xx/Kconfig
@@ -2,6 +2,7 @@
 	tristate "SoC Audio for the Samsung S3CXXXX chips"
 	depends on ARCH_S3C2410 || ARCH_S3C64XX || ARCH_S5PC100 || ARCH_S5PV210
 	select S3C64XX_DMA if ARCH_S3C64XX
+	select S3C2410_DMA if ARCH_S3C2410
 	help
 	  Say Y or M if you want to add support for codecs attached to
 	  the S3C24XX AC97 or I2S interfaces. You will also need to
diff --git a/sound/soc/s3c24xx/rx1950_uda1380.c b/sound/soc/s3c24xx/rx1950_uda1380.c
index ffd5cf2..468cc11 100644
--- a/sound/soc/s3c24xx/rx1950_uda1380.c
+++ b/sound/soc/s3c24xx/rx1950_uda1380.c
@@ -50,7 +50,6 @@
 	16000,
 	44100,
 	48000,
-	88200,
 };
 
 static struct snd_pcm_hw_constraint_list hw_rates = {
@@ -130,7 +129,6 @@
 };
 
 static struct platform_device *s3c24xx_snd_device;
-static struct clk *xtal;
 
 static int rx1950_startup(struct snd_pcm_substream *substream)
 {
@@ -179,10 +177,8 @@
 	case 44100:
 	case 88200:
 		clk_source = S3C24XX_CLKSRC_MPLL;
-		fs_mode = S3C2410_IISMOD_256FS;
-		div = clk_get_rate(xtal) / (256 * rate);
-		if (clk_get_rate(xtal) % (256 * rate) > (128 * rate))
-			div++;
+		fs_mode = S3C2410_IISMOD_384FS;
+		div = 1;
 		break;
 	default:
 		printk(KERN_ERR "%s: rate %d is not supported\n",
@@ -210,7 +206,7 @@
 
 	/* set MCLK division for sample rate */
 	ret = snd_soc_dai_set_clkdiv(cpu_dai, S3C24XX_DIV_MCLK,
-		S3C2410_IISMOD_384FS);
+		fs_mode);
 	if (ret < 0)
 		return ret;
 
@@ -295,17 +291,8 @@
 		goto err_plat_add;
 	}
 
-	xtal = clk_get(&s3c24xx_snd_device->dev, "xtal");
-
-	if (IS_ERR(xtal)) {
-		ret = PTR_ERR(xtal);
-		platform_device_unregister(s3c24xx_snd_device);
-		goto err_clk;
-	}
-
 	return 0;
 
-err_clk:
 err_plat_add:
 err_plat_alloc:
 err_gpio_conf:
@@ -320,7 +307,6 @@
 	platform_device_unregister(s3c24xx_snd_device);
 	snd_soc_jack_free_gpios(&hp_jack, ARRAY_SIZE(hp_jack_gpios),
 		hp_jack_gpios);
-	clk_put(xtal);
 	gpio_free(S3C2410_GPA(1));
 }
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 614a8b3..441285a 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -3043,8 +3043,10 @@
 	for (i = 0; i < count; i++) {
 
 		dai = kzalloc(sizeof(struct snd_soc_dai), GFP_KERNEL);
-		if (dai == NULL)
-			return -ENOMEM;
+		if (dai == NULL) {
+			ret = -ENOMEM;
+			goto err;
+		}
 
 		/* create DAI component name */
 		dai->name = fmt_multiple_name(dev, &dai_drv[i]);
@@ -3263,9 +3265,6 @@
 	return 0;
 
 error:
-	for (i--; i >= 0; i--)
-		snd_soc_unregister_dai(dev);
-
 	if (codec->reg_cache)
 		kfree(codec->reg_cache);
 	kfree(codec->name);
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 7d85c64..75ed649 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -683,12 +683,12 @@
 			    struct snd_soc_dapm_widget *b,
 			    int sort[])
 {
-	if (a->codec != b->codec)
-		return (unsigned long)a - (unsigned long)b;
 	if (sort[a->id] != sort[b->id])
 		return sort[a->id] - sort[b->id];
 	if (a->reg != b->reg)
 		return a->reg - b->reg;
+	if (a->codec != b->codec)
+		return (unsigned long)a->codec - (unsigned long)b->codec;
 
 	return 0;
 }
diff --git a/sound/sound_core.c b/sound/sound_core.c
index c03bbae..5580ace 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -104,7 +104,6 @@
 
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sound.h>
diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c
index 1bc56b2..337a002 100644
--- a/sound/spi/at73c213.c
+++ b/sound/spi/at73c213.c
@@ -155,7 +155,7 @@
 	if (max_tries < 1)
 		max_tries = 1;
 
-	/* ssc_div must be a power of 2. */
+	/* ssc_div must be even. */
 	ssc_div = (ssc_div + 1) & ~1UL;
 
 	if ((ssc_rate / (ssc_div * 2 * 16)) < BITRATE_MIN) {