Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86/amd-iommu: Fix boot crash with hidden PCI devices
  x86/amd-iommu: Use only per-device dma_ops
  x86/amd-iommu: Fix 3 possible endless loops
diff --git a/CREDITS b/CREDITS
index a7ea8e3..d78359f 100644
--- a/CREDITS
+++ b/CREDITS
@@ -518,6 +518,14 @@
 E: zab@zabbo.net
 D: maestro pci sound
 
+M: David Brownell
+D: Kernel engineer, mentor, and friend.  Maintained USB EHCI and
+D: gadget layers, SPI subsystem, GPIO subsystem, and more than a few
+D: device drivers.  His encouragement also helped many engineers get
+D: started working on the Linux kernel.  David passed away in early
+D: 2011, and will be greatly missed.
+W: https://lkml.org/lkml/2011/4/5/36
+
 N: Gary Brubaker
 E: xavyer@ix.netcom.com
 D: USB Serial Empeg Empeg-car Mark I/II Driver
diff --git a/MAINTAINERS b/MAINTAINERS
index 29801f7..fb02949 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4252,8 +4252,7 @@
 F:	include/linux/mmc/
 
 MULTIMEDIA CARD (MMC) ETC. OVER SPI
-M:	David Brownell <dbrownell@users.sourceforge.net>
-S:	Odd Fixes
+S:	Orphan
 F:	drivers/mmc/host/mmc_spi.c
 F:	include/linux/spi/mmc_spi.h
 
@@ -4603,7 +4602,6 @@
 
 OMAP USB SUPPORT
 M:	Felipe Balbi <balbi@ti.com>
-M:	David Brownell <dbrownell@users.sourceforge.net>
 L:	linux-usb@vger.kernel.org
 L:	linux-omap@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
@@ -5984,7 +5982,6 @@
 F:	drivers/staging/tty/specialix*
 
 SPI SUBSYSTEM
-M:	David Brownell <dbrownell@users.sourceforge.net>
 M:	Grant Likely <grant.likely@secretlab.ca>
 L:	spi-devel-general@lists.sourceforge.net
 Q:	http://patchwork.kernel.org/project/spi-devel-general/list/
@@ -6432,9 +6429,8 @@
 F:	drivers/usb/misc/rio500*
 
 USB EHCI DRIVER
-M:	David Brownell <dbrownell@users.sourceforge.net>
 L:	linux-usb@vger.kernel.org
-S:	Odd Fixes
+S:	Orphan
 F:	Documentation/usb/ehci.txt
 F:	drivers/usb/host/ehci*
 
@@ -6448,10 +6444,9 @@
 F:	drivers/media/video/et61x251/
 
 USB GADGET/PERIPHERAL SUBSYSTEM
-M:	David Brownell <dbrownell@users.sourceforge.net>
 L:	linux-usb@vger.kernel.org
 W:	http://www.linux-usb.org/gadget
-S:	Maintained
+S:	Orphan
 F:	drivers/usb/gadget/
 F:	include/linux/usb/gadget*
 
@@ -6492,9 +6487,8 @@
 F:	sound/usb/midi.*
 
 USB OHCI DRIVER
-M:	David Brownell <dbrownell@users.sourceforge.net>
 L:	linux-usb@vger.kernel.org
-S:	Odd Fixes
+S:	Orphan
 F:	Documentation/usb/ohci.txt
 F:	drivers/usb/host/ohci*
 
diff --git a/arch/arm/mach-omap1/dma.c b/arch/arm/mach-omap1/dma.c
index d855934..f5a5220 100644
--- a/arch/arm/mach-omap1/dma.c
+++ b/arch/arm/mach-omap1/dma.c
@@ -284,14 +284,15 @@
 	dma_base = ioremap(res[0].start, resource_size(&res[0]));
 	if (!dma_base) {
 		pr_err("%s: Unable to ioremap\n", __func__);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto exit_device_put;
 	}
 
 	ret = platform_device_add_resources(pdev, res, ARRAY_SIZE(res));
 	if (ret) {
 		dev_err(&pdev->dev, "%s: Unable to add resources for %s%d\n",
 			__func__, pdev->name, pdev->id);
-		goto exit_device_del;
+		goto exit_device_put;
 	}
 
 	p = kzalloc(sizeof(struct omap_system_dma_plat_info), GFP_KERNEL);
@@ -299,7 +300,7 @@
 		dev_err(&pdev->dev, "%s: Unable to allocate 'p' for %s\n",
 			__func__, pdev->name);
 		ret = -ENOMEM;
-		goto exit_device_put;
+		goto exit_device_del;
 	}
 
 	d = kzalloc(sizeof(struct omap_dma_dev_attr), GFP_KERNEL);
@@ -380,10 +381,10 @@
 	kfree(d);
 exit_release_p:
 	kfree(p);
-exit_device_put:
-	platform_device_put(pdev);
 exit_device_del:
 	platform_device_del(pdev);
+exit_device_put:
+	platform_device_put(pdev);
 
 	return ret;
 }
diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c
index d54969b..5de6eac 100644
--- a/arch/arm/mach-omap2/board-2430sdp.c
+++ b/arch/arm/mach-omap2/board-2430sdp.c
@@ -26,13 +26,13 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/gpio.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
-#include <mach/gpio.h>
 #include <plat/board.h>
 #include <plat/common.h>
 #include <plat/gpmc.h>
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index ae2963a..5dac974 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -622,19 +622,19 @@
 			 OMAP_MUX_MODE0),
 };
 
-static struct omap_board_data serial1_data = {
+static struct omap_board_data serial1_data __initdata = {
 	.id		= 0,
 	.pads		= serial1_pads,
 	.pads_cnt	= ARRAY_SIZE(serial1_pads),
 };
 
-static struct omap_board_data serial2_data = {
+static struct omap_board_data serial2_data __initdata = {
 	.id		= 1,
 	.pads		= serial2_pads,
 	.pads_cnt	= ARRAY_SIZE(serial2_pads),
 };
 
-static struct omap_board_data serial3_data = {
+static struct omap_board_data serial3_data __initdata = {
 	.id		= 2,
 	.pads		= serial3_pads,
 	.pads_cnt	= ARRAY_SIZE(serial3_pads),
diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 73fa90b..63de2d3 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -258,7 +258,7 @@
 	{ ETH_KS8851_IRQ,	GPIOF_IN,		"eth_irq"	},
 };
 
-static int omap_ethernet_init(void)
+static int __init omap_ethernet_init(void)
 {
 	int status;
 
@@ -322,6 +322,7 @@
 		.gpio_wp	= -EINVAL,
 		.nonremovable   = true,
 		.ocr_mask	= MMC_VDD_29_30,
+		.no_off_init	= true,
 	},
 	{
 		.mmc		= 1,
@@ -681,19 +682,19 @@
 			 OMAP_PIN_OUTPUT | OMAP_MUX_MODE0),
 };
 
-static struct omap_board_data serial2_data = {
+static struct omap_board_data serial2_data __initdata = {
 	.id		= 1,
 	.pads		= serial2_pads,
 	.pads_cnt	= ARRAY_SIZE(serial2_pads),
 };
 
-static struct omap_board_data serial3_data = {
+static struct omap_board_data serial3_data __initdata = {
 	.id		= 2,
 	.pads		= serial3_pads,
 	.pads_cnt	= ARRAY_SIZE(serial3_pads),
 };
 
-static struct omap_board_data serial4_data = {
+static struct omap_board_data serial4_data __initdata = {
 	.id		= 3,
 	.pads		= serial4_pads,
 	.pads_cnt	= ARRAY_SIZE(serial4_pads),
@@ -729,7 +730,7 @@
 
 	if (omap_rev() == OMAP4430_REV_ES1_0)
 		package = OMAP_PACKAGE_CBL;
-	omap4_mux_init(board_mux, package);
+	omap4_mux_init(board_mux, NULL, package);
 
 	omap_board_config = sdp4430_config;
 	omap_board_config_size = ARRAY_SIZE(sdp4430_config);
diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c
index f3beb8e..b124bdf 100644
--- a/arch/arm/mach-omap2/board-apollon.c
+++ b/arch/arm/mach-omap2/board-apollon.c
@@ -27,13 +27,13 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/smc91x.h>
+#include <linux/gpio.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 
-#include <mach/gpio.h>
 #include <plat/led.h>
 #include <plat/usb.h>
 #include <plat/board.h>
diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index c63115b..77456de 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -63,8 +63,6 @@
 #define SB_T35_SMSC911X_CS	4
 #define SB_T35_SMSC911X_GPIO	65
 
-#define NAND_BLOCK_SIZE		SZ_128K
-
 #if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE)
 #include <linux/smsc911x.h>
 #include <plat/gpmc-smsc911x.h>
diff --git a/arch/arm/mach-omap2/board-cm-t3517.c b/arch/arm/mach-omap2/board-cm-t3517.c
index 08f08e8..c3a9fd3 100644
--- a/arch/arm/mach-omap2/board-cm-t3517.c
+++ b/arch/arm/mach-omap2/board-cm-t3517.c
@@ -48,6 +48,7 @@
 
 #include "mux.h"
 #include "control.h"
+#include "common-board-devices.h"
 
 #if defined(CONFIG_LEDS_GPIO) || defined(CONFIG_LEDS_GPIO_MODULE)
 static struct gpio_led cm_t3517_leds[] = {
@@ -177,7 +178,7 @@
 	.reset_gpio_port[2]  = -EINVAL,
 };
 
-static int cm_t3517_init_usbh(void)
+static int __init cm_t3517_init_usbh(void)
 {
 	int err;
 
@@ -203,8 +204,6 @@
 #endif
 
 #if defined(CONFIG_MTD_NAND_OMAP2) || defined(CONFIG_MTD_NAND_OMAP2_MODULE)
-#define NAND_BLOCK_SIZE		SZ_128K
-
 static struct mtd_partition cm_t3517_nand_partitions[] = {
 	{
 		.name           = "xloader",
diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c
index cf520d7..34956ec 100644
--- a/arch/arm/mach-omap2/board-devkit8000.c
+++ b/arch/arm/mach-omap2/board-devkit8000.c
@@ -61,8 +61,6 @@
 #include "timer-gp.h"
 #include "common-board-devices.h"
 
-#define NAND_BLOCK_SIZE		SZ_128K
-
 #define OMAP_DM9000_GPIO_IRQ	25
 #define OMAP3_DEVKIT_TS_GPIO	27
 
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index be71426..7f21d24 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -54,8 +54,6 @@
 #include "pm.h"
 #include "common-board-devices.h"
 
-#define NAND_BLOCK_SIZE		SZ_128K
-
 /*
  * OMAP3 Beagle revision
  * Run time detection of Beagle revision is done by reading GPIO.
@@ -106,6 +104,9 @@
 	beagle_rev = gpio_get_value(171) | (gpio_get_value(172) << 1)
 			| (gpio_get_value(173) << 2);
 
+	gpio_free_array(omap3_beagle_rev_gpios,
+			ARRAY_SIZE(omap3_beagle_rev_gpios));
+
 	switch (beagle_rev) {
 	case 7:
 		printk(KERN_INFO "OMAP3 Beagle Rev: Ax/Bx\n");
@@ -579,6 +580,9 @@
 	omap_nand_flash_init(NAND_BUSWIDTH_16, omap3beagle_nand_partitions,
 			     ARRAY_SIZE(omap3beagle_nand_partitions));
 
+	/* Ensure msecure is mux'd to be able to set the RTC. */
+	omap_mux_init_signal("sys_drm_msecure", OMAP_PIN_OFF_OUTPUT_HIGH);
+
 	/* Ensure SDRC pins are mux'd for self-refresh */
 	omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT);
 	omap_mux_init_signal("sdrc_cke1", OMAP_PIN_OUTPUT);
diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index 1d10736..2a0bb48 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -30,6 +30,7 @@
 #include <linux/leds.h>
 #include <linux/input.h>
 #include <linux/input/matrix_keypad.h>
+#include <linux/gpio.h>
 #include <linux/gpio_keys.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
@@ -41,7 +42,6 @@
 
 #include <plat/board.h>
 #include <plat/common.h>
-#include <mach/gpio.h>
 #include <mach/hardware.h>
 #include <plat/mcspi.h>
 #include <plat/usb.h>
@@ -57,8 +57,6 @@
 #define PANDORA_WIFI_NRESET_GPIO	23
 #define OMAP3_PANDORA_TS_GPIO		94
 
-#define NAND_BLOCK_SIZE			SZ_128K
-
 static struct mtd_partition omap3pandora_nand_partitions[] = {
 	{
 		.name           = "xloader",
diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c
index 82872d7..5f649fa 100644
--- a/arch/arm/mach-omap2/board-omap3touchbook.c
+++ b/arch/arm/mach-omap2/board-omap3touchbook.c
@@ -56,8 +56,6 @@
 
 #include <asm/setup.h>
 
-#define NAND_BLOCK_SIZE		SZ_128K
-
 #define OMAP3_AC_GPIO		136
 #define OMAP3_TS_GPIO		162
 #define TB_BL_PWM_TIMER		9
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 90485fc..0cfe200 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -526,19 +526,19 @@
 			 OMAP_PIN_OUTPUT | OMAP_MUX_MODE0),
 };
 
-static struct omap_board_data serial2_data = {
+static struct omap_board_data serial2_data __initdata = {
 	.id             = 1,
 	.pads           = serial2_pads,
 	.pads_cnt       = ARRAY_SIZE(serial2_pads),
 };
 
-static struct omap_board_data serial3_data = {
+static struct omap_board_data serial3_data __initdata = {
 	.id             = 2,
 	.pads           = serial3_pads,
 	.pads_cnt       = ARRAY_SIZE(serial3_pads),
 };
 
-static struct omap_board_data serial4_data = {
+static struct omap_board_data serial4_data __initdata = {
 	.id             = 3,
 	.pads           = serial4_pads,
 	.pads_cnt       = ARRAY_SIZE(serial4_pads),
@@ -687,7 +687,7 @@
 
 	if (omap_rev() == OMAP4430_REV_ES1_0)
 		package = OMAP_PACKAGE_CBL;
-	omap4_mux_init(board_mux, package);
+	omap4_mux_init(board_mux, NULL, package);
 
 	if (wl12xx_set_platform_data(&omap_panda_wlan_data))
 		pr_err("error setting wl12xx data\n");
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index 1555918..175e1ab 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -24,6 +24,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/i2c/twl.h>
@@ -45,7 +46,6 @@
 #include <plat/common.h>
 #include <video/omapdss.h>
 #include <video/omap-panel-generic-dpi.h>
-#include <mach/gpio.h>
 #include <plat/gpmc.h>
 #include <mach/hardware.h>
 #include <plat/nand.h>
@@ -65,8 +65,6 @@
 #define OVERO_GPIO_USBH_CPEN	168
 #define OVERO_GPIO_USBH_NRESET	183
 
-#define NAND_BLOCK_SIZE SZ_128K
-
 #define OVERO_SMSC911X_CS      5
 #define OVERO_SMSC911X_GPIO    176
 #define OVERO_SMSC911X2_CS     4
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index f6247e7..9903667 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -488,6 +488,7 @@
 		.name			= "V28_A",
 		.min_uV			= 2800000,
 		.max_uV			= 3000000,
+		.always_on		= true, /* due VIO leak to AIC34 VDDs */
 		.apply_uV		= true,
 		.valid_modes_mask	= REGULATOR_MODE_NORMAL
 					| REGULATOR_MODE_STANDBY,
@@ -582,7 +583,7 @@
 {
 	/* FIXME this gpio setup is just a placeholder for now */
 	gpio_request_one(gpio + 6, GPIOF_OUT_INIT_LOW, "backlight_pwm");
-	gpio_request_one(gpio + 7, GPIOF_OUT_INIT_HIGH, "speaker_en");
+	gpio_request_one(gpio + 7, GPIOF_OUT_INIT_LOW, "speaker_en");
 
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/board-zoom-display.c b/arch/arm/mach-omap2/board-zoom-display.c
index c7c6beb..d4683ba 100644
--- a/arch/arm/mach-omap2/board-zoom-display.c
+++ b/arch/arm/mach-omap2/board-zoom-display.c
@@ -26,7 +26,7 @@
 	{ LCD_PANEL_QVGA_GPIO,	GPIOF_OUT_INIT_HIGH, "lcd qvga"	 },
 };
 
-static void zoom_lcd_panel_init(void)
+static void __init zoom_lcd_panel_init(void)
 {
 	zoom_lcd_gpios[0].gpio = (omap_rev() > OMAP3430_REV_ES3_0) ?
 			LCD_PANEL_RESET_GPIO_PROD :
diff --git a/arch/arm/mach-omap2/common-board-devices.c b/arch/arm/mach-omap2/common-board-devices.c
index e94903b..94ccf46 100644
--- a/arch/arm/mach-omap2/common-board-devices.c
+++ b/arch/arm/mach-omap2/common-board-devices.c
@@ -85,18 +85,18 @@
 	struct spi_board_info *spi_bi = &ads7846_spi_board_info;
 	int err;
 
-	err = gpio_request(gpio_pendown, "TS PenDown");
-	if (err) {
-		pr_err("Could not obtain gpio for TS PenDown: %d\n", err);
-		return;
+	if (board_pdata && board_pdata->get_pendown_state) {
+		err = gpio_request_one(gpio_pendown, GPIOF_IN, "TSPenDown");
+		if (err) {
+			pr_err("Couldn't obtain gpio for TSPenDown: %d\n", err);
+			return;
+		}
+		gpio_export(gpio_pendown, 0);
+
+		if (gpio_debounce)
+			gpio_set_debounce(gpio_pendown, gpio_debounce);
 	}
 
-	gpio_direction_input(gpio_pendown);
-	gpio_export(gpio_pendown, 0);
-
-	if (gpio_debounce)
-		gpio_set_debounce(gpio_pendown, gpio_debounce);
-
 	ads7846_config.gpio_pendown = gpio_pendown;
 
 	spi_bi->bus_num	= bus_num;
diff --git a/arch/arm/mach-omap2/common-board-devices.h b/arch/arm/mach-omap2/common-board-devices.h
index eb80b3b..6797190 100644
--- a/arch/arm/mach-omap2/common-board-devices.h
+++ b/arch/arm/mach-omap2/common-board-devices.h
@@ -1,6 +1,8 @@
 #ifndef __OMAP_COMMON_BOARD_DEVICES__
 #define __OMAP_COMMON_BOARD_DEVICES__
 
+#define NAND_BLOCK_SIZE	SZ_128K
+
 struct twl4030_platform_data;
 struct mtd_partition;
 
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 7b85585..5b8ca68 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -97,7 +97,7 @@
 
 	WARN(IS_ERR(od), "could not build omap_device for %s\n", oh_name);
 
-	return PTR_ERR(od);
+	return IS_ERR(od) ? PTR_ERR(od) : 0;
 }
 postcore_initcall(omap4_l3_init);
 
diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c
index b2f30be..66868c5 100644
--- a/arch/arm/mach-omap2/hsmmc.c
+++ b/arch/arm/mach-omap2/hsmmc.c
@@ -145,6 +145,7 @@
 				 int power_on, int vdd)
 {
 	u32 reg;
+	unsigned long timeout;
 
 	if (power_on) {
 		reg = omap4_ctrl_pad_readl(control_pbias_offset);
@@ -157,9 +158,15 @@
 			OMAP4_MMC1_PWRDNZ_MASK |
 			OMAP4_USBC1_ICUSB_PWRDNZ_MASK);
 		omap4_ctrl_pad_writel(reg, control_pbias_offset);
-		/* 4 microsec delay for comparator to generate an error*/
-		udelay(4);
-		reg = omap4_ctrl_pad_readl(control_pbias_offset);
+
+		timeout = jiffies + msecs_to_jiffies(5);
+		do {
+			reg = omap4_ctrl_pad_readl(control_pbias_offset);
+			if (!(reg & OMAP4_MMC1_PBIASLITE_VMODE_ERROR_MASK))
+				break;
+			usleep_range(100, 200);
+		} while (!time_after(jiffies, timeout));
+
 		if (reg & OMAP4_MMC1_PBIASLITE_VMODE_ERROR_MASK) {
 			pr_err("Pbias Voltage is not same as LDO\n");
 			/* Caution : On VMODE_ERROR Power Down MMC IO */
@@ -331,6 +338,9 @@
 	if (c->no_off)
 		mmc->slots[0].no_off = 1;
 
+	if (c->no_off_init)
+		mmc->slots[0].no_regulator_off_init = c->no_off_init;
+
 	if (c->vcc_aux_disable_is_sleep)
 		mmc->slots[0].vcc_aux_disable_is_sleep = 1;
 
diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h
index f119348..f757e78 100644
--- a/arch/arm/mach-omap2/hsmmc.h
+++ b/arch/arm/mach-omap2/hsmmc.h
@@ -18,6 +18,7 @@
 	bool	nonremovable;	/* Nonremovable e.g. eMMC */
 	bool	power_saving;	/* Try to sleep or power off when possible */
 	bool	no_off;		/* power_saving and power is not to go off */
+	bool	no_off_init;	/* no power off when not in MMC sleep state */
 	bool	vcc_aux_disable_is_sleep; /* Regulator off remapped to sleep */
 	int	gpio_cd;	/* or -EINVAL */
 	int	gpio_wp;	/* or -EINVAL */
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index a4ab1e3..c7fb22a 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -83,6 +83,9 @@
 void omap_mux_write_array(struct omap_mux_partition *partition,
 				 struct omap_board_mux *board_mux)
 {
+	if (!board_mux)
+		return;
+
 	while (board_mux->reg_offset != OMAP_MUX_TERMINATOR) {
 		omap_mux_write(partition, board_mux->value,
 			       board_mux->reg_offset);
@@ -906,7 +909,7 @@
 u16 omap_mux_get_gpio(int gpio)
 {
 	struct omap_mux_partition *partition;
-	struct omap_mux *m;
+	struct omap_mux *m = NULL;
 
 	list_for_each_entry(partition, &mux_partitions, node) {
 		m = omap_mux_get_by_gpio(partition, gpio);
diff --git a/arch/arm/mach-omap2/mux.h b/arch/arm/mach-omap2/mux.h
index 137f321..2132308 100644
--- a/arch/arm/mach-omap2/mux.h
+++ b/arch/arm/mach-omap2/mux.h
@@ -323,10 +323,12 @@
 
 /**
  * omap4_mux_init() - initialize mux system with board specific set
- * @board_mux:		Board specific mux table
+ * @board_subset:	Board specific mux table
+ * @board_wkup_subset:	Board specific mux table for wakeup instance
  * @flags:		OMAP package type used for the board
  */
-int omap4_mux_init(struct omap_board_mux *board_mux, int flags);
+int omap4_mux_init(struct omap_board_mux *board_subset,
+	struct omap_board_mux *board_wkup_subset, int flags);
 
 /**
  * omap_mux_init - private mux init function, do not call
diff --git a/arch/arm/mach-omap2/mux44xx.c b/arch/arm/mach-omap2/mux44xx.c
index 9a66445..f5a74da 100644
--- a/arch/arm/mach-omap2/mux44xx.c
+++ b/arch/arm/mach-omap2/mux44xx.c
@@ -1309,7 +1309,8 @@
 #define omap4_wkup_cbl_cbs_ball  NULL
 #endif
 
-int __init omap4_mux_init(struct omap_board_mux *board_subset, int flags)
+int __init omap4_mux_init(struct omap_board_mux *board_subset,
+	struct omap_board_mux *board_wkup_subset, int flags)
 {
 	struct omap_ball *package_balls_core;
 	struct omap_ball *package_balls_wkup = omap4_wkup_cbl_cbs_ball;
@@ -1347,7 +1348,7 @@
 			    OMAP_MUX_GPIO_IN_MODE3,
 			    OMAP4_CTRL_MODULE_PAD_WKUP_MUX_PBASE,
 			    OMAP4_CTRL_MODULE_PAD_WKUP_MUX_SIZE,
-			    omap4_wkup_muxmodes, NULL, board_subset,
+			    omap4_wkup_muxmodes, NULL, board_wkup_subset,
 			    package_balls_wkup);
 
 	return ret;
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index e034294..293fa6c 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -1628,7 +1628,7 @@
 			void *data)
 {
 	struct omap_hwmod *temp_oh;
-	int ret;
+	int ret = 0;
 
 	if (!fn)
 		return -EINVAL;
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index abc548a..e1c69ff 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -5109,7 +5109,7 @@
 	&omap44xx_iva_seq1_hwmod,
 
 	/* kbd class */
-/*	&omap44xx_kbd_hwmod, */
+	&omap44xx_kbd_hwmod,
 
 	/* mailbox class */
 	&omap44xx_mailbox_hwmod,
diff --git a/arch/arm/mach-omap2/omap_phy_internal.c b/arch/arm/mach-omap2/omap_phy_internal.c
index f47813e..58775e3 100644
--- a/arch/arm/mach-omap2/omap_phy_internal.c
+++ b/arch/arm/mach-omap2/omap_phy_internal.c
@@ -56,8 +56,10 @@
 	/* Power down the phy */
 	__raw_writel(PHY_PD, ctrl_base + CONTROL_DEV_CONF);
 
-	if (!dev)
+	if (!dev) {
+		iounmap(ctrl_base);
 		return 0;
+	}
 
 	phyclk = clk_get(dev, "ocp2scp_usb_phy_ick");
 	if (IS_ERR(phyclk)) {
diff --git a/arch/arm/plat-omap/include/plat/flash.h b/arch/arm/plat-omap/include/plat/flash.h
index 3083195..0d88499 100644
--- a/arch/arm/plat-omap/include/plat/flash.h
+++ b/arch/arm/plat-omap/include/plat/flash.h
@@ -11,6 +11,7 @@
 
 #include <linux/mtd/map.h>
 
+struct platform_device;
 extern void omap1_set_vpp(struct platform_device *pdev, int enable);
 
 #endif
diff --git a/arch/arm/plat-omap/include/plat/iovmm.h b/arch/arm/plat-omap/include/plat/iovmm.h
index 32a2f6c..e992b96 100644
--- a/arch/arm/plat-omap/include/plat/iovmm.h
+++ b/arch/arm/plat-omap/include/plat/iovmm.h
@@ -29,9 +29,6 @@
  * lower 16 bit is used for h/w and upper 16 bit is for s/w.
  */
 #define IOVMF_SW_SHIFT		16
-#define IOVMF_HW_SIZE		(1 << IOVMF_SW_SHIFT)
-#define IOVMF_HW_MASK		(IOVMF_HW_SIZE - 1)
-#define IOVMF_SW_MASK		(~IOVMF_HW_MASK)UL
 
 /*
  * iovma: h/w flags derived from cam and ram attribute
diff --git a/arch/arm/plat-omap/include/plat/mmc.h b/arch/arm/plat-omap/include/plat/mmc.h
index f38fef9..c7b8741 100644
--- a/arch/arm/plat-omap/include/plat/mmc.h
+++ b/arch/arm/plat-omap/include/plat/mmc.h
@@ -101,6 +101,9 @@
 		/* If using power_saving and the MMC power is not to go off */
 		unsigned no_off:1;
 
+		/* eMMC does not handle power off when not in sleep state */
+		unsigned no_regulator_off_init:1;
+
 		/* Regulator off remapped to sleep */
 		unsigned vcc_aux_disable_is_sleep:1;
 
diff --git a/arch/arm/plat-omap/iovmm.c b/arch/arm/plat-omap/iovmm.c
index 51ef43e..83a37c5 100644
--- a/arch/arm/plat-omap/iovmm.c
+++ b/arch/arm/plat-omap/iovmm.c
@@ -648,7 +648,6 @@
 			return PTR_ERR(va);
 	}
 
-	flags &= IOVMF_HW_MASK;
 	flags |= IOVMF_DISCONT;
 	flags |= IOVMF_MMIO;
 
@@ -706,7 +705,6 @@
 	if (!va)
 		return -ENOMEM;
 
-	flags &= IOVMF_HW_MASK;
 	flags |= IOVMF_DISCONT;
 	flags |= IOVMF_ALLOC;
 
@@ -795,7 +793,6 @@
 	if (!va)
 		return -ENOMEM;
 
-	flags &= IOVMF_HW_MASK;
 	flags |= IOVMF_LINEAR;
 	flags |= IOVMF_MMIO;
 
@@ -853,7 +850,6 @@
 		return -ENOMEM;
 	pa = virt_to_phys(va);
 
-	flags &= IOVMF_HW_MASK;
 	flags |= IOVMF_LINEAR;
 	flags |= IOVMF_ALLOC;
 
diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index a3f50b3..6af3d0b 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -166,7 +166,7 @@
 		else if (cpu_is_omap1611())
 			omap_sram_size = SZ_256K;
 		else {
-			printk(KERN_ERR "Could not detect SRAM size\n");
+			pr_err("Could not detect SRAM size\n");
 			omap_sram_size = 0x4000;
 		}
 	}
@@ -221,10 +221,10 @@
 	omap_sram_io_desc[0].length = ROUND_DOWN(omap_sram_size, PAGE_SIZE);
 	iotable_init(omap_sram_io_desc, ARRAY_SIZE(omap_sram_io_desc));
 
-	printk(KERN_INFO "SRAM: Mapped pa 0x%08lx to va 0x%08lx size: 0x%lx\n",
-	__pfn_to_phys(omap_sram_io_desc[0].pfn),
-	omap_sram_io_desc[0].virtual,
-	       omap_sram_io_desc[0].length);
+	pr_info("SRAM: Mapped pa 0x%08llx to va 0x%08lx size: 0x%lx\n",
+		(long long) __pfn_to_phys(omap_sram_io_desc[0].pfn),
+		omap_sram_io_desc[0].virtual,
+		omap_sram_io_desc[0].length);
 
 	/*
 	 * Normally devicemaps_init() would flush caches and tlb after
@@ -252,7 +252,7 @@
 void *omap_sram_push_address(unsigned long size)
 {
 	if (size > (omap_sram_ceil - (omap_sram_base + SRAM_BOOTLOADER_SZ))) {
-		printk(KERN_ERR "Not enough space in SRAM\n");
+		pr_err("Not enough space in SRAM\n");
 		return NULL;
 	}
 
diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c
index f03cb27..bd3e5e7 100644
--- a/arch/mn10300/kernel/traps.c
+++ b/arch/mn10300/kernel/traps.c
@@ -28,7 +28,7 @@
 #include <linux/irq.h>
 #include <asm/processor.h>
 #include <asm/system.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -156,7 +156,7 @@
 
 	case EXCEP_TRAP:
 	case EXCEP_UNIMPINS:
-		if (get_user(opcode, (uint8_t __user *)regs->pc) != 0)
+		if (probe_kernel_read(&opcode, (u8 *)regs->pc, 1) < 0)
 			break;
 		if (opcode == 0xff) {
 			if (notify_die(DIE_BREAKPOINT, str, regs, code, 0, 0))
diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S
index 6f702a6..13c4814 100644
--- a/arch/mn10300/kernel/vmlinux.lds.S
+++ b/arch/mn10300/kernel/vmlinux.lds.S
@@ -44,6 +44,7 @@
   RO_DATA(PAGE_SIZE)
 
   /* writeable */
+  _sdata = .;     /* Start of rw data section */
   RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
   _edata = .;
 
diff --git a/arch/mn10300/mm/cache-dbg-flush-by-reg.S b/arch/mn10300/mm/cache-dbg-flush-by-reg.S
index 665919f..a775ea5 100644
--- a/arch/mn10300/mm/cache-dbg-flush-by-reg.S
+++ b/arch/mn10300/mm/cache-dbg-flush-by-reg.S
@@ -120,14 +120,14 @@
 	# conditionally purge this line in all ways
 	mov	d1,(L1_CACHE_WAYDISP*0,a0)
 
-debugger_local_cache_flushinv_no_dcache:
+debugger_local_cache_flushinv_one_no_dcache:
 	#
 	# now try to flush the icache
 	#
 	mov	CHCTR,a0
 	movhu	(a0),d0
 	btst	CHCTR_ICEN,d0
-	beq	mn10300_local_icache_inv_range_reg_end
+	beq	debugger_local_cache_flushinv_one_end
 
 	LOCAL_CLI_SAVE(d1)
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9fab2aa..90d77bd 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -89,6 +89,7 @@
 	select HAVE_GET_USER_PAGES_FAST
 	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
+	select HAVE_RCU_TABLE_FREE if SMP
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f6314af..38e71eb 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -17,15 +17,15 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 
-#define check_pgt_cache()	do {} while (0)
-
 unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
-void crst_table_free_rcu(struct mm_struct *, unsigned long *);
 
 unsigned long *page_table_alloc(struct mm_struct *);
 void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mm_struct *, unsigned long *);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+void __tlb_remove_table(void *_table);
+#endif
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index e4efacf..801fbe1 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -293,19 +293,6 @@
  * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
  */
 
-/* Page status table bits for virtualization */
-#define RCP_ACC_BITS	0xf000000000000000UL
-#define RCP_FP_BIT	0x0800000000000000UL
-#define RCP_PCL_BIT	0x0080000000000000UL
-#define RCP_HR_BIT	0x0040000000000000UL
-#define RCP_HC_BIT	0x0020000000000000UL
-#define RCP_GR_BIT	0x0004000000000000UL
-#define RCP_GC_BIT	0x0002000000000000UL
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT	0x0000800000000000UL
-#define KVM_UC_BIT	0x0000400000000000UL
-
 #ifndef __s390x__
 
 /* Bits in the segment table address-space-control-element */
@@ -325,6 +312,19 @@
 #define _SEGMENT_ENTRY		(_SEGMENT_ENTRY_PTL)
 #define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
 
+/* Page status table bits for virtualization */
+#define RCP_ACC_BITS	0xf0000000UL
+#define RCP_FP_BIT	0x08000000UL
+#define RCP_PCL_BIT	0x00800000UL
+#define RCP_HR_BIT	0x00400000UL
+#define RCP_HC_BIT	0x00200000UL
+#define RCP_GR_BIT	0x00040000UL
+#define RCP_GC_BIT	0x00020000UL
+
+/* User dirty / referenced bit for KVM's migration feature */
+#define KVM_UR_BIT	0x00008000UL
+#define KVM_UC_BIT	0x00004000UL
+
 #else /* __s390x__ */
 
 /* Bits in the segment/region table address-space-control-element */
@@ -367,6 +367,19 @@
 #define _SEGMENT_ENTRY_LARGE	0x400	/* STE-format control, large page   */
 #define _SEGMENT_ENTRY_CO	0x100	/* change-recording override   */
 
+/* Page status table bits for virtualization */
+#define RCP_ACC_BITS	0xf000000000000000UL
+#define RCP_FP_BIT	0x0800000000000000UL
+#define RCP_PCL_BIT	0x0080000000000000UL
+#define RCP_HR_BIT	0x0040000000000000UL
+#define RCP_HC_BIT	0x0020000000000000UL
+#define RCP_GR_BIT	0x0004000000000000UL
+#define RCP_GC_BIT	0x0002000000000000UL
+
+/* User dirty / referenced bit for KVM's migration feature */
+#define KVM_UR_BIT	0x0000800000000000UL
+#define KVM_UC_BIT	0x0000400000000000UL
+
 #endif /* __s390x__ */
 
 /*
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 350e7ee..15c9762 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -139,110 +139,47 @@
 	struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
 } __attribute__ ((packed, aligned(2048)));
 
-/**
- * struct sbal_flags - storage block address list flags
- * @last: last entry
- * @cont: contiguous storage
- * @frag: fragmentation
- */
-struct sbal_flags {
-	u8	: 1;
-	u8 last : 1;
-	u8 cont : 1;
-	u8	: 1;
-	u8 frag : 2;
-	u8	: 2;
-} __attribute__ ((packed));
+#define SBAL_EFLAGS_LAST_ENTRY		0x40
+#define SBAL_EFLAGS_CONTIGUOUS		0x20
+#define SBAL_EFLAGS_FIRST_FRAG		0x04
+#define SBAL_EFLAGS_MIDDLE_FRAG		0x08
+#define SBAL_EFLAGS_LAST_FRAG		0x0c
+#define SBAL_EFLAGS_MASK		0x6f
 
-#define SBAL_FLAGS_FIRST_FRAG		0x04000000UL
-#define SBAL_FLAGS_MIDDLE_FRAG		0x08000000UL
-#define SBAL_FLAGS_LAST_FRAG		0x0c000000UL
-#define SBAL_FLAGS_LAST_ENTRY		0x40000000UL
-#define SBAL_FLAGS_CONTIGUOUS		0x20000000UL
-
-#define SBAL_FLAGS0_DATA_CONTINUATION	0x20UL
+#define SBAL_SFLAGS0_PCI_REQ		0x40
+#define SBAL_SFLAGS0_DATA_CONTINUATION	0x20
 
 /* Awesome OpenFCP extensions */
-#define SBAL_FLAGS0_TYPE_STATUS		0x00UL
-#define SBAL_FLAGS0_TYPE_WRITE		0x08UL
-#define SBAL_FLAGS0_TYPE_READ		0x10UL
-#define SBAL_FLAGS0_TYPE_WRITE_READ	0x18UL
-#define SBAL_FLAGS0_MORE_SBALS		0x04UL
-#define SBAL_FLAGS0_COMMAND		0x02UL
-#define SBAL_FLAGS0_LAST_SBAL		0x00UL
-#define SBAL_FLAGS0_ONLY_SBAL		SBAL_FLAGS0_COMMAND
-#define SBAL_FLAGS0_MIDDLE_SBAL		SBAL_FLAGS0_MORE_SBALS
-#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND
-#define SBAL_FLAGS0_PCI			0x40
-
-/**
- * struct sbal_sbalf_0 - sbal flags for sbale 0
- * @pci: PCI indicator
- * @cont: data continuation
- * @sbtype: storage-block type (FCP)
- */
-struct sbal_sbalf_0 {
-	u8	  : 1;
-	u8 pci	  : 1;
-	u8 cont   : 1;
-	u8 sbtype : 2;
-	u8	  : 3;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_1 - sbal flags for sbale 1
- * @key: storage key
- */
-struct sbal_sbalf_1 {
-	u8     : 4;
-	u8 key : 4;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_14 - sbal flags for sbale 14
- * @erridx: error index
- */
-struct sbal_sbalf_14 {
-	u8	  : 4;
-	u8 erridx : 4;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_15 - sbal flags for sbale 15
- * @reason: reason for error state
- */
-struct sbal_sbalf_15 {
-	u8 reason;
-} __attribute__ ((packed));
-
-/**
- * union sbal_sbalf - storage block address list flags
- * @i0: sbalf0
- * @i1: sbalf1
- * @i14: sbalf14
- * @i15: sblaf15
- * @value: raw value
- */
-union sbal_sbalf {
-	struct sbal_sbalf_0  i0;
-	struct sbal_sbalf_1  i1;
-	struct sbal_sbalf_14 i14;
-	struct sbal_sbalf_15 i15;
-	u8 value;
-};
+#define SBAL_SFLAGS0_TYPE_STATUS	0x00
+#define SBAL_SFLAGS0_TYPE_WRITE		0x08
+#define SBAL_SFLAGS0_TYPE_READ		0x10
+#define SBAL_SFLAGS0_TYPE_WRITE_READ	0x18
+#define SBAL_SFLAGS0_MORE_SBALS		0x04
+#define SBAL_SFLAGS0_COMMAND		0x02
+#define SBAL_SFLAGS0_LAST_SBAL		0x00
+#define SBAL_SFLAGS0_ONLY_SBAL		SBAL_SFLAGS0_COMMAND
+#define SBAL_SFLAGS0_MIDDLE_SBAL	SBAL_SFLAGS0_MORE_SBALS
+#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND)
 
 /**
  * struct qdio_buffer_element - SBAL entry
- * @flags: flags
+ * @eflags: SBAL entry flags
+ * @scount: SBAL count
+ * @sflags: whole SBAL flags
  * @length: length
  * @addr: address
 */
 struct qdio_buffer_element {
-	u32 flags;
+	u8 eflags;
+	/* private: */
+	u8 res1;
+	/* public: */
+	u8 scount;
+	u8 sflags;
 	u32 length;
 #ifdef CONFIG_32BIT
 	/* private: */
-	void *reserved;
+	void *res2;
 	/* public: */
 #endif
 	void *addr;
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 77eee54..c687a2c 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -26,67 +26,60 @@
 #include <linux/swap.h>
 #include <asm/processor.h>
 #include <asm/pgalloc.h>
-#include <asm/smp.h>
 #include <asm/tlbflush.h>
 
 struct mmu_gather {
 	struct mm_struct *mm;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	struct mmu_table_batch *batch;
+#endif
 	unsigned int fullmm;
-	unsigned int nr_ptes;
-	unsigned int nr_pxds;
-	unsigned int max;
-	void **array;
-	void *local[8];
+	unsigned int need_flush;
 };
 
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
-	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+struct mmu_table_batch {
+	struct rcu_head		rcu;
+	unsigned int		nr;
+	void			*tables[0];
+};
 
-	if (addr) {
-		tlb->array = (void *) addr;
-		tlb->max = PAGE_SIZE / sizeof(void *);
-	}
-}
+#define MAX_TABLE_BATCH		\
+	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+#endif
 
 static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 				  struct mm_struct *mm,
 				  unsigned int full_mm_flush)
 {
 	tlb->mm = mm;
-	tlb->max = ARRAY_SIZE(tlb->local);
-	tlb->array = tlb->local;
 	tlb->fullmm = full_mm_flush;
+	tlb->need_flush = 0;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	tlb->batch = NULL;
+#endif
 	if (tlb->fullmm)
 		__tlb_flush_mm(mm);
-	else
-		__tlb_alloc_page(tlb);
-	tlb->nr_ptes = 0;
-	tlb->nr_pxds = tlb->max;
 }
 
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
-	if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max))
-		__tlb_flush_mm(tlb->mm);
-	while (tlb->nr_ptes > 0)
-		page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]);
-	while (tlb->nr_pxds < tlb->max)
-		crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]);
+	if (!tlb->need_flush)
+		return;
+	tlb->need_flush = 0;
+	__tlb_flush_mm(tlb->mm);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	tlb_table_flush(tlb);
+#endif
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 				  unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
-
-	rcu_table_freelist_finish();
-
-	/* keep the page table cache within bounds */
-	check_pgt_cache();
-
-	if (tlb->array != tlb->local)
-		free_pages((unsigned long) tlb->array, 0);
 }
 
 /*
@@ -112,12 +105,11 @@
 static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				unsigned long address)
 {
-	if (!tlb->fullmm) {
-		tlb->array[tlb->nr_ptes++] = pte;
-		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb);
-	} else
-		page_table_free(tlb->mm, (unsigned long *) pte);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	if (!tlb->fullmm)
+		return page_table_free_rcu(tlb, (unsigned long *) pte);
+#endif
+	page_table_free(tlb->mm, (unsigned long *) pte);
 }
 
 /*
@@ -133,12 +125,11 @@
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 31))
 		return;
-	if (!tlb->fullmm) {
-		tlb->array[--tlb->nr_pxds] = pmd;
-		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb);
-	} else
-		crst_table_free(tlb->mm, (unsigned long *) pmd);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	if (!tlb->fullmm)
+		return tlb_remove_table(tlb, pmd);
+#endif
+	crst_table_free(tlb->mm, (unsigned long *) pmd);
 #endif
 }
 
@@ -155,12 +146,11 @@
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 42))
 		return;
-	if (!tlb->fullmm) {
-		tlb->array[--tlb->nr_pxds] = pud;
-		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb);
-	} else
-		crst_table_free(tlb->mm, (unsigned long *) pud);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	if (!tlb->fullmm)
+		return tlb_remove_table(tlb, pud);
+#endif
+	crst_table_free(tlb->mm, (unsigned long *) pud);
 #endif
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 30ca85c..67345ae 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -731,6 +731,7 @@
 	}
 	memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
 	facilities[0] &= 0xff00fff3f47c0000ULL;
+	facilities[1] &= 0x201c000000000000ULL;
 	return 0;
 }
 
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
index ab0e041..5faa1b1 100644
--- a/arch/s390/kvm/sie64a.S
+++ b/arch/s390/kvm/sie64a.S
@@ -93,4 +93,6 @@
 
 	.section __ex_table,"a"
 	.quad	sie_inst,sie_err
+	.quad	sie_exit,sie_err
+	.quad	sie_reenter,sie_err
 	.previous
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b09763f..37a23c22 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,94 +24,12 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-struct rcu_table_freelist {
-	struct rcu_head rcu;
-	struct mm_struct *mm;
-	unsigned int pgt_index;
-	unsigned int crst_index;
-	unsigned long *table[0];
-};
-
-#define RCU_FREELIST_SIZE \
-	((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
-	  / sizeof(unsigned long))
-
-static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
-
-static void __page_table_free(struct mm_struct *mm, unsigned long *table);
-
-static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
-{
-	struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist);
-	struct rcu_table_freelist *batch = *batchp;
-
-	if (batch)
-		return batch;
-	batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC);
-	if (batch) {
-		batch->mm = mm;
-		batch->pgt_index = 0;
-		batch->crst_index = RCU_FREELIST_SIZE;
-		*batchp = batch;
-	}
-	return batch;
-}
-
-static void rcu_table_freelist_callback(struct rcu_head *head)
-{
-	struct rcu_table_freelist *batch =
-		container_of(head, struct rcu_table_freelist, rcu);
-
-	while (batch->pgt_index > 0)
-		__page_table_free(batch->mm, batch->table[--batch->pgt_index]);
-	while (batch->crst_index < RCU_FREELIST_SIZE)
-		crst_table_free(batch->mm, batch->table[batch->crst_index++]);
-	free_page((unsigned long) batch);
-}
-
-void rcu_table_freelist_finish(void)
-{
-	struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist);
-	struct rcu_table_freelist *batch = *batchp;
-
-	if (!batch)
-		goto out;
-	call_rcu(&batch->rcu, rcu_table_freelist_callback);
-	*batchp = NULL;
-out:
-	put_cpu_var(rcu_table_freelist);
-}
-
-static void smp_sync(void *arg)
-{
-}
-
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER	1
-#define TABLES_PER_PAGE	4
-#define FRAG_MASK	15UL
-#define SECOND_HALVES	10UL
-
-void clear_table_pgstes(unsigned long *table)
-{
-	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
-	memset(table + 256, 0, PAGE_SIZE/4);
-	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
-	memset(table + 768, 0, PAGE_SIZE/4);
-}
-
+#define FRAG_MASK	0x0f
 #else
 #define ALLOC_ORDER	2
-#define TABLES_PER_PAGE	2
-#define FRAG_MASK	3UL
-#define SECOND_HALVES	2UL
-
-void clear_table_pgstes(unsigned long *table)
-{
-	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
-	memset(table + 256, 0, PAGE_SIZE/2);
-}
-
+#define FRAG_MASK	0x03
 #endif
 
 unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
@@ -140,29 +58,6 @@
 	free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
-void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
-{
-	struct rcu_table_freelist *batch;
-
-	preempt_disable();
-	if (atomic_read(&mm->mm_users) < 2 &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
-		crst_table_free(mm, table);
-		goto out;
-	}
-	batch = rcu_table_freelist_get(mm);
-	if (!batch) {
-		smp_call_function(smp_sync, NULL, 1);
-		crst_table_free(mm, table);
-		goto out;
-	}
-	batch->table[--batch->crst_index] = table;
-	if (batch->pgt_index >= batch->crst_index)
-		rcu_table_freelist_finish();
-out:
-	preempt_enable();
-}
-
 #ifdef CONFIG_64BIT
 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 {
@@ -238,124 +133,175 @@
 }
 #endif
 
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	unsigned int old, new;
+
+	do {
+		old = atomic_read(v);
+		new = old ^ bits;
+	} while (atomic_cmpxchg(v, old, new) != old);
+	return new;
+}
+
 /*
  * page table entry allocation/free routines.
  */
+#ifdef CONFIG_PGSTE
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+{
+	struct page *page;
+	unsigned long *table;
+
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	pgtable_page_ctor(page);
+	atomic_set(&page->_mapcount, 3);
+	table = (unsigned long *) page_to_phys(page);
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	return table;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+	struct page *page;
+
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	pgtable_page_ctor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
+}
+#endif
+
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
 	struct page *page;
 	unsigned long *table;
-	unsigned long bits;
+	unsigned int mask, bit;
 
-	bits = (mm->context.has_pgste) ? 3UL : 1UL;
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm))
+		return page_table_alloc_pgste(mm);
+#endif
+	/* Allocate fragments of a 4K page as 1K/2K page table */
 	spin_lock_bh(&mm->context.list_lock);
-	page = NULL;
+	mask = FRAG_MASK;
 	if (!list_empty(&mm->context.pgtable_list)) {
 		page = list_first_entry(&mm->context.pgtable_list,
 					struct page, lru);
-		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
-			page = NULL;
+		table = (unsigned long *) page_to_phys(page);
+		mask = atomic_read(&page->_mapcount);
+		mask = mask | (mask >> 4);
 	}
-	if (!page) {
+	if ((mask & FRAG_MASK) == FRAG_MASK) {
 		spin_unlock_bh(&mm->context.list_lock);
 		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 		if (!page)
 			return NULL;
 		pgtable_page_ctor(page);
-		page->flags &= ~FRAG_MASK;
+		atomic_set(&page->_mapcount, 1);
 		table = (unsigned long *) page_to_phys(page);
-		if (mm->context.has_pgste)
-			clear_table_pgstes(table);
-		else
-			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
+	} else {
+		for (bit = 1; mask & bit; bit <<= 1)
+			table += PTRS_PER_PTE;
+		mask = atomic_xor_bits(&page->_mapcount, bit);
+		if ((mask & FRAG_MASK) == FRAG_MASK)
+			list_del(&page->lru);
 	}
-	table = (unsigned long *) page_to_phys(page);
-	while (page->flags & bits) {
-		table += 256;
-		bits <<= 1;
-	}
-	page->flags |= bits;
-	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
-		list_move_tail(&page->lru, &mm->context.pgtable_list);
 	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
-static void __page_table_free(struct mm_struct *mm, unsigned long *table)
-{
-	struct page *page;
-	unsigned long bits;
-
-	bits = ((unsigned long) table) & 15;
-	table = (unsigned long *)(((unsigned long) table) ^ bits);
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	page->flags ^= bits;
-	if (!(page->flags & FRAG_MASK)) {
-		pgtable_page_dtor(page);
-		__free_page(page);
-	}
-}
-
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
 	struct page *page;
-	unsigned long bits;
+	unsigned int bit, mask;
 
-	bits = (mm->context.has_pgste) ? 3UL : 1UL;
-	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm))
+		return page_table_free_pgste(table);
+#endif
+	/* Free 1K/2K page table fragment of a 4K page */
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
 	spin_lock_bh(&mm->context.list_lock);
-	page->flags ^= bits;
-	if (page->flags & FRAG_MASK) {
-		/* Page now has some free pgtable fragments. */
-		if (!list_empty(&page->lru))
-			list_move(&page->lru, &mm->context.pgtable_list);
-		page = NULL;
-	} else
-		/* All fragments of the 4K page have been freed. */
+	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
 		list_del(&page->lru);
+	mask = atomic_xor_bits(&page->_mapcount, bit);
+	if (mask & FRAG_MASK)
+		list_add(&page->lru, &mm->context.pgtable_list);
 	spin_unlock_bh(&mm->context.list_lock);
-	if (page) {
+	if (mask == 0) {
 		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
 		__free_page(page);
 	}
 }
 
-void page_table_free_rcu(struct mm_struct *mm, unsigned long *table)
-{
-	struct rcu_table_freelist *batch;
-	struct page *page;
-	unsigned long bits;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 
-	preempt_disable();
-	if (atomic_read(&mm->mm_users) < 2 &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
-		page_table_free(mm, table);
-		goto out;
+static void __page_table_free_rcu(void *table, unsigned bit)
+{
+	struct page *page;
+
+#ifdef CONFIG_PGSTE
+	if (bit == FRAG_MASK)
+		return page_table_free_pgste(table);
+#endif
+	/* Free 1K/2K page table fragment of a 4K page */
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
 	}
-	batch = rcu_table_freelist_get(mm);
-	if (!batch) {
-		smp_call_function(smp_sync, NULL, 1);
-		page_table_free(mm, table);
-		goto out;
+}
+
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
+{
+	struct mm_struct *mm;
+	struct page *page;
+	unsigned int bit, mask;
+
+	mm = tlb->mm;
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm)) {
+		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		tlb_remove_table(tlb, table);
+		return;
 	}
-	bits = (mm->context.has_pgste) ? 3UL : 1UL;
-	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+#endif
+	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock_bh(&mm->context.list_lock);
-	/* Delayed freeing with rcu prevents reuse of pgtable fragments */
-	list_del_init(&page->lru);
+	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+		list_del(&page->lru);
+	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
+	if (mask & FRAG_MASK)
+		list_add_tail(&page->lru, &mm->context.pgtable_list);
 	spin_unlock_bh(&mm->context.list_lock);
-	table = (unsigned long *)(((unsigned long) table) | bits);
-	batch->table[batch->pgt_index++] = table;
-	if (batch->pgt_index >= batch->crst_index)
-		rcu_table_freelist_finish();
-out:
-	preempt_enable();
+	table = (unsigned long *) (__pa(table) | (bit << 4));
+	tlb_remove_table(tlb, table);
 }
 
+void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long) _table & PAGE_MASK);
+	unsigned type = (unsigned long) _table & ~PAGE_MASK;
+
+	if (type)
+		__page_table_free_rcu(table, type);
+	else
+		free_pages((unsigned long) table, ALLOC_ORDER);
+}
+
+#endif
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
@@ -369,7 +315,7 @@
 		return -EINVAL;
 
 	/* Do we have pgstes? if yes, we are done */
-	if (tsk->mm->context.has_pgste)
+	if (mm_has_pgste(tsk->mm))
 		return 0;
 
 	/* lets check if we are allowed to replace the mm */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d6e2477..6df88c7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -47,38 +47,40 @@
 #define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
 #define DstMem64    (6<<1)	/* 64bit memory operand */
 #define DstImmUByte (7<<1)	/* 8-bit unsigned immediate operand */
-#define DstMask     (7<<1)
+#define DstDX       (8<<1)	/* Destination is in DX register */
+#define DstMask     (0xf<<1)
 /* Source operand type. */
-#define SrcNone     (0<<4)	/* No source operand. */
-#define SrcReg      (1<<4)	/* Register operand. */
-#define SrcMem      (2<<4)	/* Memory operand. */
-#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
-#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
-#define SrcImm      (5<<4)	/* Immediate operand. */
-#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
-#define SrcOne      (7<<4)	/* Implied '1' */
-#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
-#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
-#define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
-#define SrcImmFAddr (0xb<<4)	/* Source is immediate far address */
-#define SrcMemFAddr (0xc<<4)	/* Source is far address in memory */
-#define SrcAcc      (0xd<<4)	/* Source Accumulator */
-#define SrcImmU16   (0xe<<4)    /* Immediate operand, unsigned, 16 bits */
-#define SrcMask     (0xf<<4)
+#define SrcNone     (0<<5)	/* No source operand. */
+#define SrcReg      (1<<5)	/* Register operand. */
+#define SrcMem      (2<<5)	/* Memory operand. */
+#define SrcMem16    (3<<5)	/* Memory operand (16-bit). */
+#define SrcMem32    (4<<5)	/* Memory operand (32-bit). */
+#define SrcImm      (5<<5)	/* Immediate operand. */
+#define SrcImmByte  (6<<5)	/* 8-bit sign-extended immediate operand. */
+#define SrcOne      (7<<5)	/* Implied '1' */
+#define SrcImmUByte (8<<5)      /* 8-bit unsigned immediate operand. */
+#define SrcImmU     (9<<5)      /* Immediate operand, unsigned */
+#define SrcSI       (0xa<<5)	/* Source is in the DS:RSI */
+#define SrcImmFAddr (0xb<<5)	/* Source is immediate far address */
+#define SrcMemFAddr (0xc<<5)	/* Source is far address in memory */
+#define SrcAcc      (0xd<<5)	/* Source Accumulator */
+#define SrcImmU16   (0xe<<5)    /* Immediate operand, unsigned, 16 bits */
+#define SrcDX       (0xf<<5)	/* Source is in DX register */
+#define SrcMask     (0xf<<5)
 /* Generic ModRM decode. */
-#define ModRM       (1<<8)
+#define ModRM       (1<<9)
 /* Destination is only written; never read. */
-#define Mov         (1<<9)
-#define BitOp       (1<<10)
-#define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
-#define String      (1<<12)     /* String instruction (rep capable) */
-#define Stack       (1<<13)     /* Stack instruction (push/pop) */
-#define GroupMask   (7<<14)     /* Opcode uses one of the group mechanisms */
-#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
-#define GroupDual   (2<<14)     /* Alternate decoding of mod == 3 */
-#define Prefix      (3<<14)     /* Instruction varies with 66/f2/f3 prefix */
-#define RMExt       (4<<14)     /* Opcode extension in ModRM r/m if mod == 3 */
-#define Sse         (1<<17)     /* SSE Vector instruction */
+#define Mov         (1<<10)
+#define BitOp       (1<<11)
+#define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
+#define String      (1<<13)     /* String instruction (rep capable) */
+#define Stack       (1<<14)     /* Stack instruction (push/pop) */
+#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
+#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
+#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
+#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
+#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
+#define Sse         (1<<18)     /* SSE Vector instruction */
 /* Misc flags */
 #define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
 #define VendorSpecific (1<<22) /* Vendor specific instruction */
@@ -3154,8 +3156,8 @@
 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
 	I(SrcImmByte | Mov | Stack, em_push),
 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
-	D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */
-	D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */
+	D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, insw/insd */
+	D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
 	X16(D(SrcImmByte)),
 	/* 0x80 - 0x87 */
@@ -3212,8 +3214,8 @@
 	/* 0xE8 - 0xEF */
 	D(SrcImm | Stack), D(SrcImm | ImplicitOps),
 	D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
-	D2bvIP(SrcNone | DstAcc,     in,  check_perm_in),
-	D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out),
+	D2bvIP(SrcDX | DstAcc, in,  check_perm_in),
+	D2bvIP(SrcAcc | DstDX, out, check_perm_out),
 	/* 0xF0 - 0xF7 */
 	N, DI(ImplicitOps, icebp), N, N,
 	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
@@ -3613,6 +3615,12 @@
 		memop.bytes = c->op_bytes + 2;
 		goto srcmem_common;
 		break;
+	case SrcDX:
+		c->src.type = OP_REG;
+		c->src.bytes = 2;
+		c->src.addr.reg = &c->regs[VCPU_REGS_RDX];
+		fetch_register_operand(&c->src);
+		break;
 	}
 
 	if (rc != X86EMUL_CONTINUE)
@@ -3682,6 +3690,12 @@
 		c->dst.addr.mem.seg = VCPU_SREG_ES;
 		c->dst.val = 0;
 		break;
+	case DstDX:
+		c->dst.type = OP_REG;
+		c->dst.bytes = 2;
+		c->dst.addr.reg = &c->regs[VCPU_REGS_RDX];
+		fetch_register_operand(&c->dst);
+		break;
 	case ImplicitOps:
 		/* Special instructions do their own operand decoding. */
 	default:
@@ -4027,7 +4041,6 @@
 		break;
 	case 0xec: /* in al,dx */
 	case 0xed: /* in (e/r)ax,dx */
-		c->src.val = c->regs[VCPU_REGS_RDX];
 	do_io_in:
 		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
 				     &c->dst.val))
@@ -4035,7 +4048,6 @@
 		break;
 	case 0xee: /* out dx,al */
 	case 0xef: /* out dx,(e/r)ax */
-		c->dst.val = c->regs[VCPU_REGS_RDX];
 	do_io_out:
 		ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val,
 				      &c->src.val, 1);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 51c2257..4d46441 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -776,7 +776,7 @@
 	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
 	seq_printf(m, "  seqno: 0x%08x\n", error->seqno);
 
-	for (i = 0; i < 16; i++)
+	for (i = 0; i < dev_priv->num_fence_regs; i++)
 		seq_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
 
 	if (error->active_bo)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee66035..f63ee16 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -716,6 +716,7 @@
 	struct intel_fbdev *fbdev;
 
 	struct drm_property *broadcast_rgb_property;
+	struct drm_property *force_audio_property;
 
 	atomic_t forcewake_count;
 } drm_i915_private_t;
@@ -909,13 +910,6 @@
 	} mm;
 };
 
-enum intel_chip_family {
-	CHIP_I8XX = 0x01,
-	CHIP_I9XX = 0x02,
-	CHIP_I915 = 0x04,
-	CHIP_I965 = 0x08,
-};
-
 #define INTEL_INFO(dev)	(((struct drm_i915_private *) (dev)->dev_private)->info)
 
 #define IS_I830(dev)		((dev)->pci_device == 0x3577)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0b2e167..12d3257 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -354,7 +354,7 @@
 		 * page_offset = offset within page
 		 * page_length = bytes to copy for this page
 		 */
-		page_offset = offset & (PAGE_SIZE-1);
+		page_offset = offset_in_page(offset);
 		page_length = remain;
 		if ((page_offset + remain) > PAGE_SIZE)
 			page_length = PAGE_SIZE - page_offset;
@@ -453,9 +453,9 @@
 		 * data_page_offset = offset with data_page_index page.
 		 * page_length = bytes to copy for this page
 		 */
-		shmem_page_offset = offset & ~PAGE_MASK;
+		shmem_page_offset = offset_in_page(offset);
 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = data_ptr & ~PAGE_MASK;
+		data_page_offset = offset_in_page(data_ptr);
 
 		page_length = remain;
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
@@ -638,8 +638,8 @@
 		 * page_offset = offset within page
 		 * page_length = bytes to copy for this page
 		 */
-		page_base = (offset & ~(PAGE_SIZE-1));
-		page_offset = offset & (PAGE_SIZE-1);
+		page_base = offset & PAGE_MASK;
+		page_offset = offset_in_page(offset);
 		page_length = remain;
 		if ((page_offset + remain) > PAGE_SIZE)
 			page_length = PAGE_SIZE - page_offset;
@@ -650,7 +650,6 @@
 		 */
 		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
 				    page_offset, user_data, page_length))
-
 			return -EFAULT;
 
 		remain -= page_length;
@@ -730,9 +729,9 @@
 		 * page_length = bytes to copy for this page
 		 */
 		gtt_page_base = offset & PAGE_MASK;
-		gtt_page_offset = offset & ~PAGE_MASK;
+		gtt_page_offset = offset_in_page(offset);
 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = data_ptr & ~PAGE_MASK;
+		data_page_offset = offset_in_page(data_ptr);
 
 		page_length = remain;
 		if ((gtt_page_offset + page_length) > PAGE_SIZE)
@@ -791,7 +790,7 @@
 		 * page_offset = offset within page
 		 * page_length = bytes to copy for this page
 		 */
-		page_offset = offset & (PAGE_SIZE-1);
+		page_offset = offset_in_page(offset);
 		page_length = remain;
 		if ((page_offset + remain) > PAGE_SIZE)
 			page_length = PAGE_SIZE - page_offset;
@@ -896,9 +895,9 @@
 		 * data_page_offset = offset with data_page_index page.
 		 * page_length = bytes to copy for this page
 		 */
-		shmem_page_offset = offset & ~PAGE_MASK;
+		shmem_page_offset = offset_in_page(offset);
 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = data_ptr & ~PAGE_MASK;
+		data_page_offset = offset_in_page(data_ptr);
 
 		page_length = remain;
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
@@ -1450,8 +1449,9 @@
 	 * edge of an even tile row (where tile rows are counted as if the bo is
 	 * placed in a fenced gtt region).
 	 */
-	if (IS_GEN2(dev) ||
-	    (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
+	if (IS_GEN2(dev))
+		tile_height = 16;
+	else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
 		tile_height = 32;
 	else
 		tile_height = 8;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b79619a..b9fafe3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -517,7 +517,7 @@
 	if (de_iir & DE_PIPEA_VBLANK_IVB)
 		drm_handle_vblank(dev, 0);
 
-	if (de_iir & DE_PIPEB_VBLANK_IVB);
+	if (de_iir & DE_PIPEB_VBLANK_IVB)
 		drm_handle_vblank(dev, 1);
 
 	/* check event from PCH */
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index e93f93c..0979d88 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -288,6 +288,8 @@
 		 * This may be a DVI-I connector with a shared DDC
 		 * link between analog and digital outputs, so we
 		 * have to check the EDID input spec of the attached device.
+		 *
+		 * On the other hand, what should we do if it is a broken EDID?
 		 */
 		if (edid != NULL) {
 			is_digital = edid->input & DRM_EDID_INPUT_DIGITAL;
@@ -298,6 +300,8 @@
 		if (!is_digital) {
 			DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n");
 			return true;
+		} else {
+			DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n");
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f553ddfd..81a9059 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3983,54 +3983,6 @@
 #define ILK_LP0_PLANE_LATENCY		700
 #define ILK_LP0_CURSOR_LATENCY		1300
 
-static bool ironlake_compute_wm0(struct drm_device *dev,
-				 int pipe,
-				 const struct intel_watermark_params *display,
-				 int display_latency_ns,
-				 const struct intel_watermark_params *cursor,
-				 int cursor_latency_ns,
-				 int *plane_wm,
-				 int *cursor_wm)
-{
-	struct drm_crtc *crtc;
-	int htotal, hdisplay, clock, pixel_size;
-	int line_time_us, line_count;
-	int entries, tlb_miss;
-
-	crtc = intel_get_crtc_for_pipe(dev, pipe);
-	if (crtc->fb == NULL || !crtc->enabled)
-		return false;
-
-	htotal = crtc->mode.htotal;
-	hdisplay = crtc->mode.hdisplay;
-	clock = crtc->mode.clock;
-	pixel_size = crtc->fb->bits_per_pixel / 8;
-
-	/* Use the small buffer method to calculate plane watermark */
-	entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
-	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
-	if (tlb_miss > 0)
-		entries += tlb_miss;
-	entries = DIV_ROUND_UP(entries, display->cacheline_size);
-	*plane_wm = entries + display->guard_size;
-	if (*plane_wm > (int)display->max_wm)
-		*plane_wm = display->max_wm;
-
-	/* Use the large buffer method to calculate cursor watermark */
-	line_time_us = ((htotal * 1000) / clock);
-	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
-	entries = line_count * 64 * pixel_size;
-	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
-	if (tlb_miss > 0)
-		entries += tlb_miss;
-	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
-	*cursor_wm = entries + cursor->guard_size;
-	if (*cursor_wm > (int)cursor->max_wm)
-		*cursor_wm = (int)cursor->max_wm;
-
-	return true;
-}
-
 /*
  * Check the wm result.
  *
@@ -4139,12 +4091,12 @@
 	unsigned int enabled;
 
 	enabled = 0;
-	if (ironlake_compute_wm0(dev, 0,
-				 &ironlake_display_wm_info,
-				 ILK_LP0_PLANE_LATENCY,
-				 &ironlake_cursor_wm_info,
-				 ILK_LP0_CURSOR_LATENCY,
-				 &plane_wm, &cursor_wm)) {
+	if (g4x_compute_wm0(dev, 0,
+			    &ironlake_display_wm_info,
+			    ILK_LP0_PLANE_LATENCY,
+			    &ironlake_cursor_wm_info,
+			    ILK_LP0_CURSOR_LATENCY,
+			    &plane_wm, &cursor_wm)) {
 		I915_WRITE(WM0_PIPEA_ILK,
 			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
 		DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
@@ -4153,12 +4105,12 @@
 		enabled |= 1;
 	}
 
-	if (ironlake_compute_wm0(dev, 1,
-				 &ironlake_display_wm_info,
-				 ILK_LP0_PLANE_LATENCY,
-				 &ironlake_cursor_wm_info,
-				 ILK_LP0_CURSOR_LATENCY,
-				 &plane_wm, &cursor_wm)) {
+	if (g4x_compute_wm0(dev, 1,
+			    &ironlake_display_wm_info,
+			    ILK_LP0_PLANE_LATENCY,
+			    &ironlake_cursor_wm_info,
+			    ILK_LP0_CURSOR_LATENCY,
+			    &plane_wm, &cursor_wm)) {
 		I915_WRITE(WM0_PIPEB_ILK,
 			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
 		DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
@@ -4223,10 +4175,10 @@
 	unsigned int enabled;
 
 	enabled = 0;
-	if (ironlake_compute_wm0(dev, 0,
-				 &sandybridge_display_wm_info, latency,
-				 &sandybridge_cursor_wm_info, latency,
-				 &plane_wm, &cursor_wm)) {
+	if (g4x_compute_wm0(dev, 0,
+			    &sandybridge_display_wm_info, latency,
+			    &sandybridge_cursor_wm_info, latency,
+			    &plane_wm, &cursor_wm)) {
 		I915_WRITE(WM0_PIPEA_ILK,
 			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
 		DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
@@ -4235,10 +4187,10 @@
 		enabled |= 1;
 	}
 
-	if (ironlake_compute_wm0(dev, 1,
-				 &sandybridge_display_wm_info, latency,
-				 &sandybridge_cursor_wm_info, latency,
-				 &plane_wm, &cursor_wm)) {
+	if (g4x_compute_wm0(dev, 1,
+			    &sandybridge_display_wm_info, latency,
+			    &sandybridge_cursor_wm_info, latency,
+			    &plane_wm, &cursor_wm)) {
 		I915_WRITE(WM0_PIPEB_ILK,
 			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
 		DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
@@ -7675,6 +7627,7 @@
 			dev_priv->display.update_wm = NULL;
 		} else
 			dev_priv->display.update_wm = pineview_update_wm;
+		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
 	} else if (IS_G4X(dev)) {
 		dev_priv->display.update_wm = g4x_update_wm;
 		dev_priv->display.init_clock_gating = g4x_init_clock_gating;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index a4d8031..391b55f 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -59,8 +59,6 @@
 	bool is_pch_edp;
 	uint8_t	train_set[4];
 	uint8_t link_status[DP_LINK_STATUS_SIZE];
-
-	struct drm_property *force_audio_property;
 };
 
 /**
@@ -1702,7 +1700,7 @@
 	if (ret)
 		return ret;
 
-	if (property == intel_dp->force_audio_property) {
+	if (property == dev_priv->force_audio_property) {
 		int i = val;
 		bool has_audio;
 
@@ -1841,16 +1839,7 @@
 static void
 intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
-
-	intel_dp->force_audio_property =
-		drm_property_create(dev, DRM_MODE_PROP_RANGE, "force_audio", 2);
-	if (intel_dp->force_audio_property) {
-		intel_dp->force_audio_property->values[0] = -1;
-		intel_dp->force_audio_property->values[1] = 1;
-		drm_connector_attach_property(connector, intel_dp->force_audio_property, 0);
-	}
-
+	intel_attach_force_audio_property(connector);
 	intel_attach_broadcast_rgb_property(connector);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 831d7a4..9ffa61e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -236,6 +236,7 @@
 int intel_ddc_get_modes(struct drm_connector *c, struct i2c_adapter *adapter);
 extern bool intel_ddc_probe(struct intel_encoder *intel_encoder, int ddc_bus);
 
+extern void intel_attach_force_audio_property(struct drm_connector *connector);
 extern void intel_attach_broadcast_rgb_property(struct drm_connector *connector);
 
 extern void intel_crt_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index f289b86..aa0a8e8 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -45,7 +45,6 @@
 	bool has_hdmi_sink;
 	bool has_audio;
 	int force_audio;
-	struct drm_property *force_audio_property;
 };
 
 static struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder)
@@ -194,7 +193,7 @@
 	if (mode->clock > 165000)
 		return MODE_CLOCK_HIGH;
 	if (mode->clock < 20000)
-		return MODE_CLOCK_HIGH;
+		return MODE_CLOCK_LOW;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
@@ -287,7 +286,7 @@
 	if (ret)
 		return ret;
 
-	if (property == intel_hdmi->force_audio_property) {
+	if (property == dev_priv->force_audio_property) {
 		int i = val;
 		bool has_audio;
 
@@ -365,16 +364,7 @@
 static void
 intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
-
-	intel_hdmi->force_audio_property =
-		drm_property_create(dev, DRM_MODE_PROP_RANGE, "force_audio", 2);
-	if (intel_hdmi->force_audio_property) {
-		intel_hdmi->force_audio_property->values[0] = -1;
-		intel_hdmi->force_audio_property->values[1] = 1;
-		drm_connector_attach_property(connector, intel_hdmi->force_audio_property, 0);
-	}
-
+	intel_attach_force_audio_property(connector);
 	intel_attach_broadcast_rgb_property(connector);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 67cb076..b28f7bd 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -727,6 +727,14 @@
 			DMI_MATCH(DMI_PRODUCT_NAME, "U800"),
 		},
 	},
+	{
+		.callback = intel_no_lvds_dmi_callback,
+		.ident = "Asus EeeBox PC EB1007",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "EB1007"),
+		},
+	},
 
 	{ }	/* terminating entry */
 };
diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c
index 9034dd8f..3b26a3b 100644
--- a/drivers/gpu/drm/i915/intel_modes.c
+++ b/drivers/gpu/drm/i915/intel_modes.c
@@ -81,6 +81,36 @@
 	return ret;
 }
 
+static const char *force_audio_names[] = {
+	"off",
+	"auto",
+	"on",
+};
+
+void
+intel_attach_force_audio_property(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_property *prop;
+	int i;
+
+	prop = dev_priv->force_audio_property;
+	if (prop == NULL) {
+		prop = drm_property_create(dev, DRM_MODE_PROP_ENUM,
+					   "audio",
+					   ARRAY_SIZE(force_audio_names));
+		if (prop == NULL)
+			return;
+
+		for (i = 0; i < ARRAY_SIZE(force_audio_names); i++)
+			drm_property_add_enum(prop, i, i-1, force_audio_names[i]);
+
+		dev_priv->force_audio_property = prop;
+	}
+	drm_connector_attach_property(connector, prop, 0);
+}
+
 static const char *broadcast_rgb_names[] = {
 	"Full",
 	"Limited 16:235",
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 754086f..30fe554 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -148,8 +148,6 @@
 	int   format_supported_num;
 	struct drm_property *tv_format;
 
-	struct drm_property *force_audio_property;
-
 	/* add the property for the SDVO-TV */
 	struct drm_property *left;
 	struct drm_property *right;
@@ -1712,7 +1710,7 @@
 	if (ret)
 		return ret;
 
-	if (property == intel_sdvo_connector->force_audio_property) {
+	if (property == dev_priv->force_audio_property) {
 		int i = val;
 		bool has_audio;
 
@@ -2037,15 +2035,7 @@
 {
 	struct drm_device *dev = connector->base.base.dev;
 
-	connector->force_audio_property =
-		drm_property_create(dev, DRM_MODE_PROP_RANGE, "force_audio", 2);
-	if (connector->force_audio_property) {
-		connector->force_audio_property->values[0] = -1;
-		connector->force_audio_property->values[1] = 1;
-		drm_connector_attach_property(&connector->base.base,
-					      connector->force_audio_property, 0);
-	}
-
+	intel_attach_force_audio_property(&connector->base.base);
 	if (INTEL_INFO(dev)->gen >= 4 && IS_MOBILE(dev))
 		intel_attach_broadcast_rgb_property(&connector->base.base);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_hw.c b/drivers/gpu/drm/nouveau/nouveau_hw.c
index 053edf9..ba896e5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hw.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hw.c
@@ -900,6 +900,7 @@
 	}
 	/* NV11 and NV20 don't have this, they stop at 0x52. */
 	if (nv_gf4_disp_arch(dev)) {
+		rd_cio_state(dev, head, regp, NV_CIO_CRE_42);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_53);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_54);
 
@@ -1003,6 +1004,7 @@
 			nouveau_wait_eq(dev, 650000000, NV_PRMCIO_INP0__COLOR, 0x8, 0x0);
 		}
 
+		wr_cio_state(dev, head, regp, NV_CIO_CRE_42);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_53);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_54);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 2960f58..5ee14d2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -397,7 +397,7 @@
 		if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(40)))
 			dma_bits = 40;
 	} else
-	if (drm_pci_device_is_pcie(dev) &&
+	if (0 && drm_pci_device_is_pcie(dev) &&
 	    dev_priv->chipset  > 0x40 &&
 	    dev_priv->chipset != 0x45) {
 		if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(39)))
@@ -868,7 +868,9 @@
 		nouveau_vm_unmap(&node->tmp_vma);
 		nouveau_vm_put(&node->tmp_vma);
 	}
+
 	mem->mm_node = NULL;
+	kfree(node);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index c77111e..82fad91 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -458,7 +458,7 @@
 		dev_priv->gart_info.type = NOUVEAU_GART_HW;
 		dev_priv->gart_info.func = &nv50_sgdma_backend;
 	} else
-	if (drm_pci_device_is_pcie(dev) &&
+	if (0 && drm_pci_device_is_pcie(dev) &&
 	    dev_priv->chipset > 0x40 && dev_priv->chipset != 0x45) {
 		if (nv44_graph_class(dev)) {
 			dev_priv->gart_info.func = &nv44_sgdma_backend;
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 38ea662..8021888 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -371,6 +371,7 @@
 		engine->vram.flags_valid	= nv50_vram_flags_valid;
 		break;
 	case 0xC0:
+	case 0xD0:
 		engine->instmem.init		= nvc0_instmem_init;
 		engine->instmem.takedown	= nvc0_instmem_takedown;
 		engine->instmem.suspend		= nvc0_instmem_suspend;
@@ -563,68 +564,68 @@
 	if (ret)
 		goto out_timer;
 
-	switch (dev_priv->card_type) {
-	case NV_04:
-		nv04_graph_create(dev);
-		break;
-	case NV_10:
-		nv10_graph_create(dev);
-		break;
-	case NV_20:
-	case NV_30:
-		nv20_graph_create(dev);
-		break;
-	case NV_40:
-		nv40_graph_create(dev);
-		break;
-	case NV_50:
-		nv50_graph_create(dev);
-		break;
-	case NV_C0:
-		nvc0_graph_create(dev);
-		break;
-	default:
-		break;
-	}
-
-	switch (dev_priv->chipset) {
-	case 0x84:
-	case 0x86:
-	case 0x92:
-	case 0x94:
-	case 0x96:
-	case 0xa0:
-		nv84_crypt_create(dev);
-		break;
-	}
-
-	switch (dev_priv->card_type) {
-	case NV_50:
-		switch (dev_priv->chipset) {
-		case 0xa3:
-		case 0xa5:
-		case 0xa8:
-		case 0xaf:
-			nva3_copy_create(dev);
+	if (!nouveau_noaccel) {
+		switch (dev_priv->card_type) {
+		case NV_04:
+			nv04_graph_create(dev);
+			break;
+		case NV_10:
+			nv10_graph_create(dev);
+			break;
+		case NV_20:
+		case NV_30:
+			nv20_graph_create(dev);
+			break;
+		case NV_40:
+			nv40_graph_create(dev);
+			break;
+		case NV_50:
+			nv50_graph_create(dev);
+			break;
+		case NV_C0:
+			nvc0_graph_create(dev);
+			break;
+		default:
 			break;
 		}
-		break;
-	case NV_C0:
-		nvc0_copy_create(dev, 0);
-		nvc0_copy_create(dev, 1);
-		break;
-	default:
-		break;
-	}
 
-	if (dev_priv->card_type == NV_40)
-		nv40_mpeg_create(dev);
-	else
-	if (dev_priv->card_type == NV_50 &&
-	    (dev_priv->chipset < 0x98 || dev_priv->chipset == 0xa0))
-		nv50_mpeg_create(dev);
+		switch (dev_priv->chipset) {
+		case 0x84:
+		case 0x86:
+		case 0x92:
+		case 0x94:
+		case 0x96:
+		case 0xa0:
+			nv84_crypt_create(dev);
+			break;
+		}
 
-	if (!nouveau_noaccel) {
+		switch (dev_priv->card_type) {
+		case NV_50:
+			switch (dev_priv->chipset) {
+			case 0xa3:
+			case 0xa5:
+			case 0xa8:
+			case 0xaf:
+				nva3_copy_create(dev);
+				break;
+			}
+			break;
+		case NV_C0:
+			nvc0_copy_create(dev, 0);
+			nvc0_copy_create(dev, 1);
+			break;
+		default:
+			break;
+		}
+
+		if (dev_priv->card_type == NV_40)
+			nv40_mpeg_create(dev);
+		else
+		if (dev_priv->card_type == NV_50 &&
+		    (dev_priv->chipset < 0x98 || dev_priv->chipset == 0xa0))
+			nv50_mpeg_create(dev);
+
 		for (e = 0; e < NVOBJ_ENGINE_NR; e++) {
 			if (dev_priv->eng[e]) {
 				ret = dev_priv->eng[e]->init(dev, e);
@@ -922,6 +923,7 @@
 		dev_priv->card_type = NV_50;
 		break;
 	case 0xc0:
+	case 0xd0:
 		dev_priv->card_type = NV_C0;
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.c b/drivers/gpu/drm/nouveau/nouveau_vm.c
index 0059e6f..519a6b4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vm.c
@@ -58,6 +58,7 @@
 			num -= len;
 			pte += len;
 			if (unlikely(end >= max)) {
+				phys += len << (bits + 12);
 				pde++;
 				pte = 0;
 			}
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index 3c78bc8..f1a3ae4 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -376,7 +376,10 @@
 	 */
 
 	/* framebuffer can be larger than crtc scanout area. */
-	regp->CRTC[NV_CIO_CRE_RPC0_INDEX] = XLATE(fb->pitch / 8, 8, NV_CIO_CRE_RPC0_OFFSET_10_8);
+	regp->CRTC[NV_CIO_CRE_RPC0_INDEX] =
+		XLATE(fb->pitch / 8, 8, NV_CIO_CRE_RPC0_OFFSET_10_8);
+	regp->CRTC[NV_CIO_CRE_42] =
+		XLATE(fb->pitch / 8, 11, NV_CIO_CRE_42_OFFSET_11);
 	regp->CRTC[NV_CIO_CRE_RPC1_INDEX] = mode->crtc_hdisplay < 1280 ?
 					    MASK(NV_CIO_CRE_RPC1_LARGE) : 0x00;
 	regp->CRTC[NV_CIO_CRE_LSR_INDEX] = XLATE(horizBlankEnd, 6, NV_CIO_CRE_LSR_HBE_6) |
@@ -824,8 +827,11 @@
 	regp->CRTC[NV_CIO_CR_OFFSET_INDEX] = drm_fb->pitch >> 3;
 	regp->CRTC[NV_CIO_CRE_RPC0_INDEX] =
 		XLATE(drm_fb->pitch >> 3, 8, NV_CIO_CRE_RPC0_OFFSET_10_8);
+	regp->CRTC[NV_CIO_CRE_42] =
+		XLATE(drm_fb->pitch / 8, 11, NV_CIO_CRE_42_OFFSET_11);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_RPC0_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CR_OFFSET_INDEX);
+	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_42);
 
 	/* Update the framebuffer location. */
 	regp->fb_start = nv_crtc->fb.offset & ~3;
diff --git a/drivers/gpu/drm/nouveau/nvreg.h b/drivers/gpu/drm/nouveau/nvreg.h
index fe0f253..bbfb1a6 100644
--- a/drivers/gpu/drm/nouveau/nvreg.h
+++ b/drivers/gpu/drm/nouveau/nvreg.h
@@ -277,6 +277,8 @@
 #		define NV_CIO_CRE_EBR_VDE_11		2:2
 #		define NV_CIO_CRE_EBR_VRS_11		4:4
 #		define NV_CIO_CRE_EBR_VBS_11		6:6
+#	define NV_CIO_CRE_42			0x42
+#		define NV_CIO_CRE_42_OFFSET_11		6:6
 #	define NV_CIO_CRE_43			0x43
 #	define NV_CIO_CRE_44			0x44	/* head control */
 #	define NV_CIO_CRE_CSB			0x45	/* colour saturation boost */
diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig
index 9746fee..ea92bbe 100644
--- a/drivers/gpu/drm/radeon/Kconfig
+++ b/drivers/gpu/drm/radeon/Kconfig
@@ -28,11 +28,4 @@
 	  The kernel will also perform security check on command stream
 	  provided by the user, we want to catch and forbid any illegal use
 	  of the GPU such as DMA into random system memory or into memory
-	  not owned by the process supplying the command stream. This part
-	  of the code is still incomplete and this why we propose that patch
-	  as a staging driver addition, future security might forbid current
-	  experimental userspace to run.
-
-	  This code support the following hardware : R1XX,R2XX,R3XX,R4XX,R5XX
-	  (radeon up to X1950). Works is underway to provide support for R6XX,
-	  R7XX and newer hardware (radeon from HD2XXX to HD4XXX).
+	  not owned by the process supplying the command stream.
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index ec84878..84a69e7 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1045,7 +1045,7 @@
 	uint64_t fb_location;
 	uint32_t fb_format, fb_pitch_pixels, tiling_flags;
 	u32 fb_swap = EVERGREEN_GRPH_ENDIAN_SWAP(EVERGREEN_GRPH_ENDIAN_NONE);
-	u32 tmp;
+	u32 tmp, viewport_w, viewport_h;
 	int r;
 
 	/* no fb bound */
@@ -1171,8 +1171,10 @@
 	y &= ~1;
 	WREG32(EVERGREEN_VIEWPORT_START + radeon_crtc->crtc_offset,
 	       (x << 16) | y);
+	viewport_w = crtc->mode.hdisplay;
+	viewport_h = (crtc->mode.vdisplay + 1) & ~1;
 	WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
-	       (crtc->mode.hdisplay << 16) | crtc->mode.vdisplay);
+	       (viewport_w << 16) | viewport_h);
 
 	/* pageflip setup */
 	/* make sure flip is at vb rather than hb */
@@ -1213,7 +1215,7 @@
 	uint64_t fb_location;
 	uint32_t fb_format, fb_pitch_pixels, tiling_flags;
 	u32 fb_swap = R600_D1GRPH_SWAP_ENDIAN_NONE;
-	u32 tmp;
+	u32 tmp, viewport_w, viewport_h;
 	int r;
 
 	/* no fb bound */
@@ -1338,8 +1340,10 @@
 	y &= ~1;
 	WREG32(AVIVO_D1MODE_VIEWPORT_START + radeon_crtc->crtc_offset,
 	       (x << 16) | y);
+	viewport_w = crtc->mode.hdisplay;
+	viewport_h = (crtc->mode.vdisplay + 1) & ~1;
 	WREG32(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
-	       (crtc->mode.hdisplay << 16) | crtc->mode.vdisplay);
+	       (viewport_w << 16) | viewport_h);
 
 	/* pageflip setup */
 	/* make sure flip is at vb rather than hb */
diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.c b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
index e148ab0..7b4eeb7 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
@@ -39,17 +39,335 @@
 
 const u32 cayman_default_state[] =
 {
-	/* XXX fill in additional blit state */
+	0xc0066900,
+	0x00000000,
+	0x00000060, /* DB_RENDER_CONTROL */
+	0x00000000, /* DB_COUNT_CONTROL */
+	0x00000000, /* DB_DEPTH_VIEW */
+	0x0000002a, /* DB_RENDER_OVERRIDE */
+	0x00000000, /* DB_RENDER_OVERRIDE2 */
+	0x00000000, /* DB_HTILE_DATA_BASE */
 
 	0xc0026900,
-	0x00000316,
-	0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
-	0x00000010, /*  */
+	0x0000000a,
+	0x00000000, /* DB_STENCIL_CLEAR */
+	0x00000000, /* DB_DEPTH_CLEAR */
+
+	0xc0036900,
+	0x0000000f,
+	0x00000000, /* DB_DEPTH_INFO */
+	0x00000000, /* DB_Z_INFO */
+	0x00000000, /* DB_STENCIL_INFO */
+
+	0xc0016900,
+	0x00000080,
+	0x00000000, /* PA_SC_WINDOW_OFFSET */
+
+	0xc00d6900,
+	0x00000083,
+	0x0000ffff, /* PA_SC_CLIPRECT_RULE */
+	0x00000000, /* PA_SC_CLIPRECT_0_TL */
+	0x20002000, /* PA_SC_CLIPRECT_0_BR */
+	0x00000000,
+	0x20002000,
+	0x00000000,
+	0x20002000,
+	0x00000000,
+	0x20002000,
+	0xaaaaaaaa, /* PA_SC_EDGERULE */
+	0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
+	0x0000000f, /* CB_TARGET_MASK */
+	0x0000000f, /* CB_SHADER_MASK */
+
+	0xc0226900,
+	0x00000094,
+	0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
+	0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x00000000, /* PA_SC_VPORT_ZMIN_0 */
+	0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
+
+	0xc0016900,
+	0x000000d4,
+	0x00000000, /* SX_MISC */
 
 	0xc0026900,
 	0x000000d9,
 	0x00000000, /* CP_RINGID */
 	0x00000000, /* CP_VMID */
+
+	0xc0096900,
+	0x00000100,
+	0x00ffffff, /* VGT_MAX_VTX_INDX */
+	0x00000000, /* VGT_MIN_VTX_INDX */
+	0x00000000, /* VGT_INDX_OFFSET */
+	0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
+	0x00000000, /* SX_ALPHA_TEST_CONTROL */
+	0x00000000, /* CB_BLEND_RED */
+	0x00000000, /* CB_BLEND_GREEN */
+	0x00000000, /* CB_BLEND_BLUE */
+	0x00000000, /* CB_BLEND_ALPHA */
+
+	0xc0016900,
+	0x00000187,
+	0x00000100, /* SPI_VS_OUT_ID_0 */
+
+	0xc0026900,
+	0x00000191,
+	0x00000100, /* SPI_PS_INPUT_CNTL_0 */
+	0x00000101, /* SPI_PS_INPUT_CNTL_1 */
+
+	0xc0016900,
+	0x000001b1,
+	0x00000000, /* SPI_VS_OUT_CONFIG */
+
+	0xc0106900,
+	0x000001b3,
+	0x20000001, /* SPI_PS_IN_CONTROL_0 */
+	0x00000000, /* SPI_PS_IN_CONTROL_1 */
+	0x00000000, /* SPI_INTERP_CONTROL_0 */
+	0x00000000, /* SPI_INPUT_Z */
+	0x00000000, /* SPI_FOG_CNTL */
+	0x00100000, /* SPI_BARYC_CNTL */
+	0x00000000, /* SPI_PS_IN_CONTROL_2 */
+	0x00000000, /* SPI_COMPUTE_INPUT_CNTL */
+	0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */
+	0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */
+	0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */
+	0x00000000, /* SPI_GPR_MGMT */
+	0x00000000, /* SPI_LDS_MGMT */
+	0x00000000, /* SPI_STACK_MGMT */
+	0x00000000, /* SPI_WAVE_MGMT_1 */
+	0x00000000, /* SPI_WAVE_MGMT_2 */
+
+	0xc0016900,
+	0x000001e0,
+	0x00000000, /* CB_BLEND0_CONTROL */
+
+	0xc00e6900,
+	0x00000200,
+	0x00000000, /* DB_DEPTH_CONTROL */
+	0x00000000, /* DB_EQAA */
+	0x00cc0010, /* CB_COLOR_CONTROL */
+	0x00000210, /* DB_SHADER_CONTROL */
+	0x00010000, /* PA_CL_CLIP_CNTL */
+	0x00000004, /* PA_SU_SC_MODE_CNTL */
+	0x00000100, /* PA_CL_VTE_CNTL */
+	0x00000000, /* PA_CL_VS_OUT_CNTL */
+	0x00000000, /* PA_CL_NANINF_CNTL */
+	0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
+	0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
+	0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
+	0x00000000, /*  */
+	0x00000000, /*  */
+
+	0xc0026900,
+	0x00000229,
+	0x00000000, /* SQ_PGM_START_FS */
+	0x00000000,
+
+	0xc0016900,
+	0x0000023b,
+	0x00000000, /* SQ_LDS_ALLOC_PS */
+
+	0xc0066900,
+	0x00000240,
+	0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	0xc0046900,
+	0x00000247,
+	0x00000000, /* SQ_GS_VERT_ITEMSIZE */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	0xc0116900,
+	0x00000280,
+	0x00000000, /* PA_SU_POINT_SIZE */
+	0x00000000, /* PA_SU_POINT_MINMAX */
+	0x00000008, /* PA_SU_LINE_CNTL */
+	0x00000000, /* PA_SC_LINE_STIPPLE */
+	0x00000000, /* VGT_OUTPUT_PATH_CNTL */
+	0x00000000, /* VGT_HOS_CNTL */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000, /* VGT_GS_MODE */
+
+	0xc0026900,
+	0x00000292,
+	0x00000000, /* PA_SC_MODE_CNTL_0 */
+	0x00000000, /* PA_SC_MODE_CNTL_1 */
+
+	0xc0016900,
+	0x000002a1,
+	0x00000000, /* VGT_PRIMITIVEID_EN */
+
+	0xc0016900,
+	0x000002a5,
+	0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
+
+	0xc0026900,
+	0x000002a8,
+	0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
+	0x00000000,
+
+	0xc0026900,
+	0x000002ad,
+	0x00000000, /* VGT_REUSE_OFF */
+	0x00000000,
+
+	0xc0016900,
+	0x000002d5,
+	0x00000000, /* VGT_SHADER_STAGES_EN */
+
+	0xc0016900,
+	0x000002dc,
+	0x0000aa00, /* DB_ALPHA_TO_MASK */
+
+	0xc0066900,
+	0x000002de,
+	0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	0xc0026900,
+	0x000002e5,
+	0x00000000, /* VGT_STRMOUT_CONFIG */
+	0x00000000,
+
+	0xc01b6900,
+	0x000002f5,
+	0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
+	0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
+	0x00000000, /* PA_SC_LINE_CNTL */
+	0x00000000, /* PA_SC_AA_CONFIG */
+	0x00000005, /* PA_SU_VTX_CNTL */
+	0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
+	0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
+	0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
+	0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
+	0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
+	0xffffffff,
+
+	0xc0026900,
+	0x00000316,
+	0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+	0x00000010, /*  */
 };
 
+const u32 cayman_vs[] =
+{
+	0x00000004,
+	0x80400400,
+	0x0000a03c,
+	0x95000688,
+	0x00004000,
+	0x15000688,
+	0x00000000,
+	0x88000000,
+	0x04000000,
+	0x67961001,
+#ifdef __BIG_ENDIAN
+	0x00020000,
+#else
+	0x00000000,
+#endif
+	0x00000000,
+	0x04000000,
+	0x67961000,
+#ifdef __BIG_ENDIAN
+	0x00020008,
+#else
+	0x00000008,
+#endif
+	0x00000000,
+};
+
+const u32 cayman_ps[] =
+{
+	0x00000004,
+	0xa00c0000,
+	0x00000008,
+	0x80400000,
+	0x00000000,
+	0x95000688,
+	0x00000000,
+	0x88000000,
+	0x00380400,
+	0x00146b10,
+	0x00380000,
+	0x20146b10,
+	0x00380400,
+	0x40146b00,
+	0x80380000,
+	0x60146b00,
+	0x00000010,
+	0x000d1000,
+	0xb0800000,
+	0x00000000,
+};
+
+const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
+const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
 const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.h b/drivers/gpu/drm/radeon/cayman_blit_shaders.h
index 33b75e5d..f5d0e9a 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.h
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.h
@@ -25,8 +25,11 @@
 #ifndef CAYMAN_BLIT_SHADERS_H
 #define CAYMAN_BLIT_SHADERS_H
 
+extern const u32 cayman_ps[];
+extern const u32 cayman_vs[];
 extern const u32 cayman_default_state[];
 
+extern const u32 cayman_ps_size, cayman_vs_size;
 extern const u32 cayman_default_size;
 
 #endif
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 7c37638..98ea597 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -88,21 +88,39 @@
 /* get temperature in millidegrees */
 int evergreen_get_temp(struct radeon_device *rdev)
 {
-	u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
-		ASIC_T_SHIFT;
-	u32 actual_temp = 0;
+	u32 temp, toffset, actual_temp = 0;
 
-	if (temp & 0x400)
-		actual_temp = -256;
-	else if (temp & 0x200)
-		actual_temp = 255;
-	else if (temp & 0x100) {
-		actual_temp = temp & 0x1ff;
-		actual_temp |= ~0x1ff;
-	} else
-		actual_temp = temp & 0xff;
+	if (rdev->family == CHIP_JUNIPER) {
+		toffset = (RREG32(CG_THERMAL_CTRL) & TOFFSET_MASK) >>
+			TOFFSET_SHIFT;
+		temp = (RREG32(CG_TS0_STATUS) & TS0_ADC_DOUT_MASK) >>
+			TS0_ADC_DOUT_SHIFT;
 
-	return (actual_temp * 1000) / 2;
+		if (toffset & 0x100)
+			actual_temp = temp / 2 - (0x200 - toffset);
+		else
+			actual_temp = temp / 2 + toffset;
+
+		actual_temp = actual_temp * 1000;
+
+	} else {
+		temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
+			ASIC_T_SHIFT;
+
+		if (temp & 0x400)
+			actual_temp = -256;
+		else if (temp & 0x200)
+			actual_temp = 255;
+		else if (temp & 0x100) {
+			actual_temp = temp & 0x1ff;
+			actual_temp |= ~0x1ff;
+		} else
+			actual_temp = temp & 0xff;
+
+		actual_temp = (actual_temp * 1000) / 2;
+	}
+
+	return actual_temp;
 }
 
 int sumo_get_temp(struct radeon_device *rdev)
@@ -1415,6 +1433,8 @@
 	case CHIP_CEDAR:
 	case CHIP_REDWOOD:
 	case CHIP_PALM:
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
 	case CHIP_TURKS:
 	case CHIP_CAICOS:
 		force_no_swizzle = false;
@@ -1544,6 +1564,8 @@
 	case CHIP_REDWOOD:
 	case CHIP_CEDAR:
 	case CHIP_PALM:
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
 	case CHIP_TURKS:
 	case CHIP_CAICOS:
 	default:
@@ -1689,6 +1711,54 @@
 		rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
 		rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
 		break;
+	case CHIP_SUMO:
+		rdev->config.evergreen.num_ses = 1;
+		rdev->config.evergreen.max_pipes = 4;
+		rdev->config.evergreen.max_tile_pipes = 2;
+		if (rdev->pdev->device == 0x9648)
+			rdev->config.evergreen.max_simds = 3;
+		else if ((rdev->pdev->device == 0x9647) ||
+			 (rdev->pdev->device == 0x964a))
+			rdev->config.evergreen.max_simds = 4;
+		else
+			rdev->config.evergreen.max_simds = 5;
+		rdev->config.evergreen.max_backends = 2 * rdev->config.evergreen.num_ses;
+		rdev->config.evergreen.max_gprs = 256;
+		rdev->config.evergreen.max_threads = 248;
+		rdev->config.evergreen.max_gs_threads = 32;
+		rdev->config.evergreen.max_stack_entries = 256;
+		rdev->config.evergreen.sx_num_of_sets = 4;
+		rdev->config.evergreen.sx_max_export_size = 256;
+		rdev->config.evergreen.sx_max_export_pos_size = 64;
+		rdev->config.evergreen.sx_max_export_smx_size = 192;
+		rdev->config.evergreen.max_hw_contexts = 8;
+		rdev->config.evergreen.sq_num_cf_insts = 2;
+
+		rdev->config.evergreen.sc_prim_fifo_size = 0x40;
+		rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
+		break;
+	case CHIP_SUMO2:
+		rdev->config.evergreen.num_ses = 1;
+		rdev->config.evergreen.max_pipes = 4;
+		rdev->config.evergreen.max_tile_pipes = 4;
+		rdev->config.evergreen.max_simds = 2;
+		rdev->config.evergreen.max_backends = 1 * rdev->config.evergreen.num_ses;
+		rdev->config.evergreen.max_gprs = 256;
+		rdev->config.evergreen.max_threads = 248;
+		rdev->config.evergreen.max_gs_threads = 32;
+		rdev->config.evergreen.max_stack_entries = 512;
+		rdev->config.evergreen.sx_num_of_sets = 4;
+		rdev->config.evergreen.sx_max_export_size = 256;
+		rdev->config.evergreen.sx_max_export_pos_size = 64;
+		rdev->config.evergreen.sx_max_export_smx_size = 192;
+		rdev->config.evergreen.max_hw_contexts = 8;
+		rdev->config.evergreen.sq_num_cf_insts = 2;
+
+		rdev->config.evergreen.sc_prim_fifo_size = 0x40;
+		rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
+		break;
 	case CHIP_BARTS:
 		rdev->config.evergreen.num_ses = 2;
 		rdev->config.evergreen.max_pipes = 4;
@@ -2039,6 +2109,8 @@
 	switch (rdev->family) {
 	case CHIP_CEDAR:
 	case CHIP_PALM:
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
 	case CHIP_CAICOS:
 		/* no vertex cache */
 		sq_config &= ~VC_ENABLE;
@@ -2060,6 +2132,8 @@
 	switch (rdev->family) {
 	case CHIP_CEDAR:
 	case CHIP_PALM:
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
 		ps_thread_count = 96;
 		break;
 	default:
@@ -2099,6 +2173,8 @@
 	switch (rdev->family) {
 	case CHIP_CEDAR:
 	case CHIP_PALM:
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
 	case CHIP_CAICOS:
 		vgt_cache_invalidation = CACHE_INVALIDATION(TC_ONLY);
 		break;
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index ba06a69..57f3bc1 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -31,6 +31,7 @@
 
 #include "evergreend.h"
 #include "evergreen_blit_shaders.h"
+#include "cayman_blit_shaders.h"
 
 #define DI_PT_RECTLIST        0x11
 #define DI_INDEX_SIZE_16_BIT  0x0
@@ -152,6 +153,8 @@
 
 	if ((rdev->family == CHIP_CEDAR) ||
 	    (rdev->family == CHIP_PALM) ||
+	    (rdev->family == CHIP_SUMO) ||
+	    (rdev->family == CHIP_SUMO2) ||
 	    (rdev->family == CHIP_CAICOS))
 		cp_set_surface_sync(rdev,
 				    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
@@ -199,6 +202,16 @@
 set_scissors(struct radeon_device *rdev, int x1, int y1,
 	     int x2, int y2)
 {
+	/* workaround some hw bugs */
+	if (x2 == 0)
+		x1 = 1;
+	if (y2 == 0)
+		y1 = 1;
+	if (rdev->family == CHIP_CAYMAN) {
+		if ((x2 == 1) && (y2 == 1))
+			x2 = 2;
+	}
+
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
 	radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
 	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
@@ -255,238 +268,284 @@
 	u64 gpu_addr;
 	int dwords;
 
-	switch (rdev->family) {
-	case CHIP_CEDAR:
-	default:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 96;
-		num_vs_threads = 16;
-		num_gs_threads = 16;
-		num_es_threads = 16;
-		num_hs_threads = 16;
-		num_ls_threads = 16;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	case CHIP_REDWOOD:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	case CHIP_JUNIPER:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 85;
-		num_vs_stack_entries = 85;
-		num_gs_stack_entries = 85;
-		num_es_stack_entries = 85;
-		num_hs_stack_entries = 85;
-		num_ls_stack_entries = 85;
-		break;
-	case CHIP_CYPRESS:
-	case CHIP_HEMLOCK:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 85;
-		num_vs_stack_entries = 85;
-		num_gs_stack_entries = 85;
-		num_es_stack_entries = 85;
-		num_hs_stack_entries = 85;
-		num_ls_stack_entries = 85;
-		break;
-	case CHIP_PALM:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 96;
-		num_vs_threads = 16;
-		num_gs_threads = 16;
-		num_es_threads = 16;
-		num_hs_threads = 16;
-		num_ls_threads = 16;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	case CHIP_BARTS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 85;
-		num_vs_stack_entries = 85;
-		num_gs_stack_entries = 85;
-		num_es_stack_entries = 85;
-		num_hs_stack_entries = 85;
-		num_ls_stack_entries = 85;
-		break;
-	case CHIP_TURKS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	case CHIP_CAICOS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 10;
-		num_gs_threads = 10;
-		num_es_threads = 10;
-		num_hs_threads = 10;
-		num_ls_threads = 10;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	}
-
-	if ((rdev->family == CHIP_CEDAR) ||
-	    (rdev->family == CHIP_PALM) ||
-	    (rdev->family == CHIP_CAICOS))
-		sq_config = 0;
-	else
-		sq_config = VC_ENABLE;
-
-	sq_config |= (EXPORT_SRC_C |
-		      CS_PRIO(0) |
-		      LS_PRIO(0) |
-		      HS_PRIO(0) |
-		      PS_PRIO(0) |
-		      VS_PRIO(1) |
-		      GS_PRIO(2) |
-		      ES_PRIO(3));
-
-	sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
-				  NUM_VS_GPRS(num_vs_gprs) |
-				  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
-	sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
-				  NUM_ES_GPRS(num_es_gprs));
-	sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
-				  NUM_LS_GPRS(num_ls_gprs));
-	sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
-				   NUM_VS_THREADS(num_vs_threads) |
-				   NUM_GS_THREADS(num_gs_threads) |
-				   NUM_ES_THREADS(num_es_threads));
-	sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
-				     NUM_LS_THREADS(num_ls_threads));
-	sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
-				    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
-	sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
-				    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
-	sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
-				    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
-
 	/* set clear context state */
 	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
 	radeon_ring_write(rdev, 0);
 
-	/* disable dyn gprs */
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(rdev, 0);
+	if (rdev->family < CHIP_CAYMAN) {
+		switch (rdev->family) {
+		case CHIP_CEDAR:
+		default:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 96;
+			num_vs_threads = 16;
+			num_gs_threads = 16;
+			num_es_threads = 16;
+			num_hs_threads = 16;
+			num_ls_threads = 16;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_REDWOOD:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_JUNIPER:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 85;
+			num_vs_stack_entries = 85;
+			num_gs_stack_entries = 85;
+			num_es_stack_entries = 85;
+			num_hs_stack_entries = 85;
+			num_ls_stack_entries = 85;
+			break;
+		case CHIP_CYPRESS:
+		case CHIP_HEMLOCK:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 85;
+			num_vs_stack_entries = 85;
+			num_gs_stack_entries = 85;
+			num_es_stack_entries = 85;
+			num_hs_stack_entries = 85;
+			num_ls_stack_entries = 85;
+			break;
+		case CHIP_PALM:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 96;
+			num_vs_threads = 16;
+			num_gs_threads = 16;
+			num_es_threads = 16;
+			num_hs_threads = 16;
+			num_ls_threads = 16;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_SUMO:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 96;
+			num_vs_threads = 25;
+			num_gs_threads = 25;
+			num_es_threads = 25;
+			num_hs_threads = 25;
+			num_ls_threads = 25;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_SUMO2:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 96;
+			num_vs_threads = 25;
+			num_gs_threads = 25;
+			num_es_threads = 25;
+			num_hs_threads = 25;
+			num_ls_threads = 25;
+			num_ps_stack_entries = 85;
+			num_vs_stack_entries = 85;
+			num_gs_stack_entries = 85;
+			num_es_stack_entries = 85;
+			num_hs_stack_entries = 85;
+			num_ls_stack_entries = 85;
+			break;
+		case CHIP_BARTS:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 85;
+			num_vs_stack_entries = 85;
+			num_gs_stack_entries = 85;
+			num_es_stack_entries = 85;
+			num_hs_stack_entries = 85;
+			num_ls_stack_entries = 85;
+			break;
+		case CHIP_TURKS:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_CAICOS:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 10;
+			num_gs_threads = 10;
+			num_es_threads = 10;
+			num_hs_threads = 10;
+			num_ls_threads = 10;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		}
 
-	/* SQ config */
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
-	radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(rdev, sq_config);
-	radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
-	radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
-	radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, sq_thread_resource_mgmt);
-	radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
-	radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
-	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
-	radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+		if ((rdev->family == CHIP_CEDAR) ||
+		    (rdev->family == CHIP_PALM) ||
+		    (rdev->family == CHIP_SUMO) ||
+		    (rdev->family == CHIP_SUMO2) ||
+		    (rdev->family == CHIP_CAICOS))
+			sq_config = 0;
+		else
+			sq_config = VC_ENABLE;
+
+		sq_config |= (EXPORT_SRC_C |
+			      CS_PRIO(0) |
+			      LS_PRIO(0) |
+			      HS_PRIO(0) |
+			      PS_PRIO(0) |
+			      VS_PRIO(1) |
+			      GS_PRIO(2) |
+			      ES_PRIO(3));
+
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
+					  NUM_VS_GPRS(num_vs_gprs) |
+					  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
+					  NUM_ES_GPRS(num_es_gprs));
+		sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
+					  NUM_LS_GPRS(num_ls_gprs));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
+					   NUM_VS_THREADS(num_vs_threads) |
+					   NUM_GS_THREADS(num_gs_threads) |
+					   NUM_ES_THREADS(num_es_threads));
+		sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
+					     NUM_LS_THREADS(num_ls_threads));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+					    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+					    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+		sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
+					    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
+
+		/* disable dyn gprs */
+		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(rdev, 0);
+
+		/* SQ config */
+		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
+		radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(rdev, sq_config);
+		radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
+		radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
+		radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
+		radeon_ring_write(rdev, 0);
+		radeon_ring_write(rdev, 0);
+		radeon_ring_write(rdev, sq_thread_resource_mgmt);
+		radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
+		radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
+		radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+		radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+	}
 
 	/* CONTEXT_CONTROL */
 	radeon_ring_write(rdev, 0xc0012800);
@@ -560,7 +619,10 @@
 	mutex_init(&rdev->r600_blit.mutex);
 	rdev->r600_blit.state_offset = 0;
 
-	rdev->r600_blit.state_len = evergreen_default_size;
+	if (rdev->family < CHIP_CAYMAN)
+		rdev->r600_blit.state_len = evergreen_default_size;
+	else
+		rdev->r600_blit.state_len = cayman_default_size;
 
 	dwords = rdev->r600_blit.state_len;
 	while (dwords & 0xf) {
@@ -572,11 +634,17 @@
 	obj_size = ALIGN(obj_size, 256);
 
 	rdev->r600_blit.vs_offset = obj_size;
-	obj_size += evergreen_vs_size * 4;
+	if (rdev->family < CHIP_CAYMAN)
+		obj_size += evergreen_vs_size * 4;
+	else
+		obj_size += cayman_vs_size * 4;
 	obj_size = ALIGN(obj_size, 256);
 
 	rdev->r600_blit.ps_offset = obj_size;
-	obj_size += evergreen_ps_size * 4;
+	if (rdev->family < CHIP_CAYMAN)
+		obj_size += evergreen_ps_size * 4;
+	else
+		obj_size += cayman_ps_size * 4;
 	obj_size = ALIGN(obj_size, 256);
 
 	r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
@@ -599,16 +667,29 @@
 		return r;
 	}
 
-	memcpy_toio(ptr + rdev->r600_blit.state_offset,
-		    evergreen_default_state, rdev->r600_blit.state_len * 4);
+	if (rdev->family < CHIP_CAYMAN) {
+		memcpy_toio(ptr + rdev->r600_blit.state_offset,
+			    evergreen_default_state, rdev->r600_blit.state_len * 4);
 
-	if (num_packet2s)
-		memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
-			    packet2s, num_packet2s * 4);
-	for (i = 0; i < evergreen_vs_size; i++)
-		*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
-	for (i = 0; i < evergreen_ps_size; i++)
-		*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
+		if (num_packet2s)
+			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
+				    packet2s, num_packet2s * 4);
+		for (i = 0; i < evergreen_vs_size; i++)
+			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
+		for (i = 0; i < evergreen_ps_size; i++)
+			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
+	} else {
+		memcpy_toio(ptr + rdev->r600_blit.state_offset,
+			    cayman_default_state, rdev->r600_blit.state_len * 4);
+
+		if (num_packet2s)
+			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
+				    packet2s, num_packet2s * 4);
+		for (i = 0; i < cayman_vs_size; i++)
+			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
+		for (i = 0; i < cayman_ps_size; i++)
+			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
+	}
 	radeon_bo_kunmap(rdev->r600_blit.shader_obj);
 	radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index f37e91e..1636e34 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -168,10 +168,16 @@
 #define		SE_DB_BUSY					(1 << 30)
 #define		SE_CB_BUSY					(1 << 31)
 /* evergreen */
+#define	CG_THERMAL_CTRL					0x72c
+#define		TOFFSET_MASK			        0x00003FE0
+#define		TOFFSET_SHIFT			        5
 #define	CG_MULT_THERMAL_STATUS				0x740
 #define		ASIC_T(x)			        ((x) << 16)
-#define		ASIC_T_MASK			        0x7FF0000
+#define		ASIC_T_MASK			        0x07FF0000
 #define		ASIC_T_SHIFT			        16
+#define	CG_TS0_STATUS					0x760
+#define		TS0_ADC_DOUT_MASK			0x000003FF
+#define		TS0_ADC_DOUT_SHIFT			0
 /* APU */
 #define	CG_THERMAL_STATUS			        0x678
 
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index b205ba1..16caafe 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1387,14 +1387,12 @@
 		return r;
 	cayman_gpu_init(rdev);
 
-#if 0
-	r = cayman_blit_init(rdev);
+	r = evergreen_blit_init(rdev);
 	if (r) {
-		cayman_blit_fini(rdev);
+		evergreen_blit_fini(rdev);
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
-#endif
 
 	/* allocate wb buffer */
 	r = radeon_wb_init(rdev);
@@ -1452,7 +1450,7 @@
 
 int cayman_suspend(struct radeon_device *rdev)
 {
-	/* int r; */
+	int r;
 
 	/* FIXME: we should wait for ring to be empty */
 	cayman_cp_enable(rdev, false);
@@ -1461,14 +1459,13 @@
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
 
-#if 0
 	/* unpin shaders bo */
 	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
 	if (likely(r == 0)) {
 		radeon_bo_unpin(rdev->r600_blit.shader_obj);
 		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 	}
-#endif
+
 	return 0;
 }
 
@@ -1580,7 +1577,7 @@
 
 void cayman_fini(struct radeon_device *rdev)
 {
-	/* cayman_blit_fini(rdev); */
+	evergreen_blit_fini(rdev);
 	cayman_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 6f27593..d74d4d7 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -87,6 +87,10 @@
 MODULE_FIRMWARE("radeon/PALM_pfp.bin");
 MODULE_FIRMWARE("radeon/PALM_me.bin");
 MODULE_FIRMWARE("radeon/SUMO_rlc.bin");
+MODULE_FIRMWARE("radeon/SUMO_pfp.bin");
+MODULE_FIRMWARE("radeon/SUMO_me.bin");
+MODULE_FIRMWARE("radeon/SUMO2_pfp.bin");
+MODULE_FIRMWARE("radeon/SUMO2_me.bin");
 
 int r600_debugfs_mc_info_init(struct radeon_device *rdev);
 
@@ -2024,6 +2028,14 @@
 		chip_name = "PALM";
 		rlc_chip_name = "SUMO";
 		break;
+	case CHIP_SUMO:
+		chip_name = "SUMO";
+		rlc_chip_name = "SUMO";
+		break;
+	case CHIP_SUMO2:
+		chip_name = "SUMO2";
+		rlc_chip_name = "SUMO";
+		break;
 	default: BUG();
 	}
 
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index fd18be9..909bda8 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -71,20 +71,21 @@
 	u64			db_bo_mc;
 };
 
-#define FMT_8_BIT(fmt, vc) [fmt] = { 1, 1, 1, vc }
-#define FMT_16_BIT(fmt, vc) [fmt] = { 1, 1, 2, vc }
-#define FMT_24_BIT(fmt) [fmt] = { 1, 1, 3, 0 }
-#define FMT_32_BIT(fmt, vc) [fmt] = { 1, 1, 4, vc }
-#define FMT_48_BIT(fmt) [fmt] = { 1, 1, 6, 0 }
-#define FMT_64_BIT(fmt, vc) [fmt] = { 1, 1, 8, vc }
-#define FMT_96_BIT(fmt) [fmt] = { 1, 1, 12, 0 }
-#define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16, vc }
+#define FMT_8_BIT(fmt, vc)   [fmt] = { 1, 1, 1, vc, CHIP_R600 }
+#define FMT_16_BIT(fmt, vc)  [fmt] = { 1, 1, 2, vc, CHIP_R600 }
+#define FMT_24_BIT(fmt)      [fmt] = { 1, 1, 3,  0, CHIP_R600 }
+#define FMT_32_BIT(fmt, vc)  [fmt] = { 1, 1, 4, vc, CHIP_R600 }
+#define FMT_48_BIT(fmt)      [fmt] = { 1, 1, 6,  0, CHIP_R600 }
+#define FMT_64_BIT(fmt, vc)  [fmt] = { 1, 1, 8, vc, CHIP_R600 }
+#define FMT_96_BIT(fmt)      [fmt] = { 1, 1, 12, 0, CHIP_R600 }
+#define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16,vc, CHIP_R600 }
 
 struct gpu_formats {
 	unsigned blockwidth;
 	unsigned blockheight;
 	unsigned blocksize;
 	unsigned valid_color;
+	enum radeon_family min_family;
 };
 
 static const struct gpu_formats color_formats_table[] = {
@@ -154,7 +155,11 @@
 	[V_038004_FMT_BC3] = { 4, 4, 16, 0 },
 	[V_038004_FMT_BC4] = { 4, 4, 8, 0 },
 	[V_038004_FMT_BC5] = { 4, 4, 16, 0},
+	[V_038004_FMT_BC6] = { 4, 4, 16, 0, CHIP_CEDAR}, /* Evergreen-only */
+	[V_038004_FMT_BC7] = { 4, 4, 16, 0, CHIP_CEDAR}, /* Evergreen-only */
 
+	/* The other Evergreen formats */
+	[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
 };
 
 static inline bool fmt_is_valid_color(u32 format)
@@ -168,11 +173,14 @@
 	return false;
 }
 
-static inline bool fmt_is_valid_texture(u32 format)
+static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
 	
+	if (family < color_formats_table[format].min_family)
+		return false;
+
 	if (color_formats_table[format].blockwidth > 0)
 		return true;
 
@@ -1325,7 +1333,7 @@
 		return -EINVAL;
 	}
 	format = G_038004_DATA_FORMAT(word1);
-	if (!fmt_is_valid_texture(format)) {
+	if (!fmt_is_valid_texture(format, p->family)) {
 		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
 			 __func__, __LINE__, format);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index b2b944b..f140a0d 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -1309,6 +1309,9 @@
 #define     V_038004_FMT_BC3                           0x00000033
 #define     V_038004_FMT_BC4                           0x00000034
 #define     V_038004_FMT_BC5                           0x00000035
+#define     V_038004_FMT_BC6                           0x00000036
+#define     V_038004_FMT_BC7                           0x00000037
+#define     V_038004_FMT_32_AS_32_32_32_32             0x00000038
 #define R_038010_SQ_TEX_RESOURCE_WORD4_0             0x038010
 #define   S_038010_FORMAT_COMP_X(x)                    (((x) & 0x3) << 0)
 #define   G_038010_FORMAT_COMP_X(x)                    (((x) >> 0) & 0x3)
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index d948265..9bd162f 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -906,9 +906,9 @@
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = NULL,
-	.copy_dma = NULL,
-	.copy = NULL,
+	.copy_blit = &evergreen_copy_blit,
+	.copy_dma = &evergreen_copy_blit,
+	.copy = &evergreen_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -1020,6 +1020,8 @@
 		rdev->asic = &evergreen_asic;
 		break;
 	case CHIP_PALM:
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
 		rdev->asic = &sumo_asic;
 		break;
 	case CHIP_BARTS:
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 5b61364..d77ede3 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -82,6 +82,8 @@
 	"CYPRESS",
 	"HEMLOCK",
 	"PALM",
+	"SUMO",
+	"SUMO2",
 	"BARTS",
 	"TURKS",
 	"CAICOS",
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index ae247ee..292f73f 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -264,6 +264,8 @@
 		radeon_bo_unreserve(work->old_rbo);
 	} else
 		DRM_ERROR("failed to reserve buffer after flip\n");
+
+	drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
 	kfree(work);
 }
 
@@ -371,6 +373,8 @@
 	new_radeon_fb = to_radeon_framebuffer(fb);
 	/* schedule unpin of the old buffer */
 	obj = old_radeon_fb->obj;
+	/* take a reference to the old object */
+	drm_gem_object_reference(obj);
 	rbo = gem_to_radeon_bo(obj);
 	work->old_rbo = rbo;
 	INIT_WORK(&work->work, radeon_unpin_work_func);
@@ -378,12 +382,9 @@
 	/* We borrow the event spin lock for protecting unpin_work */
 	spin_lock_irqsave(&dev->event_lock, flags);
 	if (radeon_crtc->unpin_work) {
-		spin_unlock_irqrestore(&dev->event_lock, flags);
-		kfree(work);
-		radeon_fence_unref(&fence);
-
 		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
-		return -EBUSY;
+		r = -EBUSY;
+		goto unlock_free;
 	}
 	radeon_crtc->unpin_work = work;
 	radeon_crtc->deferred_flip_completion = 0;
@@ -497,6 +498,8 @@
 pflip_cleanup:
 	spin_lock_irqsave(&dev->event_lock, flags);
 	radeon_crtc->unpin_work = NULL;
+unlock_free:
+	drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 	radeon_fence_unref(&fence);
 	kfree(work);
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 1b55755..03f124d 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -954,10 +954,15 @@
 	int dp_lane_count = 0;
 	int connector_object_id = 0;
 	int igp_lane_info = 0;
+	int dig_encoder = dig->dig_encoder;
 
-	if (action == ATOM_TRANSMITTER_ACTION_INIT)
+	if (action == ATOM_TRANSMITTER_ACTION_INIT) {
 		connector = radeon_get_connector_for_encoder_init(encoder);
-	else
+		/* just needed to avoid bailing in the encoder check.  the encoder
+		 * isn't used for init
+		 */
+		dig_encoder = 0;
+	} else
 		connector = radeon_get_connector_for_encoder(encoder);
 
 	if (connector) {
@@ -973,7 +978,7 @@
 	}
 
 	/* no dig encoder assigned */
-	if (dig->dig_encoder == -1)
+	if (dig_encoder == -1)
 		return;
 
 	if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_DP)
@@ -1023,7 +1028,7 @@
 
 		if (dig->linkb)
 			args.v3.acConfig.ucLinkSel = 1;
-		if (dig->dig_encoder & 1)
+		if (dig_encoder & 1)
 			args.v3.acConfig.ucEncoderSel = 1;
 
 		/* Select the PLL for the PHY
@@ -1073,7 +1078,7 @@
 				args.v3.acConfig.fDualLinkConnector = 1;
 		}
 	} else if (ASIC_IS_DCE32(rdev)) {
-		args.v2.acConfig.ucEncoderSel = dig->dig_encoder;
+		args.v2.acConfig.ucEncoderSel = dig_encoder;
 		if (dig->linkb)
 			args.v2.acConfig.ucLinkSel = 1;
 
@@ -1100,7 +1105,7 @@
 	} else {
 		args.v1.ucConfig = ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL;
 
-		if (dig->dig_encoder)
+		if (dig_encoder)
 			args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER;
 		else
 			args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
diff --git a/drivers/gpu/drm/radeon/radeon_family.h b/drivers/gpu/drm/radeon/radeon_family.h
index 6f1d9e5..ec2f1ea 100644
--- a/drivers/gpu/drm/radeon/radeon_family.h
+++ b/drivers/gpu/drm/radeon/radeon_family.h
@@ -81,6 +81,8 @@
 	CHIP_CYPRESS,
 	CHIP_HEMLOCK,
 	CHIP_PALM,
+	CHIP_SUMO,
+	CHIP_SUMO2,
 	CHIP_BARTS,
 	CHIP_TURKS,
 	CHIP_CAICOS,
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 86eda1e..aaa19dc 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -487,6 +487,7 @@
 	case THERMAL_TYPE_RV6XX:
 	case THERMAL_TYPE_RV770:
 	case THERMAL_TYPE_EVERGREEN:
+	case THERMAL_TYPE_NI:
 	case THERMAL_TYPE_SUMO:
 		rdev->pm.int_hwmon_dev = hwmon_device_register(rdev->dev);
 		if (IS_ERR(rdev->pm.int_hwmon_dev)) {
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600
index 92f1900..ea49752 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r600
+++ b/drivers/gpu/drm/radeon/reg_srcs/r600
@@ -758,6 +758,5 @@
 0x00009714 VC_ENHANCE
 0x00009830 DB_DEBUG
 0x00009838 DB_WATERMARKS
-0x00028D28 DB_SRESULTS_COMPARE_STATE0
 0x00028D44 DB_ALPHA_TO_MASK
 0x00009700 VC_CNTL
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 259ece0..5b2e215 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -435,6 +435,9 @@
 		reg = regulator_get(host->dev, "vmmc_aux");
 		host->vcc_aux = IS_ERR(reg) ? NULL : reg;
 
+		/* For eMMC do not power off when not in sleep state */
+		if (mmc_slot(host).no_regulator_off_init)
+			return 0;
 		/*
 		* UGLY HACK:  workaround regulator framework bugs.
 		* When the bootloader leaves a supply active, it's
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 55e8f72..570d4da 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -416,7 +416,7 @@
 
 	/* special handling for no target buffer empty */
 	if ((!q->is_input_q &&
-	    (q->sbal[q->first_to_check]->element[15].flags & 0xff) == 0x10)) {
+	    (q->sbal[q->first_to_check]->element[15].sflags) == 0x10)) {
 		qperf_inc(q, target_full);
 		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "OUTFULL FTC:%02x",
 			      q->first_to_check);
@@ -427,8 +427,8 @@
 	DBF_ERROR((q->is_input_q) ? "IN:%2d" : "OUT:%2d", q->nr);
 	DBF_ERROR("FTC:%3d C:%3d", q->first_to_check, count);
 	DBF_ERROR("F14:%2x F15:%2x",
-		  q->sbal[q->first_to_check]->element[14].flags & 0xff,
-		  q->sbal[q->first_to_check]->element[15].flags & 0xff);
+		  q->sbal[q->first_to_check]->element[14].sflags,
+		  q->sbal[q->first_to_check]->element[15].sflags);
 
 	/*
 	 * Interrupts may be avoided as long as the error is present
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 55c6aa1..d3cee33 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -361,7 +361,7 @@
 
 static inline int qeth_is_last_sbale(struct qdio_buffer_element *sbale)
 {
-	return (sbale->flags & SBAL_FLAGS_LAST_ENTRY);
+	return (sbale->eflags & SBAL_EFLAGS_LAST_ENTRY);
 }
 
 enum qeth_qdio_buffer_states {
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 503678a..dd08f7b 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -890,7 +890,7 @@
 	struct sk_buff *skb;
 
 	/* is PCI flag set on buffer? */
-	if (buf->buffer->element[0].flags & 0x40)
+	if (buf->buffer->element[0].sflags & SBAL_SFLAGS0_PCI_REQ)
 		atomic_dec(&queue->set_pci_flags_count);
 
 	skb = skb_dequeue(&buf->skb_list);
@@ -906,9 +906,11 @@
 		buf->is_header[i] = 0;
 		buf->buffer->element[i].length = 0;
 		buf->buffer->element[i].addr = NULL;
-		buf->buffer->element[i].flags = 0;
+		buf->buffer->element[i].eflags = 0;
+		buf->buffer->element[i].sflags = 0;
 	}
-	buf->buffer->element[15].flags = 0;
+	buf->buffer->element[15].eflags = 0;
+	buf->buffer->element[15].sflags = 0;
 	buf->next_element_to_fill = 0;
 	atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY);
 }
@@ -2368,9 +2370,10 @@
 		buf->buffer->element[i].length = PAGE_SIZE;
 		buf->buffer->element[i].addr =  pool_entry->elements[i];
 		if (i == QETH_MAX_BUFFER_ELEMENTS(card) - 1)
-			buf->buffer->element[i].flags = SBAL_FLAGS_LAST_ENTRY;
+			buf->buffer->element[i].eflags = SBAL_EFLAGS_LAST_ENTRY;
 		else
-			buf->buffer->element[i].flags = 0;
+			buf->buffer->element[i].eflags = 0;
+		buf->buffer->element[i].sflags = 0;
 	}
 	return 0;
 }
@@ -2718,11 +2721,11 @@
 	if (qdio_error) {
 		QETH_CARD_TEXT(card, 2, dbftext);
 		QETH_CARD_TEXT_(card, 2, " F15=%02X",
-			       buf->element[15].flags & 0xff);
+			       buf->element[15].sflags);
 		QETH_CARD_TEXT_(card, 2, " F14=%02X",
-			       buf->element[14].flags & 0xff);
+			       buf->element[14].sflags);
 		QETH_CARD_TEXT_(card, 2, " qerr=%X", qdio_error);
-		if ((buf->element[15].flags & 0xff) == 0x12) {
+		if ((buf->element[15].sflags) == 0x12) {
 			card->stats.rx_dropped++;
 			return 0;
 		} else
@@ -2798,7 +2801,7 @@
 static int qeth_handle_send_error(struct qeth_card *card,
 		struct qeth_qdio_out_buffer *buffer, unsigned int qdio_err)
 {
-	int sbalf15 = buffer->buffer->element[15].flags & 0xff;
+	int sbalf15 = buffer->buffer->element[15].sflags;
 
 	QETH_CARD_TEXT(card, 6, "hdsnderr");
 	if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -2907,8 +2910,8 @@
 
 	for (i = index; i < index + count; ++i) {
 		buf = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q];
-		buf->buffer->element[buf->next_element_to_fill - 1].flags |=
-				SBAL_FLAGS_LAST_ENTRY;
+		buf->buffer->element[buf->next_element_to_fill - 1].eflags |=
+				SBAL_EFLAGS_LAST_ENTRY;
 
 		if (queue->card->info.type == QETH_CARD_TYPE_IQD)
 			continue;
@@ -2921,7 +2924,7 @@
 				/* it's likely that we'll go to packing
 				 * mode soon */
 				atomic_inc(&queue->set_pci_flags_count);
-				buf->buffer->element[0].flags |= 0x40;
+				buf->buffer->element[0].sflags |= SBAL_SFLAGS0_PCI_REQ;
 			}
 		} else {
 			if (!atomic_read(&queue->set_pci_flags_count)) {
@@ -2934,7 +2937,7 @@
 				 * further send was requested by the stack
 				 */
 				atomic_inc(&queue->set_pci_flags_count);
-				buf->buffer->element[0].flags |= 0x40;
+				buf->buffer->element[0].sflags |= SBAL_SFLAGS0_PCI_REQ;
 			}
 		}
 	}
@@ -3180,20 +3183,20 @@
 		if (!length) {
 			if (first_lap)
 				if (skb_shinfo(skb)->nr_frags)
-					buffer->element[element].flags =
-						SBAL_FLAGS_FIRST_FRAG;
+					buffer->element[element].eflags =
+						SBAL_EFLAGS_FIRST_FRAG;
 				else
-					buffer->element[element].flags = 0;
+					buffer->element[element].eflags = 0;
 			else
-				buffer->element[element].flags =
-				    SBAL_FLAGS_MIDDLE_FRAG;
+				buffer->element[element].eflags =
+				    SBAL_EFLAGS_MIDDLE_FRAG;
 		} else {
 			if (first_lap)
-				buffer->element[element].flags =
-				    SBAL_FLAGS_FIRST_FRAG;
+				buffer->element[element].eflags =
+				    SBAL_EFLAGS_FIRST_FRAG;
 			else
-				buffer->element[element].flags =
-				    SBAL_FLAGS_MIDDLE_FRAG;
+				buffer->element[element].eflags =
+				    SBAL_EFLAGS_MIDDLE_FRAG;
 		}
 		data += length_here;
 		element++;
@@ -3205,12 +3208,12 @@
 		buffer->element[element].addr = (char *)page_to_phys(frag->page)
 			+ frag->page_offset;
 		buffer->element[element].length = frag->size;
-		buffer->element[element].flags = SBAL_FLAGS_MIDDLE_FRAG;
+		buffer->element[element].eflags = SBAL_EFLAGS_MIDDLE_FRAG;
 		element++;
 	}
 
-	if (buffer->element[element - 1].flags)
-		buffer->element[element - 1].flags = SBAL_FLAGS_LAST_FRAG;
+	if (buffer->element[element - 1].eflags)
+		buffer->element[element - 1].eflags = SBAL_EFLAGS_LAST_FRAG;
 	*next_element_to_fill = element;
 }
 
@@ -3234,7 +3237,7 @@
 		/*fill first buffer entry only with header information */
 		buffer->element[element].addr = skb->data;
 		buffer->element[element].length = hdr_len;
-		buffer->element[element].flags = SBAL_FLAGS_FIRST_FRAG;
+		buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG;
 		buf->next_element_to_fill++;
 		skb->data += hdr_len;
 		skb->len  -= hdr_len;
@@ -3246,7 +3249,7 @@
 		buffer->element[element].addr = hdr;
 		buffer->element[element].length = sizeof(struct qeth_hdr) +
 							hd_len;
-		buffer->element[element].flags = SBAL_FLAGS_FIRST_FRAG;
+		buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG;
 		buf->is_header[element] = 1;
 		buf->next_element_to_fill++;
 	}
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 8512b5c..022fb6a 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -640,7 +640,7 @@
 }
 
 static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio,
-						u32 fsf_cmd, u32 sbtype,
+						u32 fsf_cmd, u8 sbtype,
 						mempool_t *pool)
 {
 	struct zfcp_adapter *adapter = qdio->adapter;
@@ -841,7 +841,7 @@
 	if (zfcp_qdio_sbal_get(qdio))
 		goto out;
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_ABORT_FCP_CMND,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.scsi_abort);
 	if (IS_ERR(req)) {
 		req = NULL;
@@ -1012,7 +1012,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_SEND_GENERIC,
-				  SBAL_FLAGS0_TYPE_WRITE_READ, pool);
+				  SBAL_SFLAGS0_TYPE_WRITE_READ, pool);
 
 	if (IS_ERR(req)) {
 		ret = PTR_ERR(req);
@@ -1110,7 +1110,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_SEND_ELS,
-				  SBAL_FLAGS0_TYPE_WRITE_READ, NULL);
+				  SBAL_SFLAGS0_TYPE_WRITE_READ, NULL);
 
 	if (IS_ERR(req)) {
 		ret = PTR_ERR(req);
@@ -1156,7 +1156,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_EXCHANGE_CONFIG_DATA,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -1198,7 +1198,7 @@
 		goto out_unlock;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_EXCHANGE_CONFIG_DATA,
-				  SBAL_FLAGS0_TYPE_READ, NULL);
+				  SBAL_SFLAGS0_TYPE_READ, NULL);
 
 	if (IS_ERR(req)) {
 		retval = PTR_ERR(req);
@@ -1250,7 +1250,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_EXCHANGE_PORT_DATA,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -1296,7 +1296,7 @@
 		goto out_unlock;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_EXCHANGE_PORT_DATA,
-				  SBAL_FLAGS0_TYPE_READ, NULL);
+				  SBAL_SFLAGS0_TYPE_READ, NULL);
 
 	if (IS_ERR(req)) {
 		retval = PTR_ERR(req);
@@ -1412,7 +1412,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_OPEN_PORT_WITH_DID,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -1478,7 +1478,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_CLOSE_PORT,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -1553,7 +1553,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_OPEN_PORT_WITH_DID,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -1606,7 +1606,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_CLOSE_PORT,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -1698,7 +1698,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_CLOSE_PHYSICAL_PORT,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -1812,7 +1812,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_OPEN_LUN,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -1901,7 +1901,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_CLOSE_LUN,
-				  SBAL_FLAGS0_TYPE_READ,
+				  SBAL_SFLAGS0_TYPE_READ,
 				  qdio->adapter->pool.erp_req);
 
 	if (IS_ERR(req)) {
@@ -2161,7 +2161,7 @@
 {
 	struct zfcp_fsf_req *req;
 	struct fcp_cmnd *fcp_cmnd;
-	unsigned int sbtype = SBAL_FLAGS0_TYPE_READ;
+	u8 sbtype = SBAL_SFLAGS0_TYPE_READ;
 	int real_bytes, retval = -EIO, dix_bytes = 0;
 	struct scsi_device *sdev = scsi_cmnd->device;
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
@@ -2181,7 +2181,7 @@
 	}
 
 	if (scsi_cmnd->sc_data_direction == DMA_TO_DEVICE)
-		sbtype = SBAL_FLAGS0_TYPE_WRITE;
+		sbtype = SBAL_SFLAGS0_TYPE_WRITE;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_FCP_CMND,
 				  sbtype, adapter->pool.scsi_req);
@@ -2280,7 +2280,7 @@
 		goto out;
 
 	req = zfcp_fsf_req_create(qdio, FSF_QTCB_FCP_CMND,
-				  SBAL_FLAGS0_TYPE_WRITE,
+				  SBAL_SFLAGS0_TYPE_WRITE,
 				  qdio->adapter->pool.scsi_req);
 
 	if (IS_ERR(req)) {
@@ -2328,17 +2328,18 @@
 	struct zfcp_qdio *qdio = adapter->qdio;
 	struct zfcp_fsf_req *req = NULL;
 	struct fsf_qtcb_bottom_support *bottom;
-	int direction, retval = -EIO, bytes;
+	int retval = -EIO, bytes;
+	u8 direction;
 
 	if (!(adapter->adapter_features & FSF_FEATURE_CFDC))
 		return ERR_PTR(-EOPNOTSUPP);
 
 	switch (fsf_cfdc->command) {
 	case FSF_QTCB_DOWNLOAD_CONTROL_FILE:
-		direction = SBAL_FLAGS0_TYPE_WRITE;
+		direction = SBAL_SFLAGS0_TYPE_WRITE;
 		break;
 	case FSF_QTCB_UPLOAD_CONTROL_FILE:
-		direction = SBAL_FLAGS0_TYPE_READ;
+		direction = SBAL_SFLAGS0_TYPE_READ;
 		break;
 	default:
 		return ERR_PTR(-EINVAL);
@@ -2413,7 +2414,7 @@
 		fsf_req->qdio_req.sbal_response = sbal_idx;
 		zfcp_fsf_req_complete(fsf_req);
 
-		if (likely(sbale->flags & SBAL_FLAGS_LAST_ENTRY))
+		if (likely(sbale->eflags & SBAL_EFLAGS_LAST_ENTRY))
 			break;
 	}
 }
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 98e97d9..d9c40ea 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -124,7 +124,7 @@
 
 	/* set last entry flag in current SBALE of current SBAL */
 	sbale = zfcp_qdio_sbale_curr(qdio, q_req);
-	sbale->flags |= SBAL_FLAGS_LAST_ENTRY;
+	sbale->eflags |= SBAL_EFLAGS_LAST_ENTRY;
 
 	/* don't exceed last allowed SBAL */
 	if (q_req->sbal_last == q_req->sbal_limit)
@@ -132,7 +132,7 @@
 
 	/* set chaining flag in first SBALE of current SBAL */
 	sbale = zfcp_qdio_sbale_req(qdio, q_req);
-	sbale->flags |= SBAL_FLAGS0_MORE_SBALS;
+	sbale->sflags |= SBAL_SFLAGS0_MORE_SBALS;
 
 	/* calculate index of next SBAL */
 	q_req->sbal_last++;
@@ -147,7 +147,7 @@
 
 	/* set storage-block type for new SBAL */
 	sbale = zfcp_qdio_sbale_curr(qdio, q_req);
-	sbale->flags |= q_req->sbtype;
+	sbale->sflags |= q_req->sbtype;
 
 	return sbale;
 }
@@ -177,7 +177,7 @@
 
 	/* set storage-block type for this request */
 	sbale = zfcp_qdio_sbale_req(qdio, q_req);
-	sbale->flags |= q_req->sbtype;
+	sbale->sflags |= q_req->sbtype;
 
 	for (; sg; sg = sg_next(sg)) {
 		sbale = zfcp_qdio_sbale_next(qdio, q_req);
@@ -384,7 +384,8 @@
 	for (cc = 0; cc < QDIO_MAX_BUFFERS_PER_Q; cc++) {
 		sbale = &(qdio->res_q[cc]->element[0]);
 		sbale->length = 0;
-		sbale->flags = SBAL_FLAGS_LAST_ENTRY;
+		sbale->eflags = SBAL_EFLAGS_LAST_ENTRY;
+		sbale->sflags = 0;
 		sbale->addr = NULL;
 	}
 
diff --git a/drivers/s390/scsi/zfcp_qdio.h b/drivers/s390/scsi/zfcp_qdio.h
index 2297d8d..54e22ac 100644
--- a/drivers/s390/scsi/zfcp_qdio.h
+++ b/drivers/s390/scsi/zfcp_qdio.h
@@ -67,7 +67,7 @@
  * @qdio_outb_usage: usage of outbound queue
  */
 struct zfcp_qdio_req {
-	u32	sbtype;
+	u8	sbtype;
 	u8	sbal_number;
 	u8	sbal_first;
 	u8	sbal_last;
@@ -116,7 +116,7 @@
  */
 static inline
 void zfcp_qdio_req_init(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req,
-			unsigned long req_id, u32 sbtype, void *data, u32 len)
+			unsigned long req_id, u8 sbtype, void *data, u32 len)
 {
 	struct qdio_buffer_element *sbale;
 	int count = min(atomic_read(&qdio->req_q_free),
@@ -131,7 +131,8 @@
 
 	sbale = zfcp_qdio_sbale_req(qdio, q_req);
 	sbale->addr = (void *) req_id;
-	sbale->flags = SBAL_FLAGS0_COMMAND | sbtype;
+	sbale->eflags = 0;
+	sbale->sflags = SBAL_SFLAGS0_COMMAND | sbtype;
 
 	if (unlikely(!data))
 		return;
@@ -173,7 +174,7 @@
 	struct qdio_buffer_element *sbale;
 
 	sbale = zfcp_qdio_sbale_curr(qdio, q_req);
-	sbale->flags |= SBAL_FLAGS_LAST_ENTRY;
+	sbale->eflags |= SBAL_EFLAGS_LAST_ENTRY;
 }
 
 /**
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 1b125c2..2278dad 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -389,7 +389,6 @@
 	mutex_unlock(&inode->i_mutex);
 	if (!error)
 		d_delete(dentry);
-	dput(dentry);
 	return error;
 }
 
diff --git a/drivers/video/arcfb.c b/drivers/video/arcfb.c
index 3ec4923..c22e8d3 100644
--- a/drivers/video/arcfb.c
+++ b/drivers/video/arcfb.c
@@ -515,11 +515,10 @@
 
 	/* We need a flat backing store for the Arc's
 	   less-flat actual paged framebuffer */
-	if (!(videomemory = vmalloc(videomemorysize)))
+	videomemory = vzalloc(videomemorysize);
+	if (!videomemory)
 		return retval;
 
-	memset(videomemory, 0, videomemorysize);
-
 	info = framebuffer_alloc(sizeof(struct arcfb_par), &dev->dev);
 	if (!info)
 		goto err;
diff --git a/drivers/video/bf537-lq035.c b/drivers/video/bf537-lq035.c
index 47c21fb..bea53c1 100644
--- a/drivers/video/bf537-lq035.c
+++ b/drivers/video/bf537-lq035.c
@@ -789,6 +789,7 @@
 	i2c_add_driver(&ad5280_driver);
 
 	memset(&props, 0, sizeof(props));
+	props.type = BACKLIGHT_RAW;
 	props.max_brightness = MAX_BRIGHENESS;
 	bl_dev = backlight_device_register("bf537-bl", NULL, NULL,
 					   &bfin_lq035fb_bl_ops, &props);
diff --git a/drivers/video/broadsheetfb.c b/drivers/video/broadsheetfb.c
index ebda687..377dde3 100644
--- a/drivers/video/broadsheetfb.c
+++ b/drivers/video/broadsheetfb.c
@@ -1101,12 +1101,10 @@
 
 	videomemorysize = roundup((dpyw*dpyh), PAGE_SIZE);
 
-	videomemory = vmalloc(videomemorysize);
+	videomemory = vzalloc(videomemorysize);
 	if (!videomemory)
 		goto err_fb_rel;
 
-	memset(videomemory, 0, videomemorysize);
-
 	info->screen_base = (char *)videomemory;
 	info->fbops = &broadsheetfb_ops;
 
diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c
index fb20584..69c49df 100644
--- a/drivers/video/efifb.c
+++ b/drivers/video/efifb.c
@@ -16,6 +16,8 @@
 #include <linux/pci.h>
 #include <video/vga.h>
 
+static bool request_mem_succeeded = false;
+
 static struct fb_var_screeninfo efifb_defined __devinitdata = {
 	.activate		= FB_ACTIVATE_NOW,
 	.height			= -1,
@@ -281,7 +283,9 @@
 {
 	if (info->screen_base)
 		iounmap(info->screen_base);
-	release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
+	if (request_mem_succeeded)
+		release_mem_region(info->apertures->ranges[0].base,
+				   info->apertures->ranges[0].size);
 	framebuffer_release(info);
 }
 
@@ -326,14 +330,13 @@
 	return 0;
 }
 
-static int __devinit efifb_probe(struct platform_device *dev)
+static int __init efifb_probe(struct platform_device *dev)
 {
 	struct fb_info *info;
 	int err;
 	unsigned int size_vmode;
 	unsigned int size_remap;
 	unsigned int size_total;
-	int request_succeeded = 0;
 
 	if (!screen_info.lfb_depth)
 		screen_info.lfb_depth = 32;
@@ -387,7 +390,7 @@
 	efifb_fix.smem_len = size_remap;
 
 	if (request_mem_region(efifb_fix.smem_start, size_remap, "efifb")) {
-		request_succeeded = 1;
+		request_mem_succeeded = true;
 	} else {
 		/* We cannot make this fatal. Sometimes this comes from magic
 		   spaces our resource handlers simply don't know about */
@@ -413,7 +416,7 @@
 	info->apertures->ranges[0].base = efifb_fix.smem_start;
 	info->apertures->ranges[0].size = size_remap;
 
-	info->screen_base = ioremap(efifb_fix.smem_start, efifb_fix.smem_len);
+	info->screen_base = ioremap_wc(efifb_fix.smem_start, efifb_fix.smem_len);
 	if (!info->screen_base) {
 		printk(KERN_ERR "efifb: abort, cannot ioremap video memory "
 				"0x%x @ 0x%lx\n",
@@ -491,13 +494,12 @@
 err_release_fb:
 	framebuffer_release(info);
 err_release_mem:
-	if (request_succeeded)
+	if (request_mem_succeeded)
 		release_mem_region(efifb_fix.smem_start, size_total);
 	return err;
 }
 
 static struct platform_driver efifb_driver = {
-	.probe	= efifb_probe,
 	.driver	= {
 		.name	= "efifb",
 	},
@@ -528,13 +530,21 @@
 	if (!screen_info.lfb_linelength)
 		return -ENODEV;
 
-	ret = platform_driver_register(&efifb_driver);
+	ret = platform_device_register(&efifb_device);
+	if (ret)
+		return ret;
 
-	if (!ret) {
-		ret = platform_device_register(&efifb_device);
-		if (ret)
-			platform_driver_unregister(&efifb_driver);
+	/*
+	 * This is not just an optimization.  We will interfere
+	 * with a real driver if we get reprobed, so don't allow
+	 * it.
+	 */
+	ret = platform_driver_probe(&efifb_driver, efifb_probe);
+	if (ret) {
+		platform_device_unregister(&efifb_driver);
+		return ret;
 	}
+
 	return ret;
 }
 module_init(efifb_init);
diff --git a/drivers/video/hecubafb.c b/drivers/video/hecubafb.c
index 1b94643..fbef15f 100644
--- a/drivers/video/hecubafb.c
+++ b/drivers/video/hecubafb.c
@@ -231,11 +231,10 @@
 
 	videomemorysize = (DPY_W*DPY_H)/8;
 
-	if (!(videomemory = vmalloc(videomemorysize)))
+	videomemory = vzalloc(videomemorysize);
+	if (!videomemory)
 		return retval;
 
-	memset(videomemory, 0, videomemorysize);
-
 	info = framebuffer_alloc(sizeof(struct hecubafb_par), &dev->dev);
 	if (!info)
 		goto err_fballoc;
diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c
index d2ccfd6..f135dbe 100644
--- a/drivers/video/imxfb.c
+++ b/drivers/video/imxfb.c
@@ -856,10 +856,10 @@
 		dma_free_writecombine(&pdev->dev,fbi->map_size,fbi->map_cpu,
 			fbi->map_dma);
 failed_map:
-	clk_put(fbi->clk);
-failed_getclock:
 	iounmap(fbi->regs);
 failed_ioremap:
+	clk_put(fbi->clk);
+failed_getclock:
 	release_mem_region(res->start, resource_size(res));
 failed_req:
 	kfree(info->pseudo_palette);
diff --git a/drivers/video/metronomefb.c b/drivers/video/metronomefb.c
index ed64edf..97d45e5 100644
--- a/drivers/video/metronomefb.c
+++ b/drivers/video/metronomefb.c
@@ -628,12 +628,10 @@
 	/* we need to add a spare page because our csum caching scheme walks
 	 * to the end of the page */
 	videomemorysize = PAGE_SIZE + (fw * fh);
-	videomemory = vmalloc(videomemorysize);
+	videomemory = vzalloc(videomemorysize);
 	if (!videomemory)
 		goto err_fb_rel;
 
-	memset(videomemory, 0, videomemorysize);
-
 	info->screen_base = (char __force __iomem *)videomemory;
 	info->fbops = &metronomefb_ops;
 
diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c
index 48c3ea8..cb175fe 100644
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -1128,3 +1128,4 @@
 EXPORT_SYMBOL(fb_find_nearest_mode);
 EXPORT_SYMBOL(fb_videomode_to_modelist);
 EXPORT_SYMBOL(fb_find_mode);
+EXPORT_SYMBOL(fb_find_mode_cvt);
diff --git a/drivers/video/pxa168fb.c b/drivers/video/pxa168fb.c
index 35f61dd..bb95ec5 100644
--- a/drivers/video/pxa168fb.c
+++ b/drivers/video/pxa168fb.c
@@ -623,19 +623,21 @@
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
 		dev_err(&pdev->dev, "no IO memory defined\n");
-		return -ENOENT;
+		ret = -ENOENT;
+		goto failed_put_clk;
 	}
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
 		dev_err(&pdev->dev, "no IRQ defined\n");
-		return -ENOENT;
+		ret = -ENOENT;
+		goto failed_put_clk;
 	}
 
 	info = framebuffer_alloc(sizeof(struct pxa168fb_info), &pdev->dev);
 	if (info == NULL) {
-		clk_put(clk);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto failed_put_clk;
 	}
 
 	/* Initialize private data */
@@ -671,7 +673,7 @@
 	fbi->reg_base = ioremap_nocache(res->start, resource_size(res));
 	if (fbi->reg_base == NULL) {
 		ret = -ENOMEM;
-		goto failed;
+		goto failed_free_info;
 	}
 
 	/*
@@ -683,7 +685,7 @@
 						&fbi->fb_start_dma, GFP_KERNEL);
 	if (info->screen_base == NULL) {
 		ret = -ENOMEM;
-		goto failed;
+		goto failed_free_info;
 	}
 
 	info->fix.smem_start = (unsigned long)fbi->fb_start_dma;
@@ -772,8 +774,9 @@
 failed_free_fbmem:
 	dma_free_coherent(fbi->dev, info->fix.smem_len,
 			info->screen_base, fbi->fb_start_dma);
-failed:
+failed_free_info:
 	kfree(info);
+failed_put_clk:
 	clk_put(clk);
 
 	dev_err(&pdev->dev, "frame buffer device init failed with %d\n", ret);
diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c
index 3b7f2f5..4de541c 100644
--- a/drivers/video/savage/savagefb_driver.c
+++ b/drivers/video/savage/savagefb_driver.c
@@ -2237,6 +2237,22 @@
 				 &info->modelist);
 #endif
 	info->var = savagefb_var800x600x8;
+	/* if a panel was detected, default to a CVT mode instead */
+	if (par->SavagePanelWidth) {
+		struct fb_videomode cvt_mode;
+
+		memset(&cvt_mode, 0, sizeof(cvt_mode));
+		cvt_mode.xres = par->SavagePanelWidth;
+		cvt_mode.yres = par->SavagePanelHeight;
+		cvt_mode.refresh = 60;
+		/* FIXME: if we know there is only the panel
+		 * we can enable reduced blanking as well */
+		if (fb_find_mode_cvt(&cvt_mode, 0, 0))
+			printk(KERN_WARNING "No CVT mode found for panel\n");
+		else if (fb_find_mode(&info->var, info, NULL, NULL, 0,
+				      &cvt_mode, 0) != 3)
+			info->var = savagefb_var800x600x8;
+	}
 
 	if (mode_option) {
 		fb_find_mode(&info->var, info, mode_option,
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index 404c03b..019dbd3 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -470,7 +470,7 @@
 	unsigned long tmp;
 	int bpp = 0;
 	unsigned long ldddsr;
-	int k, m;
+	int k, m, ret;
 
 	/* enable clocks before accessing the hardware */
 	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
@@ -540,7 +540,7 @@
 
 		board_cfg = &ch->cfg.board_cfg;
 		if (board_cfg->setup_sys) {
-			int ret = board_cfg->setup_sys(board_cfg->board_data,
+			ret = board_cfg->setup_sys(board_cfg->board_data,
 						ch, &sh_mobile_lcdc_sys_bus_ops);
 			if (ret)
 				return ret;
diff --git a/drivers/video/vga16fb.c b/drivers/video/vga16fb.c
index 53b2c5a..305c975 100644
--- a/drivers/video/vga16fb.c
+++ b/drivers/video/vga16fb.c
@@ -1265,9 +1265,11 @@
 
 static void vga16fb_destroy(struct fb_info *info)
 {
+	struct platform_device *dev = container_of(info->device, struct platform_device, dev);
 	iounmap(info->screen_base);
 	fb_dealloc_cmap(&info->cmap);
 	/* XXX unshare VGA regions */
+	platform_set_drvdata(dev, NULL);
 	framebuffer_release(info);
 }
 
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index a20218c..beac52f 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -395,10 +395,9 @@
 	spin_lock_init(&info->dirty_lock);
 	spin_lock_init(&info->resize_lock);
 
-	info->fb = vmalloc(fb_size);
+	info->fb = vzalloc(fb_size);
 	if (info->fb == NULL)
 		goto error_nomem;
-	memset(info->fb, 0, fb_size);
 
 	info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 117e74e..0bb4ebb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -825,7 +825,7 @@
 	} else {
 		char b[BDEVNAME_SIZE];
 
-		s->s_flags = flags;
+		s->s_flags = flags | MS_NOSEC;
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
 		error = btrfs_fill_super(s, fs_devices, data,
 					 flags & MS_SILENT ? 1 : 0);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7257752..7018e1d 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -102,7 +102,7 @@
 		if (attr & ATTR_SYS)
 			inode->i_flags |= S_IMMUTABLE;
 		else
-			inode->i_flags &= S_IMMUTABLE;
+			inode->i_flags &= ~S_IMMUTABLE;
 	}
 
 	fat_save_attrs(inode, attr);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cc6ec4b..38f84cd 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -921,6 +921,8 @@
 	if (sb->s_flags & MS_MANDLOCK)
 		goto err;
 
+	sb->s_flags &= ~MS_NOSEC;
+
 	if (!parse_fuse_opt((char *) data, &d, is_bdev))
 		goto err;
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2792a79..1c1336e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -663,14 +663,19 @@
 		drop_ref = 1;
 	}
 	spin_lock(&gl->gl_spin);
-	if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
 	    gl->gl_state != LM_ST_UNLOCKED &&
 	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
 		unsigned long holdtime, now = jiffies;
+
 		holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
 		if (time_before(now, holdtime))
 			delay = holdtime - now;
-		set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
+
+		if (!delay) {
+			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
+			set_bit(GLF_DEMOTE, &gl->gl_flags);
+		}
 	}
 	run_queue(gl, 0);
 	spin_unlock(&gl->gl_spin);
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 278e3fb..583636f 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1123,7 +1123,7 @@
 	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
 				 log);
 	if (IS_ERR(bdev)) {
-		rc = -PTR_ERR(bdev);
+		rc = PTR_ERR(bdev);
 		goto free;
 	}
 
diff --git a/fs/namei.c b/fs/namei.c
index e2e4e8d..9802345 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2624,6 +2624,10 @@
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
 		goto exit2;
+	if (!dentry->d_inode) {
+		error = -ENOENT;
+		goto exit3;
+	}
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto exit3;
@@ -2709,11 +2713,10 @@
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		/* Why not before? Because we want correct error value */
-		if (nd.last.name[nd.last.len])
-			goto slashes;
 		inode = dentry->d_inode;
-		if (inode)
-			ihold(inode);
+		if (nd.last.name[nd.last.len] || !inode)
+			goto slashes;
+		ihold(inode);
 		error = mnt_want_write(nd.path.mnt);
 		if (error)
 			goto exit2;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index cdbaf5e..56f6102 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1072,7 +1072,7 @@
 
 	sb->s_magic = OCFS2_SUPER_MAGIC;
 
-	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+	sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) |
 		((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
 
 	/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
diff --git a/fs/super.c b/fs/super.c
index c755939..ab3d672 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -822,7 +822,7 @@
 	} else {
 		char b[BDEVNAME_SIZE];
 
-		s->s_flags = flags;
+		s->s_flags = flags | MS_NOSEC;
 		s->s_mode = mode;
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(bdev));
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index f04b2a3..e08f344 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -467,6 +467,17 @@
 	{0x1002, 0x9614, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9615, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9616, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9640, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9641, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9642, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9643, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9644, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
+	{0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
+	{0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x964e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
+	{0x1002, 0x964f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
 	{0x1002, 0x9710, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9711, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9712, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c55d6b7..646a183 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -208,6 +208,7 @@
 #define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
 #define MS_I_VERSION	(1<<23) /* Update inode I_version field */
 #define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
+#define MS_NOSEC	(1<<28)
 #define MS_BORN		(1<<29)
 #define MS_ACTIVE	(1<<30)
 #define MS_NOUSER	(1<<31)
@@ -2591,7 +2592,7 @@
 
 static inline void inode_has_no_xattr(struct inode *inode)
 {
-	if (!is_sxid(inode->i_mode))
+	if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC))
 		inode->i_flags |= S_NOSEC;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2a8621c..a837b20 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1063,6 +1063,7 @@
  */
 #define WF_SYNC		0x01		/* waker goes to sleep after wakup */
 #define WF_FORK		0x02		/* child wakeup after fork */
+#define WF_MIGRATED	0x04		/* internal use, task got migrated */
 
 #define ENQUEUE_WAKEUP		1
 #define ENQUEUE_HEAD		2
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 63437d0..298c927 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3426,7 +3426,7 @@
 	int ret = 0;
 
 	if (unlikely(current->lockdep_recursion))
-		return ret;
+		return 1; /* avoid false negative lockdep_assert_held() */
 
 	raw_local_irq_save(flags);
 	check_flags(flags);
diff --git a/kernel/sched.c b/kernel/sched.c
index cbb3a0e..3f2e502 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -605,10 +605,10 @@
 /*
  * Return the group to which this tasks belongs.
  *
- * We use task_subsys_state_check() and extend the RCU verification
- * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
- * holds that lock for each task it moves into the cgroup. Therefore
- * by holding that lock, we pin the task to the current cgroup.
+ * We use task_subsys_state_check() and extend the RCU verification with
+ * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
+ * task it moves into the cgroup. Therefore by holding either of those locks,
+ * we pin the task to the current cgroup.
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
@@ -616,7 +616,8 @@
 	struct cgroup_subsys_state *css;
 
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-			lockdep_is_held(&p->pi_lock));
+			lockdep_is_held(&p->pi_lock) ||
+			lockdep_is_held(&task_rq(p)->lock));
 	tg = container_of(css, struct task_group, css);
 
 	return autogroup_task_group(p, tg);
@@ -2200,6 +2201,16 @@
 			!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
 
 #ifdef CONFIG_LOCKDEP
+	/*
+	 * The caller should hold either p->pi_lock or rq->lock, when changing
+	 * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
+	 *
+	 * sched_move_task() holds both and thus holding either pins the cgroup,
+	 * see set_task_rq().
+	 *
+	 * Furthermore, all task_rq users should acquire both locks, see
+	 * task_rq_lock().
+	 */
 	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
 				      lockdep_is_held(&task_rq(p)->lock)));
 #endif
@@ -2447,6 +2458,10 @@
 		}
 		rcu_read_unlock();
 	}
+
+	if (wake_flags & WF_MIGRATED)
+		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+
 #endif /* CONFIG_SMP */
 
 	schedstat_inc(rq, ttwu_count);
@@ -2455,9 +2470,6 @@
 	if (wake_flags & WF_SYNC)
 		schedstat_inc(p, se.statistics.nr_wakeups_sync);
 
-	if (cpu != task_cpu(p))
-		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-
 #endif /* CONFIG_SCHEDSTATS */
 }
 
@@ -2600,6 +2612,7 @@
 
 #if defined(CONFIG_SMP)
 	if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+		sched_clock_cpu(cpu); /* sync clocks x-cpu */
 		ttwu_queue_remote(p, cpu);
 		return;
 	}
@@ -2674,8 +2687,10 @@
 		p->sched_class->task_waking(p);
 
 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-	if (task_cpu(p) != cpu)
+	if (task_cpu(p) != cpu) {
+		wake_flags |= WF_MIGRATED;
 		set_task_cpu(p, cpu);
+	}
 #endif /* CONFIG_SMP */
 
 	ttwu_queue(p, cpu);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index c027d4f..e4c699d 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -182,7 +182,10 @@
 	unsigned long flags;
 
 	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
-	BUG_ON(!dev->cpumask);
+	if (!dev->cpumask) {
+		WARN_ON(num_possible_cpus() > 1);
+		dev->cpumask = cpumask_of(smp_processor_id());
+	}
 
 	raw_spin_lock_irqsave(&clockevents_lock, flags);
 
diff --git a/kernel/timer.c b/kernel/timer.c
index fd61986..8cff361 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -749,16 +749,15 @@
 	unsigned long expires_limit, mask;
 	int bit;
 
-	expires_limit = expires;
-
 	if (timer->slack >= 0) {
 		expires_limit = expires + timer->slack;
 	} else {
-		unsigned long now = jiffies;
+		long delta = expires - jiffies;
 
-		/* No slack, if already expired else auto slack 0.4% */
-		if (time_after(expires, now))
-			expires_limit = expires + (expires - now)/256;
+		if (delta < 256)
+			return expires;
+
+		expires_limit = expires + delta / 256;
 	}
 	mask = expires ^ expires_limit;
 	if (mask == 0)
@@ -795,6 +794,8 @@
  */
 int mod_timer(struct timer_list *timer, unsigned long expires)
 {
+	expires = apply_slack(timer, expires);
+
 	/*
 	 * This is a common optimization triggered by the
 	 * networking code - if the timer is re-modified
@@ -803,8 +804,6 @@
 	if (timer_pending(timer) && timer->expires == expires)
 		return 1;
 
-	expires = apply_slack(timer, expires);
-
 	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
diff --git a/mm/filemap.c b/mm/filemap.c
index d7b1057..a8251a8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2000,7 +2000,7 @@
 		error = security_inode_killpriv(dentry);
 	if (!error && killsuid)
 		error = __remove_suid(dentry, killsuid);
-	if (!error)
+	if (!error && (inode->i_sb->s_flags & MS_NOSEC))
 		inode->i_flags |= S_NOSEC;
 
 	return error;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 22cdb96..96ebc06 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -467,12 +467,8 @@
 		if (!kvm->buses[i])
 			goto out_err;
 	}
+
 	spin_lock_init(&kvm->mmu_lock);
-
-	r = kvm_init_mmu_notifier(kvm);
-	if (r)
-		goto out_err;
-
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	kvm_eventfd_init(kvm);
@@ -480,6 +476,11 @@
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
+
+	r = kvm_init_mmu_notifier(kvm);
+	if (r)
+		goto out_err;
+
 	raw_spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	raw_spin_unlock(&kvm_lock);
@@ -651,7 +652,9 @@
 	/* We can read the guest memory with __xxx_user() later on. */
 	if (user_alloc &&
 	    ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
-	     !access_ok(VERIFY_WRITE, mem->userspace_addr, mem->memory_size)))
+	     !access_ok(VERIFY_WRITE,
+			(void __user *)(unsigned long)mem->userspace_addr,
+			mem->memory_size)))
 		goto out;
 	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 		goto out;