Merge branch 'core' of git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile into perf/core
diff --git a/CREDITS b/CREDITS
index 41d8e63..494b6e4 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2365,8 +2365,6 @@
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD  841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: R. Brasílio Itiberê, 4270/1010 - Água Verde
-S: 80240-060 - Curitiba - Paraná
 S: Brazil
 
 N: Karsten Merker
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cdd2a6e..316c723 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1579,20 +1579,12 @@
 
 	nmi_watchdog=	[KNL,BUGS=X86] Debugging features for SMP kernels
 			Format: [panic,][num]
-			Valid num: 0,1,2
+			Valid num: 0
 			0 - turn nmi_watchdog off
-			1 - use the IO-APIC timer for the NMI watchdog
-			2 - use the local APIC for the NMI watchdog using
-			a performance counter. Note: This will use one
-			performance counter and the local APIC's performance
-			vector.
 			When panic is specified, panic when an NMI watchdog
 			timeout occurs.
 			This is useful when you use a panic=... timeout and
 			need the box quickly up again.
-			Instead of 1 and 2 it is possible to use the following
-			symbolic names: lapic and ioapic
-			Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
 
 	netpoll.carrier_timeout=
 			[NET] Specifies amount of time (in seconds) that
@@ -2175,11 +2167,6 @@
 	reset_devices	[KNL] Force drivers to reset the underlying device
 			during initialization.
 
-	resource_alloc_from_bottom
-			Allocate new resources from the beginning of available
-			space, not the end.  If you need to use this, please
-			report a bug.
-
 	resume=		[SWSUSP]
 			Specify the partition device for software suspend
 
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 489e9ba..41cc7b3 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -379,8 +379,8 @@
       zero)
 
   bool pm_runtime_suspended(struct device *dev);
-    - return true if the device's runtime PM status is 'suspended', or false
-      otherwise
+    - return true if the device's runtime PM status is 'suspended' and its
+      'power.disable_depth' field is equal to zero, or false otherwise
 
   void pm_runtime_allow(struct device *dev);
     - set the power.runtime_auto flag for the device and decrease its usage
diff --git a/MAINTAINERS b/MAINTAINERS
index 6a58887..f1f803c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4612,7 +4612,7 @@
 M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@elte.hu>
-M:	Arnaldo Carvalho de Melo <acme@redhat.com>
+M:	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 S:	Supported
 F:	kernel/perf_event*.c
 F:	include/linux/perf_event.h
diff --git a/Makefile b/Makefile
index 5aa4427..77044b7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 37
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index fe792ca..5996e7a 100644
--- a/arch/alpha/include/asm/perf_event.h
+++ b/arch/alpha/include/asm/perf_event.h
@@ -1,10 +1,4 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-#ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
-#else
-static inline void init_hw_perf_events(void)    { }
-#endif
-
 #endif /* __ASM_ALPHA_PERF_EVENT_H */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 5f77afb..4c8bb37 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -112,8 +112,6 @@
 	wrent(entInt, 0);
 
 	alpha_mv.init_irq();
-
-	init_hw_perf_events();
 }
 
 /*
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 1cc4968..90561c4 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/hwrpb.h>
 #include <asm/atomic.h>
@@ -863,13 +864,13 @@
 /*
  * Init call to initialise performance events at kernel startup.
  */
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	pr_info("Performance events: ");
 
 	if (!supported_cpu()) {
 		pr_cont("No support for your CPU.\n");
-		return;
+		return 0;
 	}
 
 	pr_cont("Supported CPU type!\n");
@@ -881,6 +882,8 @@
 	/* And set up PMU specification */
 	alpha_pmu = &ev67_pmu;
 
-	perf_pmu_register(&pmu);
-}
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
+	return 0;
+}
+early_initcall(init_hw_perf_events);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a5035..fdfa497 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3034,11 +3034,11 @@
 		pr_info("no hardware support available\n");
 	}
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 /*
  * Callchain handling code.
diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile
index 62d686f..d13add7 100644
--- a/arch/arm/mach-at91/Makefile
+++ b/arch/arm/mach-at91/Makefile
@@ -65,7 +65,7 @@
 obj-$(CONFIG_MACH_CPU9G20)	+= board-cpu9krea.o
 obj-$(CONFIG_MACH_STAMP9G20)	+= board-stamp9g20.o
 obj-$(CONFIG_MACH_PORTUXG20)	+= board-stamp9g20.o
-obj-$(CONFIG_MACH_PCONTROL_G20)	+= board-pcontrol-g20.o
+obj-$(CONFIG_MACH_PCONTROL_G20)	+= board-pcontrol-g20.o board-stamp9g20.o
 
 # AT91SAM9260/AT91SAM9G20 board-specific support
 obj-$(CONFIG_MACH_SNAPPER_9260)	+= board-snapper9260.o
diff --git a/arch/arm/mach-at91/board-pcontrol-g20.c b/arch/arm/mach-at91/board-pcontrol-g20.c
index bba5a56..feb6578 100644
--- a/arch/arm/mach-at91/board-pcontrol-g20.c
+++ b/arch/arm/mach-at91/board-pcontrol-g20.c
@@ -31,6 +31,7 @@
 
 #include <mach/board.h>
 #include <mach/at91sam9_smc.h>
+#include <mach/stamp9g20.h>
 
 #include "sam9_smc.h"
 #include "generic.h"
@@ -38,11 +39,7 @@
 
 static void __init pcontrol_g20_map_io(void)
 {
-	/* Initialize processor: 18.432 MHz crystal */
-	at91sam9260_initialize(18432000);
-
-	/* DGBU on ttyS0. (Rx, Tx) only TTL -> JTAG connector X7 17,19 ) */
-	at91_register_uart(0, 0, 0);
+	stamp9g20_map_io();
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS) piggyback  A2 */
 	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS
@@ -54,9 +51,6 @@
 
 	/* USART2 on ttyS3. (Rx, Tx)  9bit-Bus  Multidrop-mode  X4 */
 	at91_register_uart(AT91SAM9260_ID_US4, 3, 0);
-
-	/* set serial console to ttyS0 (ie, DBGU) */
-	at91_set_serial_console(0);
 }
 
 
@@ -66,38 +60,6 @@
 }
 
 
-/*
- * NAND flash 512MiB 1,8V 8-bit, sector size 128 KiB
- */
-static struct atmel_nand_data __initdata nand_data = {
-	.ale		= 21,
-	.cle		= 22,
-	.rdy_pin	= AT91_PIN_PC13,
-	.enable_pin	= AT91_PIN_PC14,
-};
-
-/*
- * Bus timings; unit = 7.57ns
- */
-static struct sam9_smc_config __initdata nand_smc_config = {
-	.ncs_read_setup		= 0,
-	.nrd_setup		= 2,
-	.ncs_write_setup	= 0,
-	.nwe_setup		= 2,
-
-	.ncs_read_pulse		= 4,
-	.nrd_pulse		= 4,
-	.ncs_write_pulse	= 4,
-	.nwe_pulse		= 4,
-
-	.read_cycle		= 7,
-	.write_cycle		= 7,
-
-	.mode			= AT91_SMC_READMODE | AT91_SMC_WRITEMODE
-			| AT91_SMC_EXNWMODE_DISABLE | AT91_SMC_DBW_8,
-	.tdf_cycles		= 3,
-};
-
 static struct sam9_smc_config __initdata pcontrol_smc_config[2] = { {
 	.ncs_read_setup		= 16,
 	.nrd_setup		= 18,
@@ -138,14 +100,6 @@
 	.tdf_cycles		= 1,
 } };
 
-static void __init add_device_nand(void)
-{
-	/* configure chip-select 3 (NAND) */
-	sam9_smc_configure(3, &nand_smc_config);
-	at91_add_device_nand(&nand_data);
-}
-
-
 static void __init add_device_pcontrol(void)
 {
 	/* configure chip-select 4 (IO compatible to 8051  X4 ) */
@@ -156,23 +110,6 @@
 
 
 /*
- * MCI (SD/MMC)
- * det_pin, wp_pin and vcc_pin are not connected
- */
-#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE)
-static struct mci_platform_data __initdata mmc_data = {
-	.slot[0] = {
-		.bus_width	= 4,
-	},
-};
-#else
-static struct at91_mmc_data __initdata mmc_data = {
-	.wire4		= 1,
-};
-#endif
-
-
-/*
  * USB Host port
  */
 static struct at91_usbh_data __initdata usbh_data = {
@@ -265,42 +202,13 @@
 };
 
 
-/*
- * Dallas 1-Wire  DS2431
- */
-static struct w1_gpio_platform_data w1_gpio_pdata = {
-	.pin		= AT91_PIN_PA29,
-	.is_open_drain	= 1,
-};
-
-static struct platform_device w1_device = {
-	.name			= "w1-gpio",
-	.id			= -1,
-	.dev.platform_data	= &w1_gpio_pdata,
-};
-
-static void add_wire1(void)
-{
-	at91_set_GPIO_periph(w1_gpio_pdata.pin, 1);
-	at91_set_multi_drive(w1_gpio_pdata.pin, 1);
-	platform_device_register(&w1_device);
-}
-
-
 static void __init pcontrol_g20_board_init(void)
 {
-	at91_add_device_serial();
-	add_device_nand();
-#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE)
-	at91_add_device_mci(0, &mmc_data);
-#else
-	at91_add_device_mmc(0, &mmc_data);
-#endif
+	stamp9g20_board_init();
 	at91_add_device_usbh(&usbh_data);
 	at91_add_device_eth(&macb_data);
 	at91_add_device_i2c(pcontrol_g20_i2c_devices,
 		ARRAY_SIZE(pcontrol_g20_i2c_devices));
-	add_wire1();
 	add_device_pcontrol();
 	at91_add_device_spi(pcontrol_g20_spi_devices,
 		ARRAY_SIZE(pcontrol_g20_spi_devices));
diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c
index 5206eef..f8902b1 100644
--- a/arch/arm/mach-at91/board-stamp9g20.c
+++ b/arch/arm/mach-at91/board-stamp9g20.c
@@ -32,7 +32,7 @@
 #include "generic.h"
 
 
-static void __init portuxg20_map_io(void)
+void __init stamp9g20_map_io(void)
 {
 	/* Initialize processor: 18.432 MHz crystal */
 	at91sam9260_initialize(18432000);
@@ -40,6 +40,24 @@
 	/* DGBU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
+	/* set serial console to ttyS0 (ie, DBGU) */
+	at91_set_serial_console(0);
+}
+
+static void __init stamp9g20evb_map_io(void)
+{
+	stamp9g20_map_io();
+
+	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
+	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS
+						| ATMEL_UART_DTR | ATMEL_UART_DSR
+						| ATMEL_UART_DCD | ATMEL_UART_RI);
+}
+
+static void __init portuxg20_map_io(void)
+{
+	stamp9g20_map_io();
+
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
 	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS
 						| ATMEL_UART_DTR | ATMEL_UART_DSR
@@ -56,26 +74,6 @@
 
 	/* USART5 on ttyS6. (Rx, Tx only) */
 	at91_register_uart(AT91SAM9260_ID_US5, 6, 0);
-
-	/* set serial console to ttyS0 (ie, DBGU) */
-	at91_set_serial_console(0);
-}
-
-static void __init stamp9g20_map_io(void)
-{
-	/* Initialize processor: 18.432 MHz crystal */
-	at91sam9260_initialize(18432000);
-
-	/* DGBU on ttyS0. (Rx & Tx only) */
-	at91_register_uart(0, 0, 0);
-
-	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
-	at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS
-						| ATMEL_UART_DTR | ATMEL_UART_DSR
-						| ATMEL_UART_DCD | ATMEL_UART_RI);
-
-	/* set serial console to ttyS0 (ie, DBGU) */
-	at91_set_serial_console(0);
 }
 
 static void __init init_irq(void)
@@ -156,7 +154,7 @@
 	.pullup_pin	= 0,		/* pull-up driven by UDC */
 };
 
-static struct at91_udc_data __initdata stamp9g20_udc_data = {
+static struct at91_udc_data __initdata stamp9g20evb_udc_data = {
 	.vbus_pin	= AT91_PIN_PA22,
 	.pullup_pin	= 0,		/* pull-up driven by UDC */
 };
@@ -190,7 +188,7 @@
 	}
 };
 
-static struct gpio_led stamp9g20_leds[] = {
+static struct gpio_led stamp9g20evb_leds[] = {
 	{
 		.name			= "D8",
 		.gpio			= AT91_PIN_PB18,
@@ -250,7 +248,7 @@
 }
 
 
-static void __init generic_board_init(void)
+void __init stamp9g20_board_init(void)
 {
 	/* Serial */
 	at91_add_device_serial();
@@ -262,34 +260,40 @@
 #else
 	at91_add_device_mmc(0, &mmc_data);
 #endif
-	/* USB Host */
-	at91_add_device_usbh(&usbh_data);
-	/* Ethernet */
-	at91_add_device_eth(&macb_data);
-	/* I2C */
-	at91_add_device_i2c(NULL, 0);
 	/* W1 */
 	add_w1();
 }
 
 static void __init portuxg20_board_init(void)
 {
-	generic_board_init();
-	/* SPI */
-	at91_add_device_spi(portuxg20_spi_devices, ARRAY_SIZE(portuxg20_spi_devices));
+	stamp9g20_board_init();
+	/* USB Host */
+	at91_add_device_usbh(&usbh_data);
 	/* USB Device */
 	at91_add_device_udc(&portuxg20_udc_data);
+	/* Ethernet */
+	at91_add_device_eth(&macb_data);
+	/* I2C */
+	at91_add_device_i2c(NULL, 0);
+	/* SPI */
+	at91_add_device_spi(portuxg20_spi_devices, ARRAY_SIZE(portuxg20_spi_devices));
 	/* LEDs */
 	at91_gpio_leds(portuxg20_leds, ARRAY_SIZE(portuxg20_leds));
 }
 
-static void __init stamp9g20_board_init(void)
+static void __init stamp9g20evb_board_init(void)
 {
-	generic_board_init();
+	stamp9g20_board_init();
+	/* USB Host */
+	at91_add_device_usbh(&usbh_data);
 	/* USB Device */
-	at91_add_device_udc(&stamp9g20_udc_data);
+	at91_add_device_udc(&stamp9g20evb_udc_data);
+	/* Ethernet */
+	at91_add_device_eth(&macb_data);
+	/* I2C */
+	at91_add_device_i2c(NULL, 0);
 	/* LEDs */
-	at91_gpio_leds(stamp9g20_leds, ARRAY_SIZE(stamp9g20_leds));
+	at91_gpio_leds(stamp9g20evb_leds, ARRAY_SIZE(stamp9g20evb_leds));
 }
 
 MACHINE_START(PORTUXG20, "taskit PortuxG20")
@@ -305,7 +309,7 @@
 	/* Maintainer: taskit GmbH */
 	.boot_params	= AT91_SDRAM_BASE + 0x100,
 	.timer		= &at91sam926x_timer,
-	.map_io		= stamp9g20_map_io,
+	.map_io		= stamp9g20evb_map_io,
 	.init_irq	= init_irq,
-	.init_machine	= stamp9g20_board_init,
+	.init_machine	= stamp9g20evb_board_init,
 MACHINE_END
diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c
index 7525cee..9113da6 100644
--- a/arch/arm/mach-at91/clock.c
+++ b/arch/arm/mach-at91/clock.c
@@ -658,7 +658,7 @@
 	/* Now set uhpck values */
 	uhpck.parent = &utmi_clk;
 	uhpck.pmc_mask = AT91SAM926x_PMC_UHP;
-	uhpck.rate_hz = utmi_clk.parent->rate_hz;
+	uhpck.rate_hz = utmi_clk.rate_hz;
 	uhpck.rate_hz /= 1 + ((at91_sys_read(AT91_PMC_USB) & AT91_PMC_OHCIUSBDIV) >> 8);
 }
 
diff --git a/arch/arm/mach-at91/include/mach/stamp9g20.h b/arch/arm/mach-at91/include/mach/stamp9g20.h
new file mode 100644
index 0000000..6120f9c
--- /dev/null
+++ b/arch/arm/mach-at91/include/mach/stamp9g20.h
@@ -0,0 +1,7 @@
+#ifndef __MACH_STAMP9G20_H
+#define __MACH_STAMP9G20_H
+
+void stamp9g20_map_io(void);
+void stamp9g20_board_init(void);
+
+#endif
diff --git a/arch/arm/mach-s3c2412/Kconfig b/arch/arm/mach-s3c2412/Kconfig
index fa2e5bf..6983cb4 100644
--- a/arch/arm/mach-s3c2412/Kconfig
+++ b/arch/arm/mach-s3c2412/Kconfig
@@ -28,9 +28,16 @@
 
 config S3C2412_PM
 	bool
+	select S3C2412_PM_SLEEP
 	help
 	  Internal config node to apply S3C2412 power management
 
+config S3C2412_PM_SLEEP
+	bool
+	help
+	  Internal config node to apply sleep for S3C2412 power management.
+	  Can be selected by another SoCs with similar sleep procedure.
+
 # Note, the S3C2412 IOtiming support is in plat-s3c24xx
 
 config S3C2412_CPUFREQ
diff --git a/arch/arm/mach-s3c2412/Makefile b/arch/arm/mach-s3c2412/Makefile
index 530ec46..6c48a91 100644
--- a/arch/arm/mach-s3c2412/Makefile
+++ b/arch/arm/mach-s3c2412/Makefile
@@ -14,7 +14,8 @@
 obj-$(CONFIG_CPU_S3C2412)	+= clock.o
 obj-$(CONFIG_CPU_S3C2412)	+= gpio.o
 obj-$(CONFIG_S3C2412_DMA)	+= dma.o
-obj-$(CONFIG_S3C2412_PM)	+= pm.o sleep.o
+obj-$(CONFIG_S3C2412_PM)	+= pm.o
+obj-$(CONFIG_S3C2412_PM_SLEEP)	+= sleep.o
 obj-$(CONFIG_S3C2412_CPUFREQ)	+= cpu-freq.o
 
 # Machine support
diff --git a/arch/arm/mach-s3c2416/Kconfig b/arch/arm/mach-s3c2416/Kconfig
index 27b3e7c..df8d149 100644
--- a/arch/arm/mach-s3c2416/Kconfig
+++ b/arch/arm/mach-s3c2416/Kconfig
@@ -27,6 +27,7 @@
 
 config S3C2416_PM
 	bool
+	select S3C2412_PM_SLEEP
 	help
 	  Internal config node to apply S3C2416 power management
 
diff --git a/arch/arm/mach-s5pv210/mach-aquila.c b/arch/arm/mach-s5pv210/mach-aquila.c
index 28677ca..461aa035 100644
--- a/arch/arm/mach-s5pv210/mach-aquila.c
+++ b/arch/arm/mach-s5pv210/mach-aquila.c
@@ -378,6 +378,12 @@
 static struct max8998_platform_data aquila_max8998_pdata = {
 	.num_regulators	= ARRAY_SIZE(aquila_regulators),
 	.regulators	= aquila_regulators,
+	.buck1_set1	= S5PV210_GPH0(3),
+	.buck1_set2	= S5PV210_GPH0(4),
+	.buck2_set3	= S5PV210_GPH0(5),
+	.buck1_max_voltage1 = 1200000,
+	.buck1_max_voltage2 = 1200000,
+	.buck2_max_voltage = 1200000,
 };
 #endif
 
diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index b1dcf96..e22d511 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -518,6 +518,12 @@
 static struct max8998_platform_data goni_max8998_pdata = {
 	.num_regulators	= ARRAY_SIZE(goni_regulators),
 	.regulators	= goni_regulators,
+	.buck1_set1	= S5PV210_GPH0(3),
+	.buck1_set2	= S5PV210_GPH0(4),
+	.buck2_set3	= S5PV210_GPH0(5),
+	.buck1_max_voltage1 = 1200000,
+	.buck1_max_voltage2 = 1200000,
+	.buck2_max_voltage = 1200000,
 };
 #endif
 
diff --git a/arch/arm/mach-shmobile/include/mach/entry-macro.S b/arch/arm/mach-shmobile/include/mach/entry-macro.S
index a285d13..f428c4d 100644
--- a/arch/arm/mach-shmobile/include/mach/entry-macro.S
+++ b/arch/arm/mach-shmobile/include/mach/entry-macro.S
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2010 Magnus Damm
  * Copyright (C) 2008 Renesas Solutions Corp.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -14,24 +15,45 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-#include <mach/hardware.h>
 #include <mach/irqs.h>
 
+#define INTCA_BASE	0xe6980000
+#define INTFLGA_OFFS	0x00000018 /* accept pending interrupt */
+#define INTEVTA_OFFS	0x00000020 /* vector number of accepted interrupt */
+#define INTLVLA_OFFS	0x00000030 /* priority level of accepted interrupt */
+#define INTLVLB_OFFS	0x00000034 /* previous priority level */
+
 	.macro  disable_fiq
 	.endm
 
 	.macro  get_irqnr_preamble, base, tmp
-	ldr     \base, =INTFLGA
+	ldr     \base, =INTCA_BASE
 	.endm
 
 	.macro  arch_ret_to_user, tmp1, tmp2
 	.endm
 
 	.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-	ldr     \irqnr, [\base]
+	/* The single INTFLGA read access below results in the following:
+	 *
+	 * 1. INTLVLB is updated with old priority value from INTLVLA
+	 * 2. Highest priority interrupt is accepted
+	 * 3. INTLVLA is updated to contain priority of accepted interrupt
+	 * 4. Accepted interrupt vector is stored in INTFLGA and INTEVTA
+	 */
+	ldr     \irqnr, [\base, #INTFLGA_OFFS]
+
+	/* Restore INTLVLA with the value saved in INTLVLB.
+	 * This is required to support interrupt priorities properly.
+	 */
+	ldrb	\tmp, [\base, #INTLVLB_OFFS]
+	strb    \tmp, [\base, #INTLVLA_OFFS]
+
+	/* Handle invalid vector number case */
 	cmp	\irqnr, #0
 	beq	1000f
-	/* intevt to irq number */
+
+	/* Convert vector to irq number, same as the evt2irq() macro */
 	lsr	\irqnr, \irqnr, #0x5
 	subs	\irqnr, \irqnr, #16
 
diff --git a/arch/arm/mach-shmobile/include/mach/vmalloc.h b/arch/arm/mach-shmobile/include/mach/vmalloc.h
index 4aecf6e..2b8fd8b 100644
--- a/arch/arm/mach-shmobile/include/mach/vmalloc.h
+++ b/arch/arm/mach-shmobile/include/mach/vmalloc.h
@@ -2,6 +2,6 @@
 #define __ASM_MACH_VMALLOC_H
 
 /* Vmalloc at ... - 0xe5ffffff */
-#define VMALLOC_END 0xe6000000
+#define VMALLOC_END 0xe6000000UL
 
 #endif /* __ASM_MACH_VMALLOC_H */
diff --git a/arch/arm/plat-s3c24xx/Kconfig b/arch/arm/plat-s3c24xx/Kconfig
index 5a27b1b..eb105e6 100644
--- a/arch/arm/plat-s3c24xx/Kconfig
+++ b/arch/arm/plat-s3c24xx/Kconfig
@@ -8,7 +8,7 @@
 	default y
 	select NO_IOPORT
 	select ARCH_REQUIRE_GPIOLIB
-	select S3C_DEVICE_NAND
+	select S3C_DEV_NAND
 	select S3C_GPIO_CFG_S3C24XX
 	help
 	  Base platform code for any Samsung S3C24XX device
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 67a2fa2..0a9b5b8 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -19,6 +19,8 @@
 	select GENERIC_ATOMIC64 if !64BIT
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
+	select HAVE_GENERIC_HARDIRQS
+	select GENERIC_IRQ_PROBE
 
 menu "Machine selection"
 
@@ -1664,6 +1666,28 @@
 
 endchoice
 
+config FORCE_MAX_ZONEORDER
+	int "Maximum zone order"
+	range 13 64 if SYS_SUPPORTS_HUGETLBFS && PAGE_SIZE_32KB
+	default "13" if SYS_SUPPORTS_HUGETLBFS && PAGE_SIZE_32KB
+	range 12 64 if SYS_SUPPORTS_HUGETLBFS && PAGE_SIZE_16KB
+	default "12" if SYS_SUPPORTS_HUGETLBFS && PAGE_SIZE_16KB
+	range 11 64
+	default "11"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
+	  The page size is not necessarily 4KB.  Keep this in mind
+	  when choosing a value for this option.
+
 config BOARD_SCACHE
 	bool
 
@@ -1922,20 +1946,6 @@
 	bool
 
 #
-# Use the generic interrupt handling code in kernel/irq/:
-#
-config GENERIC_HARDIRQS
-	bool
-	default y
-
-config GENERIC_IRQ_PROBE
-	bool
-	default y
-
-config IRQ_PER_CPU
-	bool
-
-#
 # - Highmem only makes sense for the 32-bit kernel.
 # - The current highmem code will only work properly on physically indexed
 #   caches such as R3000, SB1, R7000 or those that look like they're virtually
diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index 3691630..9e7814d 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -27,6 +27,7 @@
 static void alchemy_8250_pm(struct uart_port *port, unsigned int state,
 			    unsigned int old_state)
 {
+#ifdef CONFIG_SERIAL_8250
 	switch (state) {
 	case 0:
 		if ((__raw_readl(port->membase + UART_MOD_CNTRL) & 3) != 3) {
@@ -49,6 +50,7 @@
 		serial8250_do_pm(port, state, old_state);
 		break;
 	}
+#endif
 }
 
 #define PORT(_base, _irq)					\
diff --git a/arch/mips/alchemy/devboards/prom.c b/arch/mips/alchemy/devboards/prom.c
index b30df5c..baeb213 100644
--- a/arch/mips/alchemy/devboards/prom.c
+++ b/arch/mips/alchemy/devboards/prom.c
@@ -54,10 +54,9 @@
 
 	prom_init_cmdline();
 	memsize_str = prom_getenv("memsize");
-	if (!memsize_str)
+	if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize))
 		memsize = ALCHEMY_BOARD_DEFAULT_MEMSIZE;
-	else
-		strict_strtoul(memsize_str, 0, &memsize);
+
 	add_memory_region(0, memsize, BOOT_MEM_RAM);
 }
 
diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c
index fc0e715..2ca4ada 100644
--- a/arch/mips/ar7/clock.c
+++ b/arch/mips/ar7/clock.c
@@ -239,12 +239,12 @@
 	calculate(base_clock, frequency, &prediv, &postdiv, &mul);
 
 	writel(((prediv - 1) << PREDIV_SHIFT) | (postdiv - 1), &clock->ctrl);
-	msleep(1);
+	mdelay(1);
 	writel(4, &clock->pll);
 	while (readl(&clock->pll) & PLL_STATUS)
 		;
 	writel(((mul - 1) << MUL_SHIFT) | (0xff << 3) | 0x0e, &clock->pll);
-	msleep(75);
+	mdelay(75);
 }
 
 static void __init tnetd7300_init_clocks(void)
@@ -456,7 +456,7 @@
 }
 EXPORT_SYMBOL(clk_put);
 
-int __init ar7_init_clocks(void)
+void __init ar7_init_clocks(void)
 {
 	switch (ar7_chip_id()) {
 	case AR7_CHIP_7100:
@@ -472,7 +472,4 @@
 	}
 	/* adjust vbus clock rate */
 	vbus_clk.rate = bus_clk.rate / 2;
-
-	return 0;
 }
-arch_initcall(ar7_init_clocks);
diff --git a/arch/mips/ar7/time.c b/arch/mips/ar7/time.c
index 5fb8a01..22c9321 100644
--- a/arch/mips/ar7/time.c
+++ b/arch/mips/ar7/time.c
@@ -30,6 +30,9 @@
 {
 	struct clk *cpu_clk;
 
+	/* Initialize ar7 clocks so the CPU clock frequency is correct */
+	ar7_init_clocks();
+
 	cpu_clk = clk_get(NULL, "cpu");
 	if (IS_ERR(cpu_clk)) {
 		printk(KERN_ERR "unable to get cpu clock\n");
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index b1aee33..c95f90b 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -32,7 +32,6 @@
 #include <asm/reboot.h>
 #include <asm/time.h>
 #include <bcm47xx.h>
-#include <asm/fw/cfe/cfe_api.h>
 #include <asm/mach-bcm47xx/nvram.h>
 
 struct ssb_bus ssb_bcm47xx;
@@ -57,68 +56,112 @@
 		cpu_relax();
 }
 
-static void str2eaddr(char *str, char *dest)
+#define READ_FROM_NVRAM(_outvar, name, buf) \
+	if (nvram_getenv(name, buf, sizeof(buf)) >= 0)\
+		sprom->_outvar = simple_strtoul(buf, NULL, 0);
+
+static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 {
-	int i = 0;
+	char buf[100];
+	u32 boardflags;
 
-	if (str == NULL) {
-		memset(dest, 0, 6);
-		return;
+	memset(sprom, 0, sizeof(struct ssb_sprom));
+
+	sprom->revision = 1; /* Fallback: Old hardware does not define this. */
+	READ_FROM_NVRAM(revision, "sromrev", buf);
+	if (nvram_getenv("il0macaddr", buf, sizeof(buf)) >= 0)
+		nvram_parse_macaddr(buf, sprom->il0mac);
+	if (nvram_getenv("et0macaddr", buf, sizeof(buf)) >= 0)
+		nvram_parse_macaddr(buf, sprom->et0mac);
+	if (nvram_getenv("et1macaddr", buf, sizeof(buf)) >= 0)
+		nvram_parse_macaddr(buf, sprom->et1mac);
+	READ_FROM_NVRAM(et0phyaddr, "et0phyaddr", buf);
+	READ_FROM_NVRAM(et1phyaddr, "et1phyaddr", buf);
+	READ_FROM_NVRAM(et0mdcport, "et0mdcport", buf);
+	READ_FROM_NVRAM(et1mdcport, "et1mdcport", buf);
+	READ_FROM_NVRAM(board_rev, "boardrev", buf);
+	READ_FROM_NVRAM(country_code, "ccode", buf);
+	READ_FROM_NVRAM(ant_available_a, "aa5g", buf);
+	READ_FROM_NVRAM(ant_available_bg, "aa2g", buf);
+	READ_FROM_NVRAM(pa0b0, "pa0b0", buf);
+	READ_FROM_NVRAM(pa0b1, "pa0b1", buf);
+	READ_FROM_NVRAM(pa0b2, "pa0b2", buf);
+	READ_FROM_NVRAM(pa1b0, "pa1b0", buf);
+	READ_FROM_NVRAM(pa1b1, "pa1b1", buf);
+	READ_FROM_NVRAM(pa1b2, "pa1b2", buf);
+	READ_FROM_NVRAM(pa1lob0, "pa1lob0", buf);
+	READ_FROM_NVRAM(pa1lob2, "pa1lob1", buf);
+	READ_FROM_NVRAM(pa1lob1, "pa1lob2", buf);
+	READ_FROM_NVRAM(pa1hib0, "pa1hib0", buf);
+	READ_FROM_NVRAM(pa1hib2, "pa1hib1", buf);
+	READ_FROM_NVRAM(pa1hib1, "pa1hib2", buf);
+	READ_FROM_NVRAM(gpio0, "wl0gpio0", buf);
+	READ_FROM_NVRAM(gpio1, "wl0gpio1", buf);
+	READ_FROM_NVRAM(gpio2, "wl0gpio2", buf);
+	READ_FROM_NVRAM(gpio3, "wl0gpio3", buf);
+	READ_FROM_NVRAM(maxpwr_bg, "pa0maxpwr", buf);
+	READ_FROM_NVRAM(maxpwr_al, "pa1lomaxpwr", buf);
+	READ_FROM_NVRAM(maxpwr_a, "pa1maxpwr", buf);
+	READ_FROM_NVRAM(maxpwr_ah, "pa1himaxpwr", buf);
+	READ_FROM_NVRAM(itssi_a, "pa1itssit", buf);
+	READ_FROM_NVRAM(itssi_bg, "pa0itssit", buf);
+	READ_FROM_NVRAM(tri2g, "tri2g", buf);
+	READ_FROM_NVRAM(tri5gl, "tri5gl", buf);
+	READ_FROM_NVRAM(tri5g, "tri5g", buf);
+	READ_FROM_NVRAM(tri5gh, "tri5gh", buf);
+	READ_FROM_NVRAM(rxpo2g, "rxpo2g", buf);
+	READ_FROM_NVRAM(rxpo5g, "rxpo5g", buf);
+	READ_FROM_NVRAM(rssisav2g, "rssisav2g", buf);
+	READ_FROM_NVRAM(rssismc2g, "rssismc2g", buf);
+	READ_FROM_NVRAM(rssismf2g, "rssismf2g", buf);
+	READ_FROM_NVRAM(bxa2g, "bxa2g", buf);
+	READ_FROM_NVRAM(rssisav5g, "rssisav5g", buf);
+	READ_FROM_NVRAM(rssismc5g, "rssismc5g", buf);
+	READ_FROM_NVRAM(rssismf5g, "rssismf5g", buf);
+	READ_FROM_NVRAM(bxa5g, "bxa5g", buf);
+	READ_FROM_NVRAM(cck2gpo, "cck2gpo", buf);
+	READ_FROM_NVRAM(ofdm2gpo, "ofdm2gpo", buf);
+	READ_FROM_NVRAM(ofdm5glpo, "ofdm5glpo", buf);
+	READ_FROM_NVRAM(ofdm5gpo, "ofdm5gpo", buf);
+	READ_FROM_NVRAM(ofdm5ghpo, "ofdm5ghpo", buf);
+
+	if (nvram_getenv("boardflags", buf, sizeof(buf)) >= 0) {
+		boardflags = simple_strtoul(buf, NULL, 0);
+		if (boardflags) {
+			sprom->boardflags_lo = (boardflags & 0x0000FFFFU);
+			sprom->boardflags_hi = (boardflags & 0xFFFF0000U) >> 16;
+		}
 	}
-
-	for (;;) {
-		dest[i++] = (char) simple_strtoul(str, NULL, 16);
-		str += 2;
-		if (!*str++ || i == 6)
-			break;
+	if (nvram_getenv("boardflags2", buf, sizeof(buf)) >= 0) {
+		boardflags = simple_strtoul(buf, NULL, 0);
+		if (boardflags) {
+			sprom->boardflags2_lo = (boardflags & 0x0000FFFFU);
+			sprom->boardflags2_hi = (boardflags & 0xFFFF0000U) >> 16;
+		}
 	}
 }
 
 static int bcm47xx_get_invariants(struct ssb_bus *bus,
 				   struct ssb_init_invariants *iv)
 {
-	char buf[100];
+	char buf[20];
 
 	/* Fill boardinfo structure */
 	memset(&(iv->boardinfo), 0 , sizeof(struct ssb_boardinfo));
 
-	if (cfe_getenv("boardvendor", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("boardvendor", buf, sizeof(buf)) >= 0)
+	if (nvram_getenv("boardvendor", buf, sizeof(buf)) >= 0)
+		iv->boardinfo.vendor = (u16)simple_strtoul(buf, NULL, 0);
+	else
+		iv->boardinfo.vendor = SSB_BOARDVENDOR_BCM;
+	if (nvram_getenv("boardtype", buf, sizeof(buf)) >= 0)
 		iv->boardinfo.type = (u16)simple_strtoul(buf, NULL, 0);
-	if (cfe_getenv("boardtype", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("boardtype", buf, sizeof(buf)) >= 0)
-		iv->boardinfo.type = (u16)simple_strtoul(buf, NULL, 0);
-	if (cfe_getenv("boardrev", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("boardrev", buf, sizeof(buf)) >= 0)
+	if (nvram_getenv("boardrev", buf, sizeof(buf)) >= 0)
 		iv->boardinfo.rev = (u16)simple_strtoul(buf, NULL, 0);
 
-	/* Fill sprom structure */
-	memset(&(iv->sprom), 0, sizeof(struct ssb_sprom));
-	iv->sprom.revision = 3;
+	bcm47xx_fill_sprom(&iv->sprom);
 
-	if (cfe_getenv("et0macaddr", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("et0macaddr", buf, sizeof(buf)) >= 0)
-		str2eaddr(buf, iv->sprom.et0mac);
-
-	if (cfe_getenv("et1macaddr", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("et1macaddr", buf, sizeof(buf)) >= 0)
-		str2eaddr(buf, iv->sprom.et1mac);
-
-	if (cfe_getenv("et0phyaddr", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("et0phyaddr", buf, sizeof(buf)) >= 0)
-		iv->sprom.et0phyaddr = simple_strtoul(buf, NULL, 0);
-
-	if (cfe_getenv("et1phyaddr", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("et1phyaddr", buf, sizeof(buf)) >= 0)
-		iv->sprom.et1phyaddr = simple_strtoul(buf, NULL, 0);
-
-	if (cfe_getenv("et0mdcport", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("et0mdcport", buf, sizeof(buf)) >= 0)
-		iv->sprom.et0mdcport = simple_strtoul(buf, NULL, 10);
-
-	if (cfe_getenv("et1mdcport", buf, sizeof(buf)) >= 0 ||
-	    nvram_getenv("et1mdcport", buf, sizeof(buf)) >= 0)
-		iv->sprom.et1mdcport = simple_strtoul(buf, NULL, 10);
+	if (nvram_getenv("cardbus", buf, sizeof(buf)) >= 0)
+		iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10);
 
 	return 0;
 }
@@ -126,12 +169,28 @@
 void __init plat_mem_setup(void)
 {
 	int err;
+	char buf[100];
+	struct ssb_mipscore *mcore;
 
 	err = ssb_bus_ssbbus_register(&ssb_bcm47xx, SSB_ENUM_BASE,
 				      bcm47xx_get_invariants);
 	if (err)
 		panic("Failed to initialize SSB bus (err %d)\n", err);
 
+	mcore = &ssb_bcm47xx.mipscore;
+	if (nvram_getenv("kernel_args", buf, sizeof(buf)) >= 0) {
+		if (strstr(buf, "console=ttyS1")) {
+			struct ssb_serial_port port;
+
+			printk(KERN_DEBUG "Swapping serial ports!\n");
+			/* swap serial ports */
+			memcpy(&port, &mcore->serial_ports[0], sizeof(port));
+			memcpy(&mcore->serial_ports[0], &mcore->serial_ports[1],
+			       sizeof(port));
+			memcpy(&mcore->serial_ports[1], &port, sizeof(port));
+		}
+	}
+
 	_machine_restart = bcm47xx_machine_restart;
 	_machine_halt = bcm47xx_machine_halt;
 	pm_power_off = bcm47xx_machine_halt;
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 06d59dc..8687753 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -111,8 +111,8 @@
  * These are the PRID's for when 23:16 == PRID_COMP_BROADCOM
  */
 
-#define PRID_IMP_BMIPS4KC	0x4000
-#define PRID_IMP_BMIPS32	0x8000
+#define PRID_IMP_BMIPS32_REV4	0x4000
+#define PRID_IMP_BMIPS32_REV8	0x8000
 #define PRID_IMP_BMIPS3300	0x9000
 #define PRID_IMP_BMIPS3300_ALT	0x9100
 #define PRID_IMP_BMIPS3300_BUG	0x0000
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index fd1d39e..455c0ac 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -249,7 +249,8 @@
 
 #define SET_PERSONALITY(ex)						\
 do {									\
-	set_personality(PER_LINUX);					\
+	if (personality(current->personality) != PER_LINUX)		\
+		set_personality(PER_LINUX);				\
 									\
 	current->thread.abi = &mips_abi;				\
 } while (0)
@@ -296,6 +297,8 @@
 
 #define SET_PERSONALITY(ex)						\
 do {									\
+	unsigned int p;							\
+									\
 	clear_thread_flag(TIF_32BIT_REGS);				\
 	clear_thread_flag(TIF_32BIT_ADDR);				\
 									\
@@ -304,7 +307,8 @@
 	else								\
 		current->thread.abi = &mips_abi;			\
 									\
-	if (current->personality != PER_LINUX32)			\
+	p = personality(current->personality);				\
+	if (p != PER_LINUX32 && p != PER_LINUX)				\
 		set_personality(PER_LINUX);				\
 } while (0)
 
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index c98bf51..5b017f2 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -329,10 +329,14 @@
 			"dsrl32	%L0, %L0, 0"			"\n\t"	\
 			"dsll32	%M0, %M0, 0"			"\n\t"	\
 			"or	%L0, %L0, %M0"			"\n\t"	\
+			".set	push"				"\n\t"	\
+			".set	noreorder"			"\n\t"	\
+			".set	nomacro"			"\n\t"	\
 			"sd	%L0, %2"			"\n\t"	\
+			".set	pop"				"\n\t"	\
 			".set	mips0"				"\n"	\
 			: "=r" (__tmp)					\
-			: "0" (__val), "m" (*__mem));			\
+			: "0" (__val), "R" (*__mem));			\
 		if (irq)						\
 			local_irq_restore(__flags);			\
 	} else								\
@@ -355,12 +359,16 @@
 			local_irq_save(__flags);			\
 		__asm__ __volatile__(					\
 			".set	mips3"		"\t\t# __readq"	"\n\t"	\
+			".set	push"				"\n\t"	\
+			".set	noreorder"			"\n\t"	\
+			".set	nomacro"			"\n\t"	\
 			"ld	%L0, %1"			"\n\t"	\
+			".set	pop"				"\n\t"	\
 			"dsra32	%M0, %L0, 0"			"\n\t"	\
 			"sll	%L0, %L0, 0"			"\n\t"	\
 			".set	mips0"				"\n"	\
 			: "=r" (__val)					\
-			: "m" (*__mem));				\
+			: "R" (*__mem));				\
 		if (irq)						\
 			local_irq_restore(__flags);			\
 	} else {							\
diff --git a/arch/mips/include/asm/mach-ar7/ar7.h b/arch/mips/include/asm/mach-ar7/ar7.h
index 7919d76..07d3fad 100644
--- a/arch/mips/include/asm/mach-ar7/ar7.h
+++ b/arch/mips/include/asm/mach-ar7/ar7.h
@@ -201,7 +201,6 @@
 }
 
 int __init ar7_gpio_init(void);
-
-int __init ar7_gpio_init(void);
+void __init ar7_init_clocks(void);
 
 #endif /* __AR7_H__ */
diff --git a/arch/mips/include/asm/mach-bcm47xx/nvram.h b/arch/mips/include/asm/mach-bcm47xx/nvram.h
index c58ebd8..9759588 100644
--- a/arch/mips/include/asm/mach-bcm47xx/nvram.h
+++ b/arch/mips/include/asm/mach-bcm47xx/nvram.h
@@ -12,6 +12,7 @@
 #define __NVRAM_H
 
 #include <linux/types.h>
+#include <linux/kernel.h>
 
 struct nvram_header {
 	u32 magic;
@@ -36,4 +37,10 @@
 
 extern int nvram_getenv(char *name, char *val, size_t val_len);
 
+static inline void nvram_parse_macaddr(char *buf, u8 *macaddr)
+{
+	sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0], &macaddr[1],
+	       &macaddr[2], &macaddr[3], &macaddr[4], &macaddr[5]);
+}
+
 #endif
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 5742bb4..5c0a357 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2009 Qi Hardware inc.,
  * Author: Xiangfu Liu <xiangfu@qi-hardware.com>
- * Copyright 2010, Lars-Petrer Clausen <lars@metafoo.de>
+ * Copyright 2010, Lars-Peter Clausen <lars@metafoo.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 or later
@@ -235,7 +235,7 @@
 	QI_LB60_GPIO_KEYIN(3),
 	QI_LB60_GPIO_KEYIN(4),
 	QI_LB60_GPIO_KEYIN(5),
-	QI_LB60_GPIO_KEYIN(7),
+	QI_LB60_GPIO_KEYIN(6),
 	QI_LB60_GPIO_KEYIN8,
 };
 
diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c
index 95bc2b5..1cc9e54 100644
--- a/arch/mips/jz4740/platform.c
+++ b/arch/mips/jz4740/platform.c
@@ -208,7 +208,7 @@
 
 /* PCM */
 struct platform_device jz4740_pcm_device = {
-	.name		= "jz4740-pcm",
+	.name		= "jz4740-pcm-audio",
 	.id		= -1,
 };
 
diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c
index cfeac15..4a70407 100644
--- a/arch/mips/jz4740/prom.c
+++ b/arch/mips/jz4740/prom.c
@@ -23,7 +23,7 @@
 #include <asm/bootinfo.h>
 #include <asm/mach-jz4740/base.h>
 
-void jz4740_init_cmdline(int argc, char *argv[])
+static __init void jz4740_init_cmdline(int argc, char *argv[])
 {
 	unsigned int count = COMMAND_LINE_SIZE - 1;
 	int i;
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 2f4d7a9..98c5a97 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -32,7 +32,7 @@
 	cnt = read_c0_count();
 	cnt += delta;
 	write_c0_compare(cnt);
-	res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0;
+	res = ((int)(read_c0_count() - cnt) >= 0) ? -ETIME : 0;
 	return res;
 }
 
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 71620e1..68dae7b 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -905,7 +905,8 @@
 {
 	decode_configs(c);
 	switch (c->processor_id & 0xff00) {
-	case PRID_IMP_BMIPS32:
+	case PRID_IMP_BMIPS32_REV4:
+	case PRID_IMP_BMIPS32_REV8:
 		c->cputype = CPU_BMIPS32;
 		__cpu_name[cpu] = "Broadcom BMIPS32";
 		break;
@@ -933,10 +934,6 @@
 		__cpu_name[cpu] = "Broadcom BMIPS5000";
 		c->options |= MIPS_CPU_ULRI;
 		break;
-	case PRID_IMP_BMIPS4KC:
-		c->cputype = CPU_4KC;
-		__cpu_name[cpu] = "MIPS 4Kc";
-		break;
 	}
 }
 
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 6343b4a..876a75c 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -251,14 +251,15 @@
 
 SYSCALL_DEFINE1(32_personality, unsigned long, personality)
 {
+	unsigned int p = personality & 0xffffffff;
 	int ret;
-	personality &= 0xffffffff;
+
 	if (personality(current->personality) == PER_LINUX32 &&
-	    personality == PER_LINUX)
-		personality = PER_LINUX32;
-	ret = sys_personality(personality);
-	if (ret == PER_LINUX32)
-		ret = PER_LINUX;
+	    personality(p) == PER_LINUX)
+		p = (p & ~PER_MASK) | PER_LINUX32;
+	ret = sys_personality(p);
+	if (ret != -1 && personality(ret) == PER_LINUX32)
+		ret = (ret & ~PER_MASK) | PER_LINUX;
 	return ret;
 }
 
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 5c7c6fc..183e0d2 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1047,6 +1047,6 @@
 
 	return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 #endif /* defined(CONFIG_CPU_MIPS32)... */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 9996094..ae167df 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -142,7 +142,6 @@
 	childregs->regs[7] = 0;	/* Clear error flag */
 
 	childregs->regs[2] = 0;	/* Child gets zero as return value */
-	regs->regs[2] = p->pid;
 
 	if (childregs->cp0_status & ST0_CU0) {
 		childregs->regs[28] = (unsigned long) ti;
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index e000b27..9dbe583 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -100,7 +100,7 @@
 		return;
 
 	base = virt_to_phys((void *)initial_boot_params);
-	size = initial_boot_params->totalsize;
+	size = be32_to_cpu(initial_boot_params->totalsize);
 
 	/* Before we do anything, lets reserve the dt blob */
 	reserve_mem_mach(base, size);
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 43e7cdc5..c0e8141 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -153,7 +153,7 @@
 {
 	extern int gic_present;
 
-	/* This is Malta specific: IPI,performance and timer inetrrupts */
+	/* This is Malta specific: IPI,performance and timer interrupts */
 	if (gic_present)
 		change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 |
 					 STATUSF_IP6 | STATUSF_IP7);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 8e9fbe7..e971043 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -83,7 +83,8 @@
 extern asmlinkage void handle_reserved(void);
 
 extern int fpu_emulator_cop1Handler(struct pt_regs *xcp,
-	struct mips_fpu_struct *ctx, int has_fpu);
+				    struct mips_fpu_struct *ctx, int has_fpu,
+				    void *__user *fault_addr);
 
 void (*board_be_init)(void);
 int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
@@ -661,12 +662,36 @@
 	force_sig_info(SIGFPE, &info, current);
 }
 
+static int process_fpemu_return(int sig, void __user *fault_addr)
+{
+	if (sig == SIGSEGV || sig == SIGBUS) {
+		struct siginfo si = {0};
+		si.si_addr = fault_addr;
+		si.si_signo = sig;
+		if (sig == SIGSEGV) {
+			if (find_vma(current->mm, (unsigned long)fault_addr))
+				si.si_code = SEGV_ACCERR;
+			else
+				si.si_code = SEGV_MAPERR;
+		} else {
+			si.si_code = BUS_ADRERR;
+		}
+		force_sig_info(sig, &si, current);
+		return 1;
+	} else if (sig) {
+		force_sig(sig, current);
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
 /*
  * XXX Delayed fp exceptions when doing a lazy ctx switch XXX
  */
 asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
 {
-	siginfo_t info;
+	siginfo_t info = {0};
 
 	if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs), SIGFPE)
 	    == NOTIFY_STOP)
@@ -675,6 +700,7 @@
 
 	if (fcr31 & FPU_CSR_UNI_X) {
 		int sig;
+		void __user *fault_addr = NULL;
 
 		/*
 		 * Unimplemented operation exception.  If we've got the full
@@ -690,7 +716,8 @@
 		lose_fpu(1);
 
 		/* Run the emulator */
-		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1);
+		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
+					       &fault_addr);
 
 		/*
 		 * We can't allow the emulated instruction to leave any of
@@ -702,8 +729,7 @@
 		own_fpu(1);	/* Using the FPU again.  */
 
 		/* If something went wrong, signal */
-		if (sig)
-			force_sig(sig, current);
+		process_fpemu_return(sig, fault_addr);
 
 		return;
 	} else if (fcr31 & FPU_CSR_INV_X)
@@ -996,11 +1022,11 @@
 
 		if (!raw_cpu_has_fpu) {
 			int sig;
+			void __user *fault_addr = NULL;
 			sig = fpu_emulator_cop1Handler(regs,
-						&current->thread.fpu, 0);
-			if (sig)
-				force_sig(sig, current);
-			else
+						       &current->thread.fpu,
+						       0, &fault_addr);
+			if (!process_fpemu_return(sig, fault_addr))
 				mt_ase_fp_affinity();
 		}
 
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 3eb3cde..6a1fdfe 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -1092,6 +1092,10 @@
 
 	/* this of-course trashes what was there before... */
 	v->pbuffer = vmalloc(P_SIZE);
+	if (!v->pbuffer) {
+		pr_warning("VPE loader: unable to allocate memory\n");
+		return -ENOMEM;
+	}
 	v->plen = P_SIZE;
 	v->load_addr = NULL;
 	v->len = 0;
@@ -1149,10 +1153,9 @@
 	if (ret < 0)
 		v->shared_ptr = NULL;
 
-	// cleanup any temp buffers
-	if (v->pbuffer)
-		vfree(v->pbuffer);
+	vfree(v->pbuffer);
 	v->plen = 0;
+
 	return ret;
 }
 
@@ -1169,11 +1172,6 @@
 	if (v == NULL)
 		return -ENODEV;
 
-	if (v->pbuffer == NULL) {
-		printk(KERN_ERR "VPE loader: no buffer for program\n");
-		return -ENOMEM;
-	}
-
 	if ((count + v->len) > v->plen) {
 		printk(KERN_WARNING
 		       "VPE loader: elf size too big. Perhaps strip uneeded symbols\n");
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 77dc3b2..606c8a9 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -161,16 +161,16 @@
 
 .Lfwd_fixup:
 	PTR_L		t0, TI_TASK($28)
-	LONG_L		t0, THREAD_BUADDR(t0)
 	andi		a2, 0x3f
+	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, t1
 	jr		ra
 	 LONG_SUBU	a2, t0
 
 .Lpartial_fixup:
 	PTR_L		t0, TI_TASK($28)
-	LONG_L		t0, THREAD_BUADDR(t0)
 	andi		a2, LONGMASK
+	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, t1
 	jr		ra
 	 LONG_SUBU	a2, t0
diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c
index ae4cff9..11b193f 100644
--- a/arch/mips/loongson/common/env.c
+++ b/arch/mips/loongson/common/env.c
@@ -29,9 +29,9 @@
 
 #define parse_even_earlier(res, option, p)				\
 do {									\
+	int ret;							\
 	if (strncmp(option, (char *)p, strlen(option)) == 0)		\
-			strict_strtol((char *)p + strlen(option"="),	\
-					10, &res);			\
+		ret = strict_strtol((char *)p + strlen(option"="), 10, &res); \
 } while (0)
 
 void __init prom_init_env(void)
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index b2ad1b0..d32cb05 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -64,7 +64,7 @@
 
 #if __mips >= 4 && __mips != 32
 static int fpux_emu(struct pt_regs *,
-	struct mips_fpu_struct *, mips_instruction);
+	struct mips_fpu_struct *, mips_instruction, void *__user *);
 #endif
 
 /* Further private data for which no space exists in mips_fpu_struct */
@@ -208,16 +208,23 @@
  * Two instructions if the instruction is in a branch delay slot.
  */
 
-static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
+static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
+		       void *__user *fault_addr)
 {
 	mips_instruction ir;
 	unsigned long emulpc, contpc;
 	unsigned int cond;
 
-	if (get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) {
+	if (!access_ok(VERIFY_READ, xcp->cp0_epc, sizeof(mips_instruction))) {
 		MIPS_FPU_EMU_INC_STATS(errors);
+		*fault_addr = (mips_instruction __user *)xcp->cp0_epc;
 		return SIGBUS;
 	}
+	if (__get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) {
+		MIPS_FPU_EMU_INC_STATS(errors);
+		*fault_addr = (mips_instruction __user *)xcp->cp0_epc;
+		return SIGSEGV;
+	}
 
 	/* XXX NEC Vr54xx bug workaround */
 	if ((xcp->cp0_cause & CAUSEF_BD) && !isBranchInstr(&ir))
@@ -245,10 +252,16 @@
 #endif
 			return SIGILL;
 		}
-		if (get_user(ir, (mips_instruction __user *) emulpc)) {
+		if (!access_ok(VERIFY_READ, emulpc, sizeof(mips_instruction))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = (mips_instruction __user *)emulpc;
 			return SIGBUS;
 		}
+		if (__get_user(ir, (mips_instruction __user *) emulpc)) {
+			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = (mips_instruction __user *)emulpc;
+			return SIGSEGV;
+		}
 		/* __compute_return_epc() will have updated cp0_epc */
 		contpc = xcp->cp0_epc;
 		/* In order not to confuse ptrace() et al, tweak context */
@@ -269,10 +282,17 @@
 		u64 val;
 
 		MIPS_FPU_EMU_INC_STATS(loads);
-		if (get_user(val, va)) {
+
+		if (!access_ok(VERIFY_READ, va, sizeof(u64))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = va;
 			return SIGBUS;
 		}
+		if (__get_user(val, va)) {
+			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = va;
+			return SIGSEGV;
+		}
 		DITOREG(val, MIPSInst_RT(ir));
 		break;
 	}
@@ -284,10 +304,16 @@
 
 		MIPS_FPU_EMU_INC_STATS(stores);
 		DIFROMREG(val, MIPSInst_RT(ir));
-		if (put_user(val, va)) {
+		if (!access_ok(VERIFY_WRITE, va, sizeof(u64))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = va;
 			return SIGBUS;
 		}
+		if (__put_user(val, va)) {
+			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = va;
+			return SIGSEGV;
+		}
 		break;
 	}
 
@@ -297,10 +323,16 @@
 		u32 val;
 
 		MIPS_FPU_EMU_INC_STATS(loads);
-		if (get_user(val, va)) {
+		if (!access_ok(VERIFY_READ, va, sizeof(u32))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = va;
 			return SIGBUS;
 		}
+		if (__get_user(val, va)) {
+			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = va;
+			return SIGSEGV;
+		}
 		SITOREG(val, MIPSInst_RT(ir));
 		break;
 	}
@@ -312,10 +344,16 @@
 
 		MIPS_FPU_EMU_INC_STATS(stores);
 		SIFROMREG(val, MIPSInst_RT(ir));
-		if (put_user(val, va)) {
+		if (!access_ok(VERIFY_WRITE, va, sizeof(u32))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = va;
 			return SIGBUS;
 		}
+		if (__put_user(val, va)) {
+			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = va;
+			return SIGSEGV;
+		}
 		break;
 	}
 
@@ -440,10 +478,17 @@
 				contpc = (xcp->cp0_epc +
 					(MIPSInst_SIMM(ir) << 2));
 
-				if (get_user(ir,
+				if (!access_ok(VERIFY_READ, xcp->cp0_epc,
+					       sizeof(mips_instruction))) {
+					MIPS_FPU_EMU_INC_STATS(errors);
+					*fault_addr = (mips_instruction __user *)xcp->cp0_epc;
+					return SIGBUS;
+				}
+				if (__get_user(ir,
 				    (mips_instruction __user *) xcp->cp0_epc)) {
 					MIPS_FPU_EMU_INC_STATS(errors);
-					return SIGBUS;
+					*fault_addr = (mips_instruction __user *)xcp->cp0_epc;
+					return SIGSEGV;
 				}
 
 				switch (MIPSInst_OPCODE(ir)) {
@@ -506,9 +551,8 @@
 
 #if __mips >= 4 && __mips != 32
 	case cop1x_op:{
-		int sig;
-
-		if ((sig = fpux_emu(xcp, ctx, ir)))
+		int sig = fpux_emu(xcp, ctx, ir, fault_addr);
+		if (sig)
 			return sig;
 		break;
 	}
@@ -604,7 +648,7 @@
 DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg);
 
 static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
-	mips_instruction ir)
+	mips_instruction ir, void *__user *fault_addr)
 {
 	unsigned rcsr = 0;	/* resulting csr */
 
@@ -624,10 +668,16 @@
 				xcp->regs[MIPSInst_FT(ir)]);
 
 			MIPS_FPU_EMU_INC_STATS(loads);
-			if (get_user(val, va)) {
+			if (!access_ok(VERIFY_READ, va, sizeof(u32))) {
 				MIPS_FPU_EMU_INC_STATS(errors);
+				*fault_addr = va;
 				return SIGBUS;
 			}
+			if (__get_user(val, va)) {
+				MIPS_FPU_EMU_INC_STATS(errors);
+				*fault_addr = va;
+				return SIGSEGV;
+			}
 			SITOREG(val, MIPSInst_FD(ir));
 			break;
 
@@ -638,9 +688,15 @@
 			MIPS_FPU_EMU_INC_STATS(stores);
 
 			SIFROMREG(val, MIPSInst_FS(ir));
+			if (!access_ok(VERIFY_WRITE, va, sizeof(u32))) {
+				MIPS_FPU_EMU_INC_STATS(errors);
+				*fault_addr = va;
+				return SIGBUS;
+			}
 			if (put_user(val, va)) {
 				MIPS_FPU_EMU_INC_STATS(errors);
-				return SIGBUS;
+				*fault_addr = va;
+				return SIGSEGV;
 			}
 			break;
 
@@ -701,10 +757,16 @@
 				xcp->regs[MIPSInst_FT(ir)]);
 
 			MIPS_FPU_EMU_INC_STATS(loads);
-			if (get_user(val, va)) {
+			if (!access_ok(VERIFY_READ, va, sizeof(u64))) {
 				MIPS_FPU_EMU_INC_STATS(errors);
+				*fault_addr = va;
 				return SIGBUS;
 			}
+			if (__get_user(val, va)) {
+				MIPS_FPU_EMU_INC_STATS(errors);
+				*fault_addr = va;
+				return SIGSEGV;
+			}
 			DITOREG(val, MIPSInst_FD(ir));
 			break;
 
@@ -714,10 +776,16 @@
 
 			MIPS_FPU_EMU_INC_STATS(stores);
 			DIFROMREG(val, MIPSInst_FS(ir));
-			if (put_user(val, va)) {
+			if (!access_ok(VERIFY_WRITE, va, sizeof(u64))) {
 				MIPS_FPU_EMU_INC_STATS(errors);
+				*fault_addr = va;
 				return SIGBUS;
 			}
+			if (__put_user(val, va)) {
+				MIPS_FPU_EMU_INC_STATS(errors);
+				*fault_addr = va;
+				return SIGSEGV;
+			}
 			break;
 
 		case madd_d_op:
@@ -1242,7 +1310,7 @@
 }
 
 int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
-	int has_fpu)
+	int has_fpu, void *__user *fault_addr)
 {
 	unsigned long oldepc, prevepc;
 	mips_instruction insn;
@@ -1252,10 +1320,16 @@
 	do {
 		prevepc = xcp->cp0_epc;
 
-		if (get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) {
+		if (!access_ok(VERIFY_READ, xcp->cp0_epc, sizeof(mips_instruction))) {
 			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = (mips_instruction __user *)xcp->cp0_epc;
 			return SIGBUS;
 		}
+		if (__get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) {
+			MIPS_FPU_EMU_INC_STATS(errors);
+			*fault_addr = (mips_instruction __user *)xcp->cp0_epc;
+			return SIGSEGV;
+		}
 		if (insn == 0)
 			xcp->cp0_epc += 4;	/* skip nops */
 		else {
@@ -1267,7 +1341,7 @@
 			 */
 			/* convert to ieee library modes */
 			ieee754_csr.rm = ieee_rm[ieee754_csr.rm];
-			sig = cop1Emulate(xcp, ctx);
+			sig = cop1Emulate(xcp, ctx, fault_addr);
 			/* revert to mips rounding mode */
 			ieee754_csr.rm = mips_rm[ieee754_csr.rm];
 		}
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index 4fc1a0f..21ea14e 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -288,7 +288,7 @@
 	return plat_dma_supported(dev, mask);
 }
 
-void mips_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 			 enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
@@ -298,6 +298,8 @@
 		__dma_sync((unsigned long)vaddr, size, direction);
 }
 
+EXPORT_SYMBOL(dma_cache_sync);
+
 static struct dma_map_ops mips_default_dma_map_ops = {
 	.alloc_coherent = mips_dma_alloc_coherent,
 	.free_coherent = mips_dma_free_coherent,
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 505feca..9cca8de 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -68,6 +68,9 @@
  */
 static inline int mips_sc_is_activated(struct cpuinfo_mips *c)
 {
+	unsigned int config2 = read_c0_config2();
+	unsigned int tmp;
+
 	/* Check the bypass bit (L2B) */
 	switch (c->cputype) {
 	case CPU_34K:
@@ -83,6 +86,7 @@
 		c->scache.linesz = 2 << tmp;
 	else
 		return 0;
+	return 1;
 }
 
 static inline int __init mips_sc_probe(void)
diff --git a/arch/mips/pmc-sierra/yosemite/py-console.c b/arch/mips/pmc-sierra/yosemite/py-console.c
index b7f1d9c..434d7b1 100644
--- a/arch/mips/pmc-sierra/yosemite/py-console.c
+++ b/arch/mips/pmc-sierra/yosemite/py-console.c
@@ -65,11 +65,15 @@
 
 	__asm__ __volatile__ (
 	"	.set	mips3		\n"
+	"	.set	push		\n"
+	"	.set	noreorder	\n"
+	"	.set	nomacro		\n"
 	"	ld	%0, %1		\n"
+	"	.set	pop		\n"
 	"	lbu	%0, (%0)	\n"
 	"	.set	mips0		\n"
 	: "=r" (res)
-	: "m" (vaddr));
+	: "R" (vaddr));
 
 	write_c0_status(sr);
 	ssnop_4();
@@ -89,11 +93,15 @@
 
 	__asm__ __volatile__ (
 	"	.set	mips3		\n"
+	"	.set	push		\n"
+	"	.set	noreorder	\n"
+	"	.set	nomacro		\n"
 	"	ld	%0, %1		\n"
+	"	.set	pop		\n"
 	"	sb	%2, (%0)	\n"
 	"	.set	mips0		\n"
 	: "=&r" (tmp)
-	: "m" (vaddr), "r" (c));
+	: "R" (vaddr), "r" (c));
 
 	write_c0_status(sr);
 	ssnop_4();
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index c308989..41707a2 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -82,7 +82,7 @@
 enum swarm_rtc_type {
 	RTC_NONE,
 	RTC_XICOR,
-	RTC_M4LT81
+	RTC_M41T81,
 };
 
 enum swarm_rtc_type swarm_rtc_type;
@@ -96,7 +96,7 @@
 		sec = xicor_get_time();
 		break;
 
-	case RTC_M4LT81:
+	case RTC_M41T81:
 		sec = m41t81_get_time();
 		break;
 
@@ -115,7 +115,7 @@
 	case RTC_XICOR:
 		return xicor_set_time(sec);
 
-	case RTC_M4LT81:
+	case RTC_M41T81:
 		return m41t81_set_time(sec);
 
 	case RTC_NONE:
@@ -141,7 +141,7 @@
 	if (xicor_probe())
 		swarm_rtc_type = RTC_XICOR;
 	if (m41t81_probe())
-		swarm_rtc_type = RTC_M4LT81;
+		swarm_rtc_type = RTC_M41T81;
 
 #ifdef CONFIG_VT
 	screen_info = (struct screen_info) {
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index f860a34..75da468 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -40,21 +40,17 @@
 		unsigned long long ll;
 		unsigned l[2];
 	} tsc64, result;
-	unsigned long tsc, tmp;
+	unsigned long tmp;
 	unsigned product[3]; /* 96-bit intermediate value */
 
 	/* cnt32_to_63() is not safe with preemption */
 	preempt_disable();
 
-	/* read the TSC value
-	 */
-	tsc = get_cycles();
-
-	/* expand to 64-bits.
+	/* expand the tsc to 64-bits.
 	 * - sched_clock() must be called once a minute or better or the
 	 *   following will go horribly wrong - see cnt32_to_63()
 	 */
-	tsc64.ll = cnt32_to_63(tsc) & 0x7fffffffffffffffULL;
+	tsc64.ll = cnt32_to_63(get_cycles()) & 0x7fffffffffffffffULL;
 
 	preempt_enable();
 
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
index 7c07de0..b150b51 100644
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -126,4 +126,4 @@
 	return register_fsl_emb_pmu(&e500_pmu);
 }
 
-arch_initcall(init_e500_pmu);
+early_initcall(init_e500_pmu);
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index 09d7202..2cc5e03 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -414,4 +414,4 @@
 	return register_power_pmu(&mpc7450_pmu);
 }
 
-arch_initcall(init_mpc7450_pmu);
+early_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 3129c85..5674807 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1379,7 +1379,7 @@
 		freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
-	perf_pmu_register(&power_pmu);
+	perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(power_pmu_notifier);
 
 	return 0;
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 7ecca59..4dcf5f8 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -681,7 +681,7 @@
 	pr_info("%s performance monitor hardware support registered\n",
 		pmu->name);
 
-	perf_pmu_register(&fsl_emb_pmu);
+	perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 2a361cd..ead8b3c 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -613,4 +613,4 @@
 	return register_power_pmu(&power4_pmu);
 }
 
-arch_initcall(init_power4_pmu);
+early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 199de52..eca0ac5 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -682,4 +682,4 @@
 	return register_power_pmu(&power5p_pmu);
 }
 
-arch_initcall(init_power5p_pmu);
+early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 98b6a72..d5ff0f6 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -621,4 +621,4 @@
 	return register_power_pmu(&power5_pmu);
 }
 
-arch_initcall(init_power5_pmu);
+early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 84a607b..3160392 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -544,4 +544,4 @@
 	return register_power_pmu(&power6_pmu);
 }
 
-arch_initcall(init_power6_pmu);
+early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 852f7b7..593740f 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -369,4 +369,4 @@
 	return register_power_pmu(&power7_pmu);
 }
 
-arch_initcall(init_power7_pmu);
+early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 3fee685..9a6e093 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -494,4 +494,4 @@
 	return register_power_pmu(&ppc970_pmu);
 }
 
-arch_initcall(init_ppc970_pmu);
+early_initcall(init_ppc970_pmu);
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
index dbf3b4b..748955d 100644
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -250,4 +250,4 @@
 
 	return register_sh_pmu(&sh7750_pmu);
 }
-arch_initcall(sh7750_pmu_init);
+early_initcall(sh7750_pmu_init);
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
index 5802765..17e6beb 100644
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -284,4 +284,4 @@
 
 	return register_sh_pmu(&sh4a_pmu);
 }
-arch_initcall(sh4a_pmu_init);
+early_initcall(sh4a_pmu_init);
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 5a4b334..2ee21a4 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -389,7 +389,7 @@
 
 	WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(sh_pmu_notifier);
 	return 0;
 }
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 6e8bfa1..4d3dbe3 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -4,8 +4,6 @@
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 
-extern void init_hw_perf_events(void);
-
 #define perf_arch_fetch_caller_regs(regs, ip)		\
 do {							\
 	unsigned long _pstate, _asi, _pil, _i7, _fp;	\
@@ -26,8 +24,6 @@
 	(regs)->u_regs[UREG_I6] = _fp;			\
 	(regs)->u_regs[UREG_I7] = _i7;			\
 } while (0)
-#else
-static inline void init_hw_perf_events(void)	{ }
 #endif
 
 #endif
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index a4bd7ba..300f810 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -270,8 +270,6 @@
 			atomic_set(&nmi_active, -1);
 		}
 	}
-	if (!err)
-		init_hw_perf_events();
 
 	return err;
 }
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0d6deb5..7605786 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1307,20 +1307,23 @@
 	return false;
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	pr_info("Performance events: ");
 
 	if (!supported_pmu()) {
 		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
-		return;
+		return 0;
 	}
 
 	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	register_die_notifier(&perf_event_nmi_notifier);
+
+	return 0;
 }
+early_initcall(init_hw_perf_events);
 
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			   struct pt_regs *regs)
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
index c1ee1d6..81d92a4 100644
--- a/arch/tile/include/asm/signal.h
+++ b/arch/tile/include/asm/signal.h
@@ -25,7 +25,7 @@
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 struct pt_regs;
-int restore_sigcontext(struct pt_regs *, struct sigcontext __user *, long *);
+int restore_sigcontext(struct pt_regs *, struct sigcontext __user *);
 int setup_sigcontext(struct sigcontext __user *, struct pt_regs *);
 void do_signal(struct pt_regs *regs);
 #endif
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 543d6a3..dbb0dfc 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -290,12 +290,12 @@
 	return ret;
 }
 
+/* The assembly shim for this function arranges to ignore the return value. */
 long compat_sys_rt_sigreturn(struct pt_regs *regs)
 {
 	struct compat_rt_sigframe __user *frame =
 		(struct compat_rt_sigframe __user *) compat_ptr(regs->sp);
 	sigset_t set;
-	long r0;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -308,13 +308,13 @@
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0)
 		goto badframe;
 
-	return r0;
+	return 0;
 
 badframe:
 	force_sig(SIGSEGV, current);
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index f582162..5eed4a0 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -1342,8 +1342,8 @@
 	lw      r20, r20
 
 	/* Jump to syscall handler. */
-	jalr    r20; .Lhandle_syscall_link:
-	FEEDBACK_REENTER(handle_syscall)
+	jalr    r20
+.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */
 
 	/*
 	 * Write our r0 onto the stack so it gets restored instead
@@ -1352,6 +1352,9 @@
 	PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
 	sw      r29, r0
 
+.Lsyscall_sigreturn_skip:
+	FEEDBACK_REENTER(handle_syscall)
+
 	/* Do syscall trace again, if requested. */
 	lw	r30, r31
 	andi    r30, r30, _TIF_SYSCALL_TRACE
@@ -1536,9 +1539,24 @@
 	};                                              \
 	STD_ENDPROC(_##x)
 
+/*
+ * Special-case sigreturn to not write r0 to the stack on return.
+ * This is technically more efficient, but it also avoids difficulties
+ * in the 64-bit OS when handling 32-bit compat code, since we must not
+ * sign-extend r0 for the sigreturn return-value case.
+ */
+#define PTREGS_SYSCALL_SIGRETURN(x, reg)                \
+	STD_ENTRY(_##x);                                \
+	addli   lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \
+	{                                               \
+	 PTREGS_PTR(reg, PTREGS_OFFSET_BASE);           \
+	 j      x                                       \
+	};                                              \
+	STD_ENDPROC(_##x)
+
 PTREGS_SYSCALL(sys_execve, r3)
 PTREGS_SYSCALL(sys_sigaltstack, r2)
-PTREGS_SYSCALL(sys_rt_sigreturn, r0)
+PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0)
 PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1)
 
 /* Save additional callee-saves to pt_regs, put address in r4 and jump. */
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 8430f45..e90eb53 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -212,6 +212,13 @@
 	childregs->sp = sp;  /* override with new user stack pointer */
 
 	/*
+	 * If CLONE_SETTLS is set, set "tp" in the new task to "r4",
+	 * which is passed in as arg #5 to sys_clone().
+	 */
+	if (clone_flags & CLONE_SETTLS)
+		childregs->tp = regs->regs[4];
+
+	/*
 	 * Copy the callee-saved registers from the passed pt_regs struct
 	 * into the context-switch callee-saved registers area.
 	 * This way when we start the interrupt-return sequence, the
@@ -539,6 +546,7 @@
 	return __switch_to(prev, next, next_current_ksp0(next));
 }
 
+/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). */
 SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		void __user *, parent_tidptr, void __user *, child_tidptr,
 		struct pt_regs *, regs)
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 757407e..1260321 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -52,7 +52,7 @@
  */
 
 int restore_sigcontext(struct pt_regs *regs,
-		       struct sigcontext __user *sc, long *pr0)
+		       struct sigcontext __user *sc)
 {
 	int err = 0;
 	int i;
@@ -75,17 +75,15 @@
 
 	regs->faultnum = INT_SWINT_1_SIGRETURN;
 
-	err |= __get_user(*pr0, &sc->gregs[0]);
 	return err;
 }
 
-/* sigreturn() returns long since it restores r0 in the interrupted code. */
+/* The assembly shim for this function arranges to ignore the return value. */
 SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs)
 {
 	struct rt_sigframe __user *frame =
 		(struct rt_sigframe __user *)(regs->sp);
 	sigset_t set;
-	long r0;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
@@ -98,13 +96,13 @@
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
 	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
 		goto badframe;
 
-	return r0;
+	return 0;
 
 badframe:
 	force_sig(SIGSEGV, current);
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 23f315c..325c052 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -355,7 +355,7 @@
 	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
 #else
-	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
+	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
 #endif
 #ifndef CONFIG_RELOCATABLE
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 76561d2..4a2adaa 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -180,8 +180,15 @@
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
+struct text_poke_param {
+	void *addr;
+	const void *opcode;
+	size_t len;
+};
+
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 #define IDEAL_NOP_SIZE_5 5
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 5be1542..e99d55d 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -72,6 +72,9 @@
 #define BIOS_BEGIN		0x000a0000
 #define BIOS_END		0x00100000
 
+#define BIOS_ROM_BASE		0xffe00000
+#define BIOS_ROM_END		0xffffffff
+
 #ifdef __KERNEL__
 /* see comment in arch/x86/kernel/e820.c */
 extern struct e820map e820;
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 13b0eba..ba870bb 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -15,10 +15,6 @@
 	return ((irq == 2) ? 9 : irq);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-# define ARCH_HAS_NMI_WATCHDOG
-#endif
-
 #ifdef CONFIG_X86_32
 extern void irq_ctx_init(int cpu);
 #else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca8..f23eb25 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -28,7 +28,7 @@
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-		       unsigned long *sp, unsigned long bp);
+		       unsigned long *sp);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e6fe39..f702f82 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,7 +79,7 @@
 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_MAX_CPUID_ENTRIES 80
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f8..c4021b9 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -5,41 +5,15 @@
 #include <asm/irq.h>
 #include <asm/io.h>
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
-/**
- * do_nmi_callback
- *
- * Check to see if a callback exists and execute it.  Return 1
- * if the handler exists and was handled successfully.
- */
-int do_nmi_callback(struct pt_regs *regs, int cpu);
+#ifdef CONFIG_X86_LOCAL_APIC
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_LOCKUP_DETECTOR)
-extern int nmi_watchdog_enabled;
-#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
 extern void release_evntsel_nmi(unsigned int);
 
-extern void setup_apic_nmi_watchdog(void *);
-extern void stop_apic_nmi_watchdog(void *);
-extern void disable_timer_nmi_watchdog(void);
-extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
-extern void cpu_nmi_set_wd_enabled(void);
-
-extern atomic_t nmi_active;
-extern unsigned int nmi_watchdog;
-#define NMI_NONE	0
-#define NMI_IO_APIC	1
-#define NMI_LOCAL_APIC	2
-#define NMI_INVALID	3
-
 struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-
-static inline void localise_nmi_watchdog(void)
-{
-	if (nmi_watchdog == NMI_IO_APIC)
-		nmi_watchdog = NMI_LOCAL_APIC;
-}
-
-/* check if nmi_watchdog is active (ie was specified at boot) */
-static inline int nmi_watchdog_active(void)
-{
-	/*
-	 * actually it should be:
-	 * 	return (nmi_watchdog == NMI_LOCAL_APIC ||
-	 * 		nmi_watchdog == NMI_IO_APIC)
-	 * but since they are power of two we could use a
-	 * cheaper way --cvg
-	 */
-	return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
-}
 #endif
 
-void lapic_watchdog_stop(void);
-int lapic_watchdog_init(unsigned nmi_hz);
-int lapic_wd_event(unsigned nmi_hz);
-unsigned lapic_adjust_nmi_hz(unsigned hz);
-void disable_lapic_nmi_watchdog(void);
-void enable_lapic_nmi_watchdog(void);
 void stop_nmi(void);
 void restart_nmi(void);
 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b..d9d4dae3 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@
 #define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */
 
 #ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
 extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET			0
@@ -156,7 +155,6 @@
 }
 
 #else
-static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
 #endif
 
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index a70cd21..295e2ff 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -744,14 +744,6 @@
 };
 
 /*
- * P4 PEBS specifics (Replay Event only)
- *
- * Format (bits):
- *   0-6: metric from P4_PEBS_METRIC enum
- *    7 : reserved
- *    8 : reserved
- * 9-11 : reserved
- *
  * Note we have UOP and PEBS bits reserved for now
  * just in case if we will need them once
  */
@@ -788,5 +780,60 @@
 	P4_PEBS_METRIC__max
 };
 
+/*
+ * Notes on internal configuration of ESCR+CCCR tuples
+ *
+ * Since P4 has quite the different architecture of
+ * performance registers in compare with "architectural"
+ * once and we have on 64 bits to keep configuration
+ * of performance event, the following trick is used.
+ *
+ * 1) Since both ESCR and CCCR registers have only low
+ *    32 bits valuable, we pack them into a single 64 bit
+ *    configuration. Low 32 bits of such config correspond
+ *    to low 32 bits of CCCR register and high 32 bits
+ *    correspond to low 32 bits of ESCR register.
+ *
+ * 2) The meaning of every bit of such config field can
+ *    be found in Intel SDM but it should be noted that
+ *    we "borrow" some reserved bits for own usage and
+ *    clean them or set to a proper value when we do
+ *    a real write to hardware registers.
+ *
+ * 3) The format of bits of config is the following
+ *    and should be either 0 or set to some predefined
+ *    values:
+ *
+ *    Low 32 bits
+ *    -----------
+ *      0-6: P4_PEBS_METRIC enum
+ *     7-11:                    reserved
+ *       12:                    reserved (Enable)
+ *    13-15:                    reserved (ESCR select)
+ *    16-17: Active Thread
+ *       18: Compare
+ *       19: Complement
+ *    20-23: Threshold
+ *       24: Edge
+ *       25:                    reserved (FORCE_OVF)
+ *       26:                    reserved (OVF_PMI_T0)
+ *       27:                    reserved (OVF_PMI_T1)
+ *    28-29:                    reserved
+ *       30:                    reserved (Cascade)
+ *       31:                    reserved (OVF)
+ *
+ *    High 32 bits
+ *    ------------
+ *        0:                    reserved (T1_USR)
+ *        1:                    reserved (T1_OS)
+ *        2:                    reserved (T0_USR)
+ *        3:                    reserved (T0_OS)
+ *        4: Tag Enable
+ *      5-8: Tag Value
+ *     9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
+ *    25-30: enum P4_EVENTS
+ *       31:                    reserved (HT thread)
+ */
+
 #endif /* PERF_EVENT_P4_H */
 
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def601..6c22bf3 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -48,7 +48,6 @@
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
 #endif
 }
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 2b16a2a..52b5c7e 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -7,6 +7,7 @@
 #define _ASM_X86_STACKTRACE_H
 
 #include <linux/uaccess.h>
+#include <linux/ptrace.h>
 
 extern int kstack_depth_to_print;
 
@@ -46,7 +47,7 @@
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+		unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data);
 
 #ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+	unsigned long bp;
+
+	if (regs)
+		return regs->bp;
+
+	if (task == current) {
+		/* Grab bp right from our regs */
+		get_bp(bp);
+		return bp;
+	}
+
+	/* bp is the last reg pushed by switch_to */
+	return *(unsigned long *)task->thread.sp;
+}
+#else
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+	return 0;
+}
+#endif
+
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl);
+		   unsigned long *stack, char *log_lvl);
 
 extern void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *sp, unsigned long bp, char *log_lvl);
+		   unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 5469630..fa7b917 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -10,12 +10,6 @@
 unsigned long long native_sched_clock(void);
 extern int recalibrate_cpu_khz(void);
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-extern int timer_ack;
-#else
-# define timer_ack (0)
-#endif
-
 extern int no_timer_check;
 
 /* Accelerators for sched_clock()
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9e13763..1e99475 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -45,6 +45,7 @@
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
+obj-y			+= resource.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24..553d0b0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -591,17 +591,21 @@
 static int wrote_text;
 
 struct text_poke_params {
-	void *addr;
-	const void *opcode;
-	size_t len;
+	struct text_poke_param *params;
+	int nparams;
 };
 
 static int __kprobes stop_machine_text_poke(void *data)
 {
 	struct text_poke_params *tpp = data;
+	struct text_poke_param *p;
+	int i;
 
 	if (atomic_dec_and_test(&stop_machine_first)) {
-		text_poke(tpp->addr, tpp->opcode, tpp->len);
+		for (i = 0; i < tpp->nparams; i++) {
+			p = &tpp->params[i];
+			text_poke(p->addr, p->opcode, p->len);
+		}
 		smp_wmb();	/* Make sure other cpus see that this has run */
 		wrote_text = 1;
 	} else {
@@ -610,8 +614,12 @@
 		smp_mb();	/* Load wrote_text before following execution */
 	}
 
-	flush_icache_range((unsigned long)tpp->addr,
-			   (unsigned long)tpp->addr + tpp->len);
+	for (i = 0; i < tpp->nparams; i++) {
+		p = &tpp->params[i];
+		flush_icache_range((unsigned long)p->addr,
+				   (unsigned long)p->addr + p->len);
+	}
+
 	return 0;
 }
 
@@ -631,10 +639,13 @@
 void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 {
 	struct text_poke_params tpp;
+	struct text_poke_param p;
 
-	tpp.addr = addr;
-	tpp.opcode = opcode;
-	tpp.len = len;
+	p.addr = addr;
+	p.opcode = opcode;
+	p.len = len;
+	tpp.params = &p;
+	tpp.nparams = 1;
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
 	/* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@
 	return addr;
 }
 
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. Since the
+ * stop_machine() is heavy task, it is better to aggregate text_poke requests
+ * and do it once if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+	struct text_poke_params tpp = {.params = params, .nparams = n};
+
+	atomic_set(&stop_machine_first, 1);
+	wrote_text = 0;
+	stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b..3966b56 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
+obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3f838d5..fb76578 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -799,11 +798,7 @@
 	 * PIT/HPET going.  Otherwise register lapic as a dummy
 	 * device.
 	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		pr_warning("APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
+	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
@@ -1387,8 +1382,15 @@
 	}
 #endif
 
-	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
+
+	/*
+	 * Now that local APIC setup is completed for BP, configure the fault
+	 * handling for interrupt remapping.
+	 */
+	if (!smp_processor_id() && intr_remapping_enabled)
+		enable_drhd_fault_handling();
+
 }
 
 #ifdef CONFIG_X86_X2APIC
@@ -1750,17 +1752,10 @@
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
-#else
-	localise_nmi_watchdog();
 #endif
 
 	x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-	check_nmi_watchdog();
-#endif
-
 	return 0;
 }
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e..c57d0b5 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,13 +17,14 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
 	return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
+#ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
@@ -91,18 +92,3 @@
 }
 early_initcall(register_trigger_all_cpu_backtrace);
 #endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a72..16c2db8 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -2430,13 +2429,12 @@
 {
 	struct irq_cfg *cfg = data->chip_data;
 	int i, do_unmask_irq = 0, irq = data->irq;
-	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long v;
 
 	irq_complete_move(cfg);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
 		mask_ioapic(cfg);
 	}
@@ -2643,24 +2641,6 @@
 				      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
@@ -2766,15 +2746,6 @@
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-	{
-		unsigned int ver;
-
-		ver = apic_read(APIC_LVR);
-		ver = GET_APIC_VERSION(ver);
-		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-	}
-#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2822,10 +2793,6 @@
 				unmask_ioapic(cfg);
 		}
 		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
@@ -2851,11 +2818,6 @@
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
 			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				legacy_pic->mask(0);
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			goto out;
 		}
 		/*
@@ -2867,15 +2829,6 @@
 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
 	}
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
@@ -3413,6 +3366,7 @@
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
 
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041c..0000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- *  NMI watchdog support on APIC systems
- *
- *  Started by Ingo Molnar <mingo@redhat.com>
- *
- *  Fixes:
- *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
- *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
- *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
- *  Pavel Machek and
- *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- *     be enabled
- *  0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
-	return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_MCE)
-	return atomic_read(&mce_entry) > 0;
-#endif
-	return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active, when we have highres/dyntick on
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
-	return per_cpu(irq_stat, cpu).apic_timer_irqs +
-		per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
-	local_irq_enable_in_hardirq();
-	/*
-	 * Intentionally don't use cpu_relax here. This is
-	 * to make sure that the performance counter really ticks,
-	 * even if there is a simulator or similar that catches the
-	 * pause instruction. On a real HT machine this is fine because
-	 * all other CPUs are busy with "useless" delay loops and don't
-	 * care if they get somewhat less cycles.
-	 */
-	while (endflag == 0)
-		mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
-{
-	printk(KERN_CONT "\n");
-
-	printk(KERN_WARNING
-		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
-			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
-	printk(KERN_WARNING
-		"Please report this to bugzilla.kernel.org,\n");
-	printk(KERN_WARNING
-		"and attach the output of the 'dmesg' command.\n");
-
-	per_cpu(wd_enabled, cpu) = 0;
-	atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
-	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
-	unsigned int *prev_nmi_count;
-	int cpu;
-
-	if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
-		return 0;
-
-	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
-	if (!prev_nmi_count)
-		goto error;
-
-	printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
-#endif
-
-	for_each_possible_cpu(cpu)
-		prev_nmi_count[cpu] = get_nmi_count(cpu);
-	local_irq_enable();
-	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
-
-	for_each_online_cpu(cpu) {
-		if (!per_cpu(wd_enabled, cpu))
-			continue;
-		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
-			report_broken_nmi(cpu, prev_nmi_count);
-	}
-	endflag = 1;
-	if (!atomic_read(&nmi_active)) {
-		kfree(prev_nmi_count);
-		atomic_set(&nmi_active, -1);
-		goto error;
-	}
-	printk("OK.\n");
-
-	/*
-	 * now that we know it works we can reduce NMI frequency to
-	 * something more reasonable; makes a difference in some configs
-	 */
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		nmi_hz = lapic_adjust_nmi_hz(1);
-
-	kfree(prev_nmi_count);
-	return 0;
-error:
-	if (nmi_watchdog == NMI_IO_APIC) {
-		if (!timer_through_8259)
-			legacy_pic->mask(0);
-		on_each_cpu(__acpi_nmi_disable, NULL, 1);
-	}
-
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-	return -1;
-}
-
-static int __init setup_nmi_watchdog(char *str)
-{
-	unsigned int nmi;
-
-	if (!strncmp(str, "panic", 5)) {
-		panic_on_timeout = 1;
-		str = strchr(str, ',');
-		if (!str)
-			return 1;
-		++str;
-	}
-
-	if (!strncmp(str, "lapic", 5))
-		nmi_watchdog = NMI_LOCAL_APIC;
-	else if (!strncmp(str, "ioapic", 6))
-		nmi_watchdog = NMI_IO_APIC;
-	else {
-		get_option(&str, &nmi);
-		if (nmi >= NMI_INVALID)
-			return 0;
-		nmi_watchdog = nmi;
-	}
-
-	return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-/*
- * Suspend/resume support
- */
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
-	/* only CPU0 goes here, other CPUs should be offline */
-	nmi_pm_active = atomic_read(&nmi_active);
-	stop_apic_nmi_watchdog(NULL);
-	BUG_ON(atomic_read(&nmi_active) != 0);
-	return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
-	/* only CPU0 goes here, other CPUs should be offline */
-	if (nmi_pm_active > 0) {
-		setup_apic_nmi_watchdog(NULL);
-		touch_nmi_watchdog();
-	}
-	return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
-	.name		= "lapic_nmi",
-	.resume		= lapic_nmi_resume,
-	.suspend	= lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
-	.id	= 0,
-	.cls	= &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
-	int error;
-
-	/*
-	 * should really be a BUG_ON but b/c this is an
-	 * init call, it just doesn't work.  -dcz
-	 */
-	if (nmi_watchdog != NMI_LOCAL_APIC)
-		return 0;
-
-	if (atomic_read(&nmi_active) < 0)
-		return 0;
-
-	error = sysdev_class_register(&nmi_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic_nmi);
-	return error;
-}
-
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif	/* CONFIG_PM */
-
-static void __acpi_nmi_enable(void *__unused)
-{
-	apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
-	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_enable, NULL, 1);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
-	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_disable, NULL, 1);
-}
-
-/*
- * This function is called as soon the LAPIC NMI watchdog driver has everything
- * in place and it's ready to check if the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
-	__get_cpu_var(wd_enabled) = 1;
-}
-
-void setup_apic_nmi_watchdog(void *unused)
-{
-	if (__get_cpu_var(wd_enabled))
-		return;
-
-	/* cheap hack to support suspend/resume */
-	/* if cpu0 is not active neither should the other cpus */
-	if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
-		return;
-
-	switch (nmi_watchdog) {
-	case NMI_LOCAL_APIC:
-		if (lapic_watchdog_init(nmi_hz) < 0) {
-			__get_cpu_var(wd_enabled) = 0;
-			return;
-		}
-		/* FALL THROUGH */
-	case NMI_IO_APIC:
-		__get_cpu_var(wd_enabled) = 1;
-		atomic_inc(&nmi_active);
-	}
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
-	/* only support LOCAL and IO APICs for now */
-	if (!nmi_watchdog_active())
-		return;
-	if (__get_cpu_var(wd_enabled) == 0)
-		return;
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		lapic_watchdog_stop();
-	else
-		__acpi_nmi_disable(NULL);
-	__get_cpu_var(wd_enabled) = 0;
-	atomic_dec(&nmi_active);
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up here too!]
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(long, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
-	if (nmi_watchdog_active()) {
-		unsigned cpu;
-
-		/*
-		 * Tell other CPUs to reset their alert counters. We cannot
-		 * do it ourselves because the alert count increase is not
-		 * atomic.
-		 */
-		for_each_present_cpu(cpu) {
-			if (per_cpu(nmi_touch, cpu) != 1)
-				per_cpu(nmi_touch, cpu) = 1;
-		}
-	}
-
-	/*
-	 * Tickle the softlockup detector too:
-	 */
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
-{
-	/*
-	 * Since current_thread_info()-> is always on the stack, and we
-	 * always switch the stack NMI-atomically, it's safe to use
-	 * smp_processor_id().
-	 */
-	unsigned int sum;
-	int touched = 0;
-	int cpu = smp_processor_id();
-	int rc = 0;
-
-	sum = get_timer_irqs(cpu);
-
-	if (__get_cpu_var(nmi_touch)) {
-		__get_cpu_var(nmi_touch) = 0;
-		touched = 1;
-	}
-
-	/* We can be called before check_nmi_watchdog, hence NULL check. */
-	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
-
-		raw_spin_lock(&lock);
-		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
-		show_regs(regs);
-		dump_stack();
-		raw_spin_unlock(&lock);
-		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-
-		rc = 1;
-	}
-
-	/* Could check oops_in_progress here too, but it's safer not to */
-	if (mce_in_progress())
-		touched = 1;
-
-	/* if the none of the timers isn't firing, this cpu isn't doing much */
-	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
-		/*
-		 * Ayiee, looks like this CPU is stuck ...
-		 * wait a few IRQs (5 seconds) before doing the oops ...
-		 */
-		__this_cpu_inc(alert_counter);
-		if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
-			/*
-			 * die_nmi will return ONLY if NOTIFY_STOP happens..
-			 */
-			die_nmi("BUG: NMI Watchdog detected LOCKUP",
-				regs, panic_on_timeout);
-	} else {
-		__get_cpu_var(last_irq_sum) = sum;
-		__this_cpu_write(alert_counter, 0);
-	}
-
-	/* see if the nmi watchdog went off */
-	if (!__get_cpu_var(wd_enabled))
-		return rc;
-	switch (nmi_watchdog) {
-	case NMI_LOCAL_APIC:
-		rc |= lapic_wd_event(nmi_hz);
-		break;
-	case NMI_IO_APIC:
-		/*
-		 * don't know how to accurately check for this.
-		 * just assume it was a watchdog timer interrupt
-		 * This matches the old behaviour.
-		 */
-		rc = 1;
-		break;
-	}
-	return rc;
-}
-
-#ifdef CONFIG_SYSCTL
-
-static void enable_ioapic_nmi_watchdog_single(void *unused)
-{
-	__get_cpu_var(wd_enabled) = 1;
-	atomic_inc(&nmi_active);
-	__acpi_nmi_enable(NULL);
-}
-
-static void enable_ioapic_nmi_watchdog(void)
-{
-	on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
-	touch_nmi_watchdog();
-}
-
-static void disable_ioapic_nmi_watchdog(void)
-{
-	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-}
-
-static int __init setup_unknown_nmi_panic(char *str)
-{
-	unknown_nmi_panic = 1;
-	return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-	unsigned char reason = get_nmi_reason();
-	char buf[64];
-
-	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-	die_nmi(buf, regs, 1); /* Always panic here */
-	return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi
- */
-int proc_nmi_enabled(struct ctl_table *table, int write,
-			void __user *buffer, size_t *length, loff_t *ppos)
-{
-	int old_state;
-
-	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
-	old_state = nmi_watchdog_enabled;
-	proc_dointvec(table, write, buffer, length, ppos);
-	if (!!old_state == !!nmi_watchdog_enabled)
-		return 0;
-
-	if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
-		printk(KERN_WARNING
-			"NMI watchdog is permanently disabled\n");
-		return -EIO;
-	}
-
-	if (nmi_watchdog == NMI_LOCAL_APIC) {
-		if (nmi_watchdog_enabled)
-			enable_lapic_nmi_watchdog();
-		else
-			disable_lapic_nmi_watchdog();
-	} else if (nmi_watchdog == NMI_IO_APIC) {
-		if (nmi_watchdog_enabled)
-			enable_ioapic_nmi_watchdog();
-		else
-			disable_ioapic_nmi_watchdog();
-	} else {
-		printk(KERN_WARNING
-			"NMI watchdog doesn't know what hardware to touch\n");
-		return -EIO;
-	}
-	return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-	if (unknown_nmi_panic)
-		return unknown_nmi_panic_callback(regs, cpu);
-#endif
-	return 0;
-}
-
-void arch_trigger_all_cpu_backtrace(void)
-{
-	int i;
-
-	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-
-	printk(KERN_INFO "sending NMI to all CPUs:\n");
-	apic->send_IPI_all(NMI_VECTOR);
-
-	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
-	for (i = 0; i < 10 * 1000; i++) {
-		if (cpumask_empty(to_cpumask(backtrace_mask)))
-			break;
-		mdelay(1);
-	}
-}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f9e4e6a..d8c4a6f 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -79,13 +79,6 @@
 		/* need to update phys_pkg_id */
 		apic->phys_pkg_id = apicid_phys_pkg_id;
 	}
-
-	/*
-	 * Now that apic routing model is selected, configure the
-	 * fault handling for intr remapping.
-	 */
-	if (intr_remapping_enabled)
-		enable_drhd_fault_handling();
 }
 
 /* Same for both flat and physical. */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda..1d59834 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@
 #else
 	vgetcpu_set_mode();
 #endif
-	init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b91..0a360d1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@
 {
 	int i;
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		disable_lapic_nmi_watchdog();
-
 	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
@@ -355,9 +352,6 @@
 	for (i--; i >= 0; i--)
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		enable_lapic_nmi_watchdog();
-
 	return false;
 }
 
@@ -369,9 +363,6 @@
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
-
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		enable_lapic_nmi_watchdog();
 }
 
 #else
@@ -384,15 +375,53 @@
 static bool check_hw_exists(void)
 {
 	u64 val, val_new = 0;
-	int ret = 0;
+	int i, reg, ret = 0;
 
+	/*
+	 * Check to see if the BIOS enabled any of the counters, if so
+	 * complain and bail.
+	 */
+	for (i = 0; i < x86_pmu.num_counters; i++) {
+		reg = x86_pmu.eventsel + i;
+		ret = rdmsrl_safe(reg, &val);
+		if (ret)
+			goto msr_fail;
+		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
+			goto bios_fail;
+	}
+
+	if (x86_pmu.num_counters_fixed) {
+		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		ret = rdmsrl_safe(reg, &val);
+		if (ret)
+			goto msr_fail;
+		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+			if (val & (0x03 << i*4))
+				goto bios_fail;
+		}
+	}
+
+	/*
+	 * Now write a value and read it back to see if it matches,
+	 * this is needed to detect certain hardware emulators (qemu/kvm)
+	 * that don't trap on the MSR access and always return 0s.
+	 */
 	val = 0xabcdUL;
-	ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+	ret = checking_wrmsrl(x86_pmu.perfctr, val);
 	ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
 	if (ret || val != val_new)
-		return false;
+		goto msr_fail;
 
 	return true;
+
+bios_fail:
+	printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+	return false;
+
+msr_fail:
+	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+	return false;
 }
 
 static void reserve_ds_buffers(void);
@@ -451,7 +480,7 @@
 	struct hw_perf_event *hwc = &event->hw;
 	u64 config;
 
-	if (!hwc->sample_period) {
+	if (!is_sampling_event(event)) {
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
@@ -1362,7 +1391,7 @@
 	pr_info("no hardware sampling interrupt available.\n");
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	struct event_constraint *c;
 	int err;
@@ -1377,20 +1406,18 @@
 		err = amd_pmu_init();
 		break;
 	default:
-		return;
+		return 0;
 	}
 	if (err != 0) {
 		pr_cont("no PMU driver, software events only.\n");
-		return;
+		return 0;
 	}
 
 	pmu_check_apic();
 
 	/* sanity check that the hardware exists or is emulated */
-	if (!check_hw_exists()) {
-		pr_cont("Broken PMU hardware detected, software events only.\n");
-		return;
-	}
+	if (!check_hw_exists())
+		return 0;
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
@@ -1438,9 +1465,12 @@
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(x86_pmu_notifier);
+
+	return 0;
 }
+early_initcall(init_hw_perf_events);
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
@@ -1686,7 +1716,7 @@
 
 	perf_callchain_store(entry, regs->ip);
 
-	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
+	dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index e421b8c..67e2202 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,7 +1,5 @@
 #ifdef CONFIG_CPU_SUP_AMD
 
-static DEFINE_RAW_SPINLOCK(amd_nb_lock);
-
 static __initconst const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@
 	return &emptyconstraint;
 }
 
-static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+static struct amd_nb *amd_alloc_nb(int cpu)
 {
 	struct amd_nb *nb;
 	int i;
@@ -285,7 +283,7 @@
 	if (!nb)
 		return NULL;
 
-	nb->nb_id = nb_id;
+	nb->nb_id = -1;
 
 	/*
 	 * initialize all possible NB constraints
@@ -306,7 +304,7 @@
 	if (boot_cpu_data.x86_max_cores < 2)
 		return NOTIFY_OK;
 
-	cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+	cpuc->amd_nb = amd_alloc_nb(cpu);
 	if (!cpuc->amd_nb)
 		return NOTIFY_BAD;
 
@@ -325,8 +323,6 @@
 	nb_id = amd_get_nb_id(cpu);
 	WARN_ON_ONCE(nb_id == BAD_APICID);
 
-	raw_spin_lock(&amd_nb_lock);
-
 	for_each_online_cpu(i) {
 		nb = per_cpu(cpu_hw_events, i).amd_nb;
 		if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@
 
 	cpuc->amd_nb->nb_id = nb_id;
 	cpuc->amd_nb->refcnt++;
-
-	raw_spin_unlock(&amd_nb_lock);
 }
 
 static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@
 
 	cpuhw = &per_cpu(cpu_hw_events, cpu);
 
-	raw_spin_lock(&amd_nb_lock);
-
 	if (cpuhw->amd_nb) {
 		struct amd_nb *nb = cpuhw->amd_nb;
 
@@ -364,8 +356,6 @@
 
 		cpuhw->amd_nb = NULL;
 	}
-
-	raw_spin_unlock(&amd_nb_lock);
 }
 
 static __initconst const struct x86_pmu amd_pmu = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c08..24e390e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -816,6 +816,32 @@
 	if (ret)
 		return ret;
 
+	if (event->attr.precise_ip &&
+	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use INST_RETIRED.ANY_P
+		 * (0x00c0), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * INST_RETIRED.ANY_P counts the number of cycles that retires
+		 * CNTMASK instructions. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of instructions that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less instructions, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+
 	if (event->attr.type != PERF_TYPE_RAW)
 		return 0;
 
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8..d5a2366 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -16,32 +16,12 @@
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/smp.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <linux/kprobes.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
 
-struct nmi_watchdog_ctlblk {
-	unsigned int cccr_msr;
-	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
-	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
-};
-
-/* Interface defining a CPU specific perfctr watchdog */
-struct wd_ops {
-	int (*reserve)(void);
-	void (*unreserve)(void);
-	int (*setup)(unsigned nmi_hz);
-	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
-	void (*stop)(void);
-	unsigned perfctr;
-	unsigned evntsel;
-	u64 checkbit;
-};
-
-static const struct wd_ops *wd_ops;
-
 /*
  * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@
 static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
 static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
 
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-
 /* converts an msr to an appropriate reservation bit */
 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 {
@@ -172,623 +150,3 @@
 	clear_bit(counter, evntsel_nmi_owner);
 }
 EXPORT_SYMBOL(release_evntsel_nmi);
-
-void disable_lapic_nmi_watchdog(void)
-{
-	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-	if (atomic_read(&nmi_active) <= 0)
-		return;
-
-	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-
-	if (wd_ops)
-		wd_ops->unreserve();
-
-	BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_lapic_nmi_watchdog(void)
-{
-	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-	/* are we already enabled */
-	if (atomic_read(&nmi_active) != 0)
-		return;
-
-	/* are we lapic aware */
-	if (!wd_ops)
-		return;
-	if (!wd_ops->reserve()) {
-		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
-		return;
-	}
-
-	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
-	touch_nmi_watchdog();
-}
-
-/*
- * Activate the NMI watchdog via the local APIC.
- */
-
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
-	u64 counter_val;
-	unsigned int retval = hz;
-
-	/*
-	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
-	 * are writable, with higher bits sign extending from bit 31.
-	 * So, we can only program the counter with 31 bit values and
-	 * 32nd bit should be 1, for 33.. to be 1.
-	 * Find the appropriate nmi_hz
-	 */
-	counter_val = (u64)cpu_khz * 1000;
-	do_div(counter_val, retval);
-	if (counter_val > 0x7fffffffULL) {
-		u64 count = (u64)cpu_khz * 1000;
-		do_div(count, 0x7fffffffUL);
-		retval = count + 1;
-	}
-	return retval;
-}
-
-static void write_watchdog_counter(unsigned int perfctr_msr,
-				const char *descr, unsigned nmi_hz)
-{
-	u64 count = (u64)cpu_khz * 1000;
-
-	do_div(count, nmi_hz);
-	if (descr)
-		pr_debug("setting %s to -0x%08Lx\n", descr, count);
-	wrmsrl(perfctr_msr, 0 - count);
-}
-
-static void write_watchdog_counter32(unsigned int perfctr_msr,
-				const char *descr, unsigned nmi_hz)
-{
-	u64 count = (u64)cpu_khz * 1000;
-
-	do_div(count, nmi_hz);
-	if (descr)
-		pr_debug("setting %s to -0x%08Lx\n", descr, count);
-	wrmsr(perfctr_msr, (u32)(-count), 0);
-}
-
-/*
- * AMD K7/K8/Family10h/Family11h support.
- * AMD keeps this interface nicely stable so there is not much variety
- */
-#define K7_EVNTSEL_ENABLE	(1 << 22)
-#define K7_EVNTSEL_INT		(1 << 20)
-#define K7_EVNTSEL_OS		(1 << 17)
-#define K7_EVNTSEL_USR		(1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
-#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(unsigned nmi_hz)
-{
-	unsigned int perfctr_msr, evntsel_msr;
-	unsigned int evntsel;
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	perfctr_msr = wd_ops->perfctr;
-	evntsel_msr = wd_ops->evntsel;
-
-	wrmsrl(perfctr_msr, 0UL);
-
-	evntsel = K7_EVNTSEL_INT
-		| K7_EVNTSEL_OS
-		| K7_EVNTSEL_USR
-		| K7_NMI_EVENT;
-
-	/* setup the timer */
-	wrmsr(evntsel_msr, evntsel, 0);
-	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
-
-	/* initialize the wd struct before enabling */
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
-
-	/* ok, everything is initialized, announce that we're set */
-	cpu_nmi_set_wd_enabled();
-
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= K7_EVNTSEL_ENABLE;
-	wrmsr(evntsel_msr, evntsel, 0);
-
-	return 1;
-}
-
-static void single_msr_stop_watchdog(void)
-{
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int single_msr_reserve(void)
-{
-	if (!reserve_perfctr_nmi(wd_ops->perfctr))
-		return 0;
-
-	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
-		release_perfctr_nmi(wd_ops->perfctr);
-		return 0;
-	}
-	return 1;
-}
-
-static void single_msr_unreserve(void)
-{
-	release_evntsel_nmi(wd_ops->evntsel);
-	release_perfctr_nmi(wd_ops->perfctr);
-}
-
-static void __kprobes
-single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-	/* start the cycle over again */
-	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops k7_wd_ops = {
-	.reserve	= single_msr_reserve,
-	.unreserve	= single_msr_unreserve,
-	.setup		= setup_k7_watchdog,
-	.rearm		= single_msr_rearm,
-	.stop		= single_msr_stop_watchdog,
-	.perfctr	= MSR_K7_PERFCTR0,
-	.evntsel	= MSR_K7_EVNTSEL0,
-	.checkbit	= 1ULL << 47,
-};
-
-/*
- * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
- */
-#define P6_EVNTSEL0_ENABLE	(1 << 22)
-#define P6_EVNTSEL_INT		(1 << 20)
-#define P6_EVNTSEL_OS		(1 << 17)
-#define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
-#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-static int setup_p6_watchdog(unsigned nmi_hz)
-{
-	unsigned int perfctr_msr, evntsel_msr;
-	unsigned int evntsel;
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	perfctr_msr = wd_ops->perfctr;
-	evntsel_msr = wd_ops->evntsel;
-
-	/* KVM doesn't implement this MSR */
-	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
-		return 0;
-
-	evntsel = P6_EVNTSEL_INT
-		| P6_EVNTSEL_OS
-		| P6_EVNTSEL_USR
-		| P6_NMI_EVENT;
-
-	/* setup the timer */
-	wrmsr(evntsel_msr, evntsel, 0);
-	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
-
-	/* initialize the wd struct before enabling */
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
-
-	/* ok, everything is initialized, announce that we're set */
-	cpu_nmi_set_wd_enabled();
-
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= P6_EVNTSEL0_ENABLE;
-	wrmsr(evntsel_msr, evntsel, 0);
-
-	return 1;
-}
-
-static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-	/*
-	 * P6 based Pentium M need to re-unmask
-	 * the apic vector but it doesn't hurt
-	 * other P6 variant.
-	 * ArchPerfom/Core Duo also needs this
-	 */
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-	/* P6/ARCH_PERFMON has 32 bit counter write */
-	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p6_wd_ops = {
-	.reserve	= single_msr_reserve,
-	.unreserve	= single_msr_unreserve,
-	.setup		= setup_p6_watchdog,
-	.rearm		= p6_rearm,
-	.stop		= single_msr_stop_watchdog,
-	.perfctr	= MSR_P6_PERFCTR0,
-	.evntsel	= MSR_P6_EVNTSEL0,
-	.checkbit	= 1ULL << 39,
-};
-
-/*
- * Intel P4 performance counters.
- * By far the most complicated of all.
- */
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
-#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
-#define P4_ESCR_OS		(1 << 3)
-#define P4_ESCR_USR		(1 << 2)
-#define P4_CCCR_OVF_PMI0	(1 << 26)
-#define P4_CCCR_OVF_PMI1	(1 << 27)
-#define P4_CCCR_THRESHOLD(N)	((N) << 20)
-#define P4_CCCR_COMPLEMENT	(1 << 19)
-#define P4_CCCR_COMPARE		(1 << 18)
-#define P4_CCCR_REQUIRED	(3 << 16)
-#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
-#define P4_CCCR_ENABLE		(1 << 12)
-#define P4_CCCR_OVF 		(1 << 31)
-
-#define P4_CONTROLS 18
-static unsigned int p4_controls[18] = {
-	MSR_P4_BPU_CCCR0,
-	MSR_P4_BPU_CCCR1,
-	MSR_P4_BPU_CCCR2,
-	MSR_P4_BPU_CCCR3,
-	MSR_P4_MS_CCCR0,
-	MSR_P4_MS_CCCR1,
-	MSR_P4_MS_CCCR2,
-	MSR_P4_MS_CCCR3,
-	MSR_P4_FLAME_CCCR0,
-	MSR_P4_FLAME_CCCR1,
-	MSR_P4_FLAME_CCCR2,
-	MSR_P4_FLAME_CCCR3,
-	MSR_P4_IQ_CCCR0,
-	MSR_P4_IQ_CCCR1,
-	MSR_P4_IQ_CCCR2,
-	MSR_P4_IQ_CCCR3,
-	MSR_P4_IQ_CCCR4,
-	MSR_P4_IQ_CCCR5,
-};
-/*
- * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- * CRU_ESCR0 (with any non-null event selector) through a complemented
- * max threshold. [IA32-Vol3, Section 14.9.9]
- */
-static int setup_p4_watchdog(unsigned nmi_hz)
-{
-	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
-	unsigned int evntsel, cccr_val;
-	unsigned int misc_enable, dummy;
-	unsigned int ht_num;
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
-	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
-		return 0;
-
-#ifdef CONFIG_SMP
-	/* detect which hyperthread we are on */
-	if (smp_num_siblings == 2) {
-		unsigned int ebx, apicid;
-
-		ebx = cpuid_ebx(1);
-		apicid = (ebx >> 24) & 0xff;
-		ht_num = apicid & 1;
-	} else
-#endif
-		ht_num = 0;
-
-	/*
-	 * performance counters are shared resources
-	 * assign each hyperthread its own set
-	 * (re-use the ESCR0 register, seems safe
-	 * and keeps the cccr_val the same)
-	 */
-	if (!ht_num) {
-		/* logical cpu 0 */
-		perfctr_msr = MSR_P4_IQ_PERFCTR0;
-		evntsel_msr = MSR_P4_CRU_ESCR0;
-		cccr_msr = MSR_P4_IQ_CCCR0;
-		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
-
-		/*
-		 * If we're on the kdump kernel or other situation, we may
-		 * still have other performance counter registers set to
-		 * interrupt and they'll keep interrupting forever because
-		 * of the P4_CCCR_OVF quirk. So we need to ACK all the
-		 * pending interrupts and disable all the registers here,
-		 * before reenabling the NMI delivery. Refer to p4_rearm()
-		 * about the P4_CCCR_OVF quirk.
-		 */
-		if (reset_devices) {
-			unsigned int low, high;
-			int i;
-
-			for (i = 0; i < P4_CONTROLS; i++) {
-				rdmsr(p4_controls[i], low, high);
-				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
-				wrmsr(p4_controls[i], low, high);
-			}
-		}
-	} else {
-		/* logical cpu 1 */
-		perfctr_msr = MSR_P4_IQ_PERFCTR1;
-		evntsel_msr = MSR_P4_CRU_ESCR0;
-		cccr_msr = MSR_P4_IQ_CCCR1;
-
-		/* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
-		if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
-			cccr_val = P4_CCCR_OVF_PMI0;
-		else
-			cccr_val = P4_CCCR_OVF_PMI1;
-		cccr_val |= P4_CCCR_ESCR_SELECT(4);
-	}
-
-	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
-		| P4_ESCR_OS
-		| P4_ESCR_USR;
-
-	cccr_val |= P4_CCCR_THRESHOLD(15)
-		 | P4_CCCR_COMPLEMENT
-		 | P4_CCCR_COMPARE
-		 | P4_CCCR_REQUIRED;
-
-	wrmsr(evntsel_msr, evntsel, 0);
-	wrmsr(cccr_msr, cccr_val, 0);
-	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = cccr_msr;
-
-	/* ok, everything is initialized, announce that we're set */
-	cpu_nmi_set_wd_enabled();
-
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	cccr_val |= P4_CCCR_ENABLE;
-	wrmsr(cccr_msr, cccr_val, 0);
-	return 1;
-}
-
-static void stop_p4_watchdog(void)
-{
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-	wrmsr(wd->cccr_msr, 0, 0);
-	wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int p4_reserve(void)
-{
-	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
-		return 0;
-#ifdef CONFIG_SMP
-	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
-		goto fail1;
-#endif
-	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
-		goto fail2;
-	/* RED-PEN why is ESCR1 not reserved here? */
-	return 1;
- fail2:
-#ifdef CONFIG_SMP
-	if (smp_num_siblings > 1)
-		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
- fail1:
-#endif
-	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-	return 0;
-}
-
-static void p4_unreserve(void)
-{
-#ifdef CONFIG_SMP
-	if (smp_num_siblings > 1)
-		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
-#endif
-	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
-	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-}
-
-static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-	unsigned dummy;
-	/*
-	 * P4 quirks:
-	 * - An overflown perfctr will assert its interrupt
-	 *   until the OVF flag in its CCCR is cleared.
-	 * - LVTPC is masked on interrupt and must be
-	 *   unmasked by the LVTPC handler.
-	 */
-	rdmsrl(wd->cccr_msr, dummy);
-	dummy &= ~P4_CCCR_OVF;
-	wrmsrl(wd->cccr_msr, dummy);
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	/* start the cycle over again */
-	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p4_wd_ops = {
-	.reserve	= p4_reserve,
-	.unreserve	= p4_unreserve,
-	.setup		= setup_p4_watchdog,
-	.rearm		= p4_rearm,
-	.stop		= stop_p4_watchdog,
-	/* RED-PEN this is wrong for the other sibling */
-	.perfctr	= MSR_P4_BPU_PERFCTR0,
-	.evntsel	= MSR_P4_BSU_ESCR0,
-	.checkbit	= 1ULL << 39,
-};
-
-/*
- * Watchdog using the Intel architected PerfMon.
- * Used for Core2 and hopefully all future Intel CPUs.
- */
-#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static struct wd_ops intel_arch_wd_ops;
-
-static int setup_intel_arch_watchdog(unsigned nmi_hz)
-{
-	unsigned int ebx;
-	union cpuid10_eax eax;
-	unsigned int unused;
-	unsigned int perfctr_msr, evntsel_msr;
-	unsigned int evntsel;
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	/*
-	 * Check whether the Architectural PerfMon supports
-	 * Unhalted Core Cycles Event or not.
-	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
-	 */
-	cpuid(10, &(eax.full), &ebx, &unused, &unused);
-	if ((eax.split.mask_length <
-			(ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
-	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-		return 0;
-
-	perfctr_msr = wd_ops->perfctr;
-	evntsel_msr = wd_ops->evntsel;
-
-	wrmsrl(perfctr_msr, 0UL);
-
-	evntsel = ARCH_PERFMON_EVENTSEL_INT
-		| ARCH_PERFMON_EVENTSEL_OS
-		| ARCH_PERFMON_EVENTSEL_USR
-		| ARCH_PERFMON_NMI_EVENT_SEL
-		| ARCH_PERFMON_NMI_EVENT_UMASK;
-
-	/* setup the timer */
-	wrmsr(evntsel_msr, evntsel, 0);
-	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
-
-	/* ok, everything is initialized, announce that we're set */
-	cpu_nmi_set_wd_enabled();
-
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
-	wrmsr(evntsel_msr, evntsel, 0);
-	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
-	return 1;
-}
-
-static struct wd_ops intel_arch_wd_ops __read_mostly = {
-	.reserve	= single_msr_reserve,
-	.unreserve	= single_msr_unreserve,
-	.setup		= setup_intel_arch_watchdog,
-	.rearm		= p6_rearm,
-	.stop		= single_msr_stop_watchdog,
-	.perfctr	= MSR_ARCH_PERFMON_PERFCTR1,
-	.evntsel	= MSR_ARCH_PERFMON_EVENTSEL1,
-};
-
-static void probe_nmi_watchdog(void)
-{
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if (boot_cpu_data.x86 == 6 ||
-		    (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
-			wd_ops = &k7_wd_ops;
-		return;
-	case X86_VENDOR_INTEL:
-		/* Work around where perfctr1 doesn't have a working enable
-		 * bit as described in the following errata:
-		 * AE49 Core Duo and Intel Core Solo 65 nm
-		 * AN49 Intel Pentium Dual-Core
-		 * AF49 Dual-Core Intel Xeon Processor LV
-		 */
-		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
-		    ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
-		     boot_cpu_data.x86_mask == 4))) {
-			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
-			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
-		}
-		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-			wd_ops = &intel_arch_wd_ops;
-			break;
-		}
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 13)
-				return;
-
-			wd_ops = &p6_wd_ops;
-			break;
-		case 15:
-			wd_ops = &p4_wd_ops;
-			break;
-		default:
-			return;
-		}
-		break;
-	}
-}
-
-/* Interface to nmi.c */
-
-int lapic_watchdog_init(unsigned nmi_hz)
-{
-	if (!wd_ops) {
-		probe_nmi_watchdog();
-		if (!wd_ops) {
-			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
-			return -1;
-		}
-
-		if (!wd_ops->reserve()) {
-			printk(KERN_ERR
-				"NMI watchdog: cannot reserve perfctrs\n");
-			return -1;
-		}
-	}
-
-	if (!(wd_ops->setup(nmi_hz))) {
-		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
-		       raw_smp_processor_id());
-		return -1;
-	}
-
-	return 0;
-}
-
-void lapic_watchdog_stop(void)
-{
-	if (wd_ops)
-		wd_ops->stop();
-}
-
-unsigned lapic_adjust_nmi_hz(unsigned hz)
-{
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
-	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
-		hz = adjust_for_32bit_ctr(hz);
-	return hz;
-}
-
-int __kprobes lapic_wd_event(unsigned nmi_hz)
-{
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-	u64 ctr;
-
-	rdmsrl(wd->perfctr_msr, ctr);
-	if (ctr & wd_ops->checkbit) /* perfctr still running? */
-		return 0;
-
-	wd_ops->rearm(wd, nmi_hz);
-	return 1;
-}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c..8474c99 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@
 
 void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl)
+		unsigned long *stack, char *log_lvl)
 {
 	printk("%sCall Trace:\n", log_lvl);
-	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+	dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
+		unsigned long *stack)
 {
-	show_trace_log_lvl(task, regs, stack, bp, "");
+	show_trace_log_lvl(task, regs, stack, "");
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	show_stack_log_lvl(task, NULL, sp, 0, "");
+	show_stack_log_lvl(task, NULL, sp, "");
 }
 
 /*
@@ -210,7 +210,7 @@
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	show_trace(NULL, NULL, &stack, bp);
+	show_trace(NULL, NULL, &stack);
 }
 EXPORT_SYMBOL(dump_stack);
 
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75..74cc1ed 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
 #include <asm/stacktrace.h>
 
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+		struct pt_regs *regs, unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data)
 {
 	int graph = 0;
+	unsigned long bp;
 
 	if (!task)
 		task = current;
@@ -34,18 +35,7 @@
 			stack = (unsigned long *)task->thread.sp;
 	}
 
-#ifdef CONFIG_FRAME_POINTER
-	if (!bp) {
-		if (task == current) {
-			/* Grab bp right from our regs */
-			get_bp(bp);
-		} else {
-			/* bp is the last reg pushed by switch_to */
-			bp = *(unsigned long *) task->thread.sp;
-		}
-	}
-#endif
-
+	bp = stack_frame(task, regs);
 	for (;;) {
 		struct thread_info *context;
 
@@ -65,7 +55,7 @@
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+		   unsigned long *sp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
@@ -87,7 +77,7 @@
 		touch_nmi_watchdog();
 	}
 	printk(KERN_CONT "\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 
@@ -112,8 +102,7 @@
 		u8 *ip;
 
 		printk(KERN_EMERG "Stack:\n");
-		show_stack_log_lvl(NULL, regs, &regs->sp,
-				0, KERN_EMERG);
+		show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
 
 		printk(KERN_EMERG "Code: ");
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a34048..6410133 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+		struct pt_regs *regs, unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@
 	unsigned used = 0;
 	struct thread_info *tinfo;
 	int graph = 0;
+	unsigned long bp;
 
 	if (!task)
 		task = current;
@@ -160,18 +161,7 @@
 			stack = (unsigned long *)task->thread.sp;
 	}
 
-#ifdef CONFIG_FRAME_POINTER
-	if (!bp) {
-		if (task == current) {
-			/* Grab bp right from our regs */
-			get_bp(bp);
-		} else {
-			/* bp is the last reg pushed by switch_to */
-			bp = *(unsigned long *) task->thread.sp;
-		}
-	}
-#endif
-
+	bp = stack_frame(task, regs);
 	/*
 	 * Print function call entries in all stacks, starting at the
 	 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+		   unsigned long *sp, char *log_lvl)
 {
 	unsigned long *irq_stack_end;
 	unsigned long *irq_stack;
@@ -279,7 +269,7 @@
 	preempt_enable();
 
 	printk(KERN_CONT "\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@
 
 		printk(KERN_EMERG "Stack:\n");
 		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-				regs->bp, KERN_EMERG);
+				   KERN_EMERG);
 
 		printk(KERN_EMERG "Code: ");
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index bcece91..c0dbd9a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -60,16 +60,18 @@
 #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
 #endif
 
+/* Number of possible pages in the lowmem region */
+LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
+	
 /* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = \
-	PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
+MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
 
 /*
  * Worst-case size of the kernel mapping we need to make:
- * the worst-case size of the kernel itself, plus the extra we need
- * to map for the linear map.
+ * a relocatable kernel can live anywhere in lowmem, so we need to be able
+ * to map all of lowmem.
  */
-KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT
+KERNEL_PAGES = LOWMEM_PAGES
 
 INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
 RESERVE_BRK(pagetables, INIT_MAP_SIZE)
@@ -620,13 +622,13 @@
 __PAGE_ALIGNED_BSS
 	.align PAGE_SIZE_asm
 #ifdef CONFIG_X86_PAE
-initial_pg_pmd:
+ENTRY(initial_pg_pmd)
 	.fill 1024*KPMDS,4,0
 #else
 ENTRY(initial_page_table)
 	.fill 1024,4,0
 #endif
-initial_pg_fixmap:
+ENTRY(initial_pg_fixmap)
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ae03cab..4ff5968 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -27,6 +27,9 @@
 #define HPET_DEV_FSB_CAP		0x1000
 #define HPET_DEV_PERI_CAP		0x2000
 
+#define HPET_MIN_CYCLES			128
+#define HPET_MIN_PROG_DELTA		(HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+
 #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
 
 /*
@@ -299,8 +302,9 @@
 	/* Calculate the min / max delta */
 	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
 							   &hpet_clockevent);
-	/* 5 usec minimum reprogramming delta. */
-	hpet_clockevent.min_delta_ns = 5000;
+	/* Setup minimum reprogramming delta. */
+	hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
+							   &hpet_clockevent);
 
 	/*
 	 * Start hpet with the boot cpu mask and make it
@@ -393,22 +397,24 @@
 	 * the wraparound into account) nor a simple count down event
 	 * mode. Further the write to the comparator register is
 	 * delayed internally up to two HPET clock cycles in certain
-	 * chipsets (ATI, ICH9,10). We worked around that by reading
-	 * back the compare register, but that required another
-	 * workaround for ICH9,10 chips where the first readout after
-	 * write can return the old stale value. We already have a
-	 * minimum delta of 5us enforced, but a NMI or SMI hitting
+	 * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even
+	 * longer delays. We worked around that by reading back the
+	 * compare register, but that required another workaround for
+	 * ICH9,10 chips where the first readout after write can
+	 * return the old stale value. We already had a minimum
+	 * programming delta of 5us enforced, but a NMI or SMI hitting
 	 * between the counter readout and the comparator write can
 	 * move us behind that point easily. Now instead of reading
 	 * the compare register back several times, we make the ETIME
 	 * decision based on the following: Return ETIME if the
-	 * counter value after the write is less than 8 HPET cycles
+	 * counter value after the write is less than HPET_MIN_CYCLES
 	 * away from the event or if the counter is already ahead of
-	 * the event.
+	 * the event. The minimum programming delta for the generic
+	 * clockevents code is set to 1.5 * HPET_MIN_CYCLES.
 	 */
 	res = (s32)(cnt - hpet_readl(HPET_COUNTER));
 
-	return res < 8 ? -ETIME : 0;
+	return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
 static void hpet_legacy_set_mode(enum clock_event_mode mode,
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c..5940282 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1184,6 +1184,10 @@
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
+	/* This is possible if op is under delayed unoptimizing */
+	if (kprobe_disabled(&op->kp))
+		return;
+
 	preempt_disable();
 	if (kprobe_running()) {
 		kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@
 	return 0;
 }
 
-/* Replace a breakpoint (int3) with a relative jump.  */
-int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+	u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+					    u8 *insn_buf,
+					    struct optimized_kprobe *op)
 {
-	unsigned char jmp_code[RELATIVEJUMP_SIZE];
 	s32 rel = (s32)((long)op->optinsn.insn -
 			((long)op->kp.addr + RELATIVEJUMP_SIZE));
 
@@ -1412,16 +1422,79 @@
 	memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
 	       RELATIVE_ADDR_SIZE);
 
-	jmp_code[0] = RELATIVEJUMP_OPCODE;
-	*(s32 *)(&jmp_code[1]) = rel;
+	insn_buf[0] = RELATIVEJUMP_OPCODE;
+	*(s32 *)(&insn_buf[1]) = rel;
+
+	tprm->addr = op->kp.addr;
+	tprm->opcode = insn_buf;
+	tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must call with locking kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+	struct optimized_kprobe *op, *tmp;
+	int c = 0;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		WARN_ON(kprobe_disabled(&op->kp));
+		/* Setup param */
+		setup_optimize_kprobe(&jump_poke_params[c],
+				      jump_poke_bufs[c].buf, op);
+		list_del_init(&op->list);
+		if (++c >= MAX_OPTIMIZE_PROBES)
+			break;
+	}
 
 	/*
 	 * text_poke_smp doesn't support NMI/MCE code modifying.
 	 * However, since kprobes itself also doesn't support NMI/MCE
 	 * code probing, it's not a problem.
 	 */
-	text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
-	return 0;
+	text_poke_smp_batch(jump_poke_params, c);
+}
+
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+					      u8 *insn_buf,
+					      struct optimized_kprobe *op)
+{
+	/* Set int3 to first byte for kprobes */
+	insn_buf[0] = BREAKPOINT_INSTRUCTION;
+	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+	tprm->addr = op->kp.addr;
+	tprm->opcode = insn_buf;
+	tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+				    struct list_head *done_list)
+{
+	struct optimized_kprobe *op, *tmp;
+	int c = 0;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		/* Setup param */
+		setup_unoptimize_kprobe(&jump_poke_params[c],
+					jump_poke_bufs[c].buf, op);
+		list_move(&op->list, done_list);
+		if (++c >= MAX_OPTIMIZE_PROBES)
+			break;
+	}
+
+	/*
+	 * text_poke_smp doesn't support NMI/MCE code modifying.
+	 * However, since kprobes itself also doesn't support NMI/MCE
+	 * code probing, it's not a problem.
+	 */
+	text_poke_smp_batch(jump_poke_params, c);
 }
 
 /* Replace a relative jump with a breakpoint (int3).  */
@@ -1453,11 +1526,35 @@
 	}
 	return 0;
 }
+
+static int __kprobes init_poke_params(void)
+{
+	/* Allocate code buffer and parameter array */
+	jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+				 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+	if (!jump_poke_bufs)
+		return -ENOMEM;
+
+	jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+				   MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+	if (!jump_poke_params) {
+		kfree(jump_poke_bufs);
+		jump_poke_bufs = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+#else	/* !CONFIG_OPTPROBES */
+static int __kprobes init_poke_params(void)
+{
+	return 0;
+}
 #endif
 
 int __init arch_init_kprobes(void)
 {
-	return 0;
+	return init_poke_params();
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868..96ed1aa 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -91,8 +91,7 @@
 void show_regs(struct pt_regs *regs)
 {
 	show_registers(regs);
-	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
-		   regs->bp);
+	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
 }
 
 void show_regs_common(void)
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
new file mode 100644
index 0000000..2a26819
--- /dev/null
+++ b/arch/x86/kernel/resource.c
@@ -0,0 +1,48 @@
+#include <linux/ioport.h>
+#include <asm/e820.h>
+
+static void resource_clip(struct resource *res, resource_size_t start,
+			  resource_size_t end)
+{
+	resource_size_t low = 0, high = 0;
+
+	if (res->end < start || res->start > end)
+		return;		/* no conflict */
+
+	if (res->start < start)
+		low = start - res->start;
+
+	if (res->end > end)
+		high = res->end - end;
+
+	/* Keep the area above or below the conflict, whichever is larger */
+	if (low > high)
+		res->end = start - 1;
+	else
+		res->start = end + 1;
+}
+
+static void remove_e820_regions(struct resource *avail)
+{
+	int i;
+	struct e820entry *entry;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		entry = &e820.map[i];
+
+		resource_clip(avail, entry->addr,
+			      entry->addr + entry->size - 1);
+	}
+}
+
+void arch_remove_reservations(struct resource *avail)
+{
+	/* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */
+	if (avail->flags & IORESOURCE_MEM) {
+		if (avail->start < BIOS_END)
+			avail->start = BIOS_END;
+		resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
+
+		remove_e820_regions(avail);
+	}
+}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 21c6746..85268f8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -769,7 +769,6 @@
 
 	x86_init.oem.arch_setup();
 
-	resource_alloc_from_bottom = 0;
 	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
 	setup_memory_map();
 	parse_setup_data();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d..68f61ac 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -281,6 +281,13 @@
 	 */
 	smp_store_cpu_info(cpuid);
 
+	/*
+	 * This must be done before setting cpu_online_mask
+	 * or calling notify_cpu_starting.
+	 */
+	set_cpu_sibling_map(raw_smp_processor_id());
+	wmb();
+
 	notify_cpu_starting(cpuid);
 
 	/*
@@ -316,16 +323,6 @@
 	 */
 	check_tsc_sync_target();
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		legacy_pic->mask(0);
-		enable_NMI_through_LVT0();
-		legacy_pic->unmask(0);
-	}
-
-	/* This must be done before setting cpu_online_mask */
-	set_cpu_sibling_map(raw_smp_processor_id());
-	wmb();
-
 	/*
 	 * We need to hold call_lock, so there is no inconsistency
 	 * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@
 		printk(KERN_INFO "SMP mode deactivated.\n");
 		smpboot_clear_io_apic();
 
-		localise_nmi_watchdog();
-
 		connect_bsp_APIC();
 		setup_local_APIC();
 		end_local_APIC_setup();
@@ -1196,7 +1191,6 @@
 #ifdef CONFIG_X86_IO_APIC
 	setup_ioapic_dest();
 #endif
-	check_nmi_watchdog();
 	mtrr_aps_init();
 }
 
@@ -1341,8 +1335,6 @@
 	if (cpu == 0)
 		return -EBUSY;
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		stop_apic_nmi_watchdog(NULL);
 	clear_local_APIC();
 
 	cpu_disable_common();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525..938c8e1 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-	dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
+	dump_trace(current, NULL, NULL, &save_stack_ops, trace);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
-void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
 {
-	dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+	dump_trace(current, regs, NULL, &save_stack_ops, trace);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
+	dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e1..25a28a2 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
 #include <asm/hpet.h>
 #include <asm/time.h>
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-int timer_ack;
-#endif
-
 #ifdef CONFIG_X86_64
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 #endif
@@ -63,20 +59,6 @@
 	/* Keep nmi watchdog up to date */
 	inc_irq_stat(irq0_irqs);
 
-	/* Optimized out for !IO_APIC and x86_64 */
-	if (timer_ack) {
-		/*
-		 * Subtle, when I/O APICs are used we have to ack timer IRQ
-		 * manually to deassert NMI lines for the watchdog if run
-		 * on an 82489DX-based system.
-		 */
-		raw_spin_lock(&i8259A_lock);
-		outb(0x0c, PIC_MASTER_OCW3);
-		/* Ack the IRQ; AEOI will end it automatically. */
-		inb(PIC_MASTER_POLL);
-		raw_spin_unlock(&i8259A_lock);
-	}
-
 	global_clock_event->event_handler(global_clock_event);
 
 	/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca..bb6f041 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@
 
 static int ignore_nmis;
 
+int unknown_nmi_panic;
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -300,6 +302,13 @@
 	die("general protection fault", regs, error_code);
 }
 
+static int __init setup_unknown_nmi_panic(char *str)
+{
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
 static notrace __kprobes void
 mem_parity_error(unsigned char reason, struct pt_regs *regs)
 {
@@ -371,7 +380,7 @@
 			reason, smp_processor_id());
 
 	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-	if (panic_on_unrecovered_nmi)
+	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
 	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -397,20 +406,8 @@
 		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
 							== NOTIFY_STOP)
 			return;
-
-#ifndef CONFIG_LOCKUP_DETECTOR
-		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
-		 */
-		if (nmi_watchdog_tick(regs, reason))
-			return;
-		if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-			unknown_nmi_error(reason, regs);
-#else
-		unknown_nmi_error(reason, regs);
 #endif
+		unknown_nmi_error(reason, regs);
 
 		return;
 	}
@@ -446,14 +443,12 @@
 
 void stop_nmi(void)
 {
-	acpi_nmi_disable();
 	ignore_nmis++;
 }
 
 void restart_nmi(void)
 {
 	ignore_nmis--;
-	acpi_nmi_enable();
 }
 
 /* May run on IST stack. */
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 9c253bd..5471285 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -394,7 +394,8 @@
 	 * Setup init_xstate_buf to represent the init state of
 	 * all the features managed by the xsave
 	 */
-	init_xstate_buf = alloc_bootmem(xstate_size);
+	init_xstate_buf = alloc_bootmem_align(xstate_size,
+					      __alignof__(struct xsave_struct));
 	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
 
 	clts();
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1ca1229..b81a9b7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3494,6 +3494,10 @@
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 {
 	switch (func) {
+	case 0x00000001:
+		/* Mask out xsave bit as long as it is not supported by SVM */
+		entry->ecx &= ~(bit(X86_FEATURE_XSAVE));
+		break;
 	case 0x80000001:
 		if (nested)
 			entry->ecx |= (1 << 2); /* Set SVM bit */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ff21fdd..81fcbe9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4227,11 +4227,6 @@
 		return PT_PDPE_LEVEL;
 }
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cdac9e5..b989e1f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -155,11 +155,6 @@
 
 u64 __read_mostly host_xcr0;
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
 	unsigned slot;
@@ -4569,9 +4564,11 @@
 #ifdef CONFIG_CPU_FREQ
 		struct cpufreq_policy policy;
 		memset(&policy, 0, sizeof(policy));
-		cpufreq_get_policy(&policy, get_cpu());
+		cpu = get_cpu();
+		cpufreq_get_policy(&policy, cpu);
 		if (policy.cpuinfo.max_freq)
 			max_tsc_khz = policy.cpuinfo.max_freq;
+		put_cpu();
 #endif
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 					  CPUFREQ_TRANSITION_NOTIFIER);
@@ -5522,6 +5519,8 @@
 
 	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
+	if (sregs->cr4 & X86_CR4_OSXSAVE)
+		update_cpuid(vcpu);
 	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
 		mmu_reset_needed = 1;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2cea414..c600da8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -70,6 +70,11 @@
 	return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 }
 
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 73b1e1a..4996cf5 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -531,7 +531,10 @@
 {
 	lguest_data.pgdir = cr3;
 	lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
-	cr3_changed = true;
+
+	/* These two page tables are simple, linear, and used during boot */
+	if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
+		cr3_changed = true;
 }
 
 static unsigned long lguest_read_cr3(void)
@@ -703,9 +706,9 @@
  * to forget all of them.  Fortunately, this is very rare.
  *
  * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow: 1.83 seconds!  So we don't even tell
- * the Host anything changed until we've done the first page table switch,
- * which brings boot back to 0.25 seconds.
+ * which makes booting astonishingly slow: 48 seconds!  So we don't even tell
+ * the Host anything changed until we've done the first real page table switch,
+ * which brings boot back to 4.3 seconds.
  */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
@@ -1002,7 +1005,7 @@
 	clockevents_register_device(&lguest_clockevent);
 
 	/* Finally, we unblock the timer interrupt. */
-	enable_lguest_irq(0);
+	clear_bit(0, lguest_data.blocked_interrupts);
 }
 
 /*
@@ -1349,9 +1352,6 @@
 	 */
 	switch_to_new_gdt(0);
 
-	/* We actually boot with all memory mapped, but let's say 128MB. */
-	max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
-
 	/*
 	 * The Host<->Guest Switcher lives at the top of our address space, and
 	 * the Host told us how big it is when we made LGUEST_INIT hypercall:
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 4f420c2f..e7d5382 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -4,6 +4,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/processor-flags.h>
+#include <asm/pgtable.h>
 
 /*G:020
  * Our story starts with the kernel booting into startup_32 in
@@ -37,9 +38,113 @@
 	/* Set up the initial stack so we can run C code. */
 	movl $(init_thread_union+THREAD_SIZE),%esp
 
+	call init_pagetables
+
 	/* Jumps are relative: we're running __PAGE_OFFSET too low. */
 	jmp lguest_init+__PAGE_OFFSET
 
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they
+ * don't have a stack at this point, so we can't just use call and ret.
+ */
+init_pagetables:
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+#define pa(X) ((X) - __PAGE_OFFSET)
+
+/* Enough space to fit pagetables for the low memory linear map */
+MAPPING_BEYOND_END = \
+	PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
+#ifdef CONFIG_X86_PAE
+
+	/*
+	 * In PAE mode initial_page_table is statically defined to contain
+	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+	 * entries). The identity mapping is handled by pointing two PGD entries
+	 * to the first kernel PMD.
+	 *
+	 * Note the upper half of each PMD or PTE are always zero at this stage.
+	 */
+
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
+
+	xorl %ebx,%ebx				/* %ebx is kept at zero */
+
+	movl $pa(__brk_base), %edi
+	movl $pa(initial_pg_pmd), %edx
+	movl $PTE_IDENT_ATTR, %eax
+10:
+	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
+	movl %ecx,(%edx)			/* Store PMD entry */
+						/* Upper half already zero */
+	addl $8,%edx
+	movl $512,%ecx
+11:
+	stosl
+	xchgl %eax,%ebx
+	stosl
+	xchgl %eax,%ebx
+	addl $0x1000,%eax
+	loop 11b
+
+	/*
+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
+	 */
+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+	cmpl %ebp,%eax
+	jb 10b
+1:
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
+	shrl $12, %eax
+	movl %eax, pa(max_pfn_mapped)
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
+#else	/* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+	movl $pa(__brk_base), %edi
+	movl $pa(initial_page_table), %edx
+	movl $PTE_IDENT_ATTR, %eax
+10:
+	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
+	movl %ecx,(%edx)			/* Store identity PDE entry */
+	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
+	addl $4,%edx
+	movl $1024, %ecx
+11:
+	stosl
+	addl $0x1000,%eax
+	loop 11b
+	/*
+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
+	 */
+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+	cmpl %ebp,%eax
+	jb 10b
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
+	shrl $12, %eax
+	movl %eax, pa(max_pfn_mapped)
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+	movl %eax,pa(initial_page_table+0xffc)
+#endif
+	ret
+
 /*G:055
  * We create a macro which puts the assembler code between lgstart_ and lgend_
  * markers.  These templates are put in the .text section: they can't be
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8..704a37c 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@
 	e->trace.entries = e->trace_entries;
 	e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
 	e->trace.skip = 0;
-	save_stack_trace_bp(&e->trace, regs->bp);
+	save_stack_trace_regs(&e->trace, regs);
 
 	/* Round address down to nearest 16 bytes */
 	shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e..72cbec1 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -126,7 +126,7 @@
 	if (!user_mode_vm(regs)) {
 		unsigned long stack = kernel_stack_pointer(regs);
 		if (depth)
-			dump_trace(NULL, regs, (unsigned long *)stack, 0,
+			dump_trace(NULL, regs, (unsigned long *)stack,
 				   &backtrace_ops, &depth);
 		return;
 	}
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index e3ecb71..0636dd9 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -58,9 +58,6 @@
 
 int __init op_nmi_timer_init(struct oprofile_operations *ops)
 {
-	if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
-		return -ENODEV;
-
 	ops->start = timer_start;
 	ops->stop = timer_stop;
 	ops->cpu_type = "timer";
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 182558d..9fadec0 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -11,7 +11,7 @@
 #include <linux/oprofile.h>
 #include <linux/smp.h>
 #include <linux/ptrace.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index c4bb261..b1805b7 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -65,21 +65,13 @@
 			resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
-	resource_size_t start = round_down(res->end - size + 1, align);
+	resource_size_t start = res->start;
 
 	if (res->flags & IORESOURCE_IO) {
-
-		/*
-		 * If we're avoiding ISA aliases, the largest contiguous I/O
-		 * port space is 256 bytes.  Clearing bits 9 and 10 preserves
-		 * all 256-byte and smaller alignments, so the result will
-		 * still be correctly aligned.
-		 */
-		if (!skip_isa_ioresource_align(dev))
-			start &= ~0x300;
-	} else if (res->flags & IORESOURCE_MEM) {
-		if (start < BIOS_END)
-			start = res->end;	/* fail; no space */
+		if (skip_isa_ioresource_align(dev))
+			return start;
+		if (start & 0x300)
+			start = (start + 0x3ff) & ~0x3ff;
 	}
 	return start;
 }
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4a2afa1..b6552b1 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -25,7 +25,7 @@
 
 export CPPFLAGS_vdso.lds += -P -C
 
-VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 		      	-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 
 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
@@ -69,7 +69,7 @@
 vdso32-images			= $(vdso32.so-y:%=vdso32-%.so)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
 
 # This makes sure the $(obj) subdirectory exists even though vdso32/
 # is not a kbuild sub-make subdirectory.
diff --git a/block/blk-map.c b/block/blk-map.c
index 5d5dbe4..e663ac2 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -201,12 +201,13 @@
 	for (i = 0; i < iov_count; i++) {
 		unsigned long uaddr = (unsigned long)iov[i].iov_base;
 
+		if (!iov[i].iov_len)
+			return -EINVAL;
+
 		if (uaddr & queue_dma_alignment(q)) {
 			unaligned = 1;
 			break;
 		}
-		if (!iov[i].iov_len)
-			return -EINVAL;
 	}
 
 	if (unaligned || (q->dma_pad_mask & len) || map_data)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 77b7c26..74bc4a7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,7 +21,7 @@
 		return 0;
 
 	fbio = bio;
-	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	cluster = blk_queue_cluster(q);
 	seg_size = 0;
 	nr_phys_segs = 0;
 	for_each_bio(bio) {
@@ -87,7 +87,7 @@
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 				   struct bio *nxt)
 {
-	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+	if (!blk_queue_cluster(q))
 		return 0;
 
 	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
@@ -123,7 +123,7 @@
 	int nsegs, cluster;
 
 	nsegs = 0;
-	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	cluster = blk_queue_cluster(q);
 
 	/*
 	 * for each bio in rq
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 701859f..36c8c1f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -126,7 +126,7 @@
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
-	lim->no_cluster = 0;
+	lim->cluster = 1;
 }
 EXPORT_SYMBOL(blk_set_default_limits);
 
@@ -229,8 +229,8 @@
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
- * blk_queue_max_hw_sectors - set max sectors for a request for this queue
- * @q:  the request queue for the device
+ * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request
+ * @limits: the queue limits
  * @max_hw_sectors:  max hardware sectors in the usual 512b unit
  *
  * Description:
@@ -244,7 +244,7 @@
  *    per-device basis in /sys/block/<device>/queue/max_sectors_kb.
  *    The soft limit can not exceed max_hw_sectors.
  **/
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors)
 {
 	if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
 		max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
@@ -252,9 +252,23 @@
 		       __func__, max_hw_sectors);
 	}
 
-	q->limits.max_hw_sectors = max_hw_sectors;
-	q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
-				      BLK_DEF_MAX_SECTORS);
+	limits->max_hw_sectors = max_hw_sectors;
+	limits->max_sectors = min_t(unsigned int, max_hw_sectors,
+				    BLK_DEF_MAX_SECTORS);
+}
+EXPORT_SYMBOL(blk_limits_max_hw_sectors);
+
+/**
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
+ * @q:  the request queue for the device
+ * @max_hw_sectors:  max hardware sectors in the usual 512b unit
+ *
+ * Description:
+ *    See description for blk_limits_max_hw_sectors().
+ **/
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+{
+	blk_limits_max_hw_sectors(&q->limits, max_hw_sectors);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
@@ -464,15 +478,6 @@
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
 	blk_stack_limits(&t->limits, &b->limits, 0);
-
-	if (!t->queue_lock)
-		WARN_ON_ONCE(1);
-	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(t->queue_lock, flags);
-		queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(t->queue_lock, flags);
-	}
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
@@ -545,7 +550,7 @@
 	t->io_min = max(t->io_min, b->io_min);
 	t->io_opt = lcm(t->io_opt, b->io_opt);
 
-	t->no_cluster |= b->no_cluster;
+	t->cluster &= b->cluster;
 	t->discard_zeroes_data &= b->discard_zeroes_data;
 
 	/* Physical block size a multiple of the logical block size? */
@@ -641,7 +646,6 @@
 		       sector_t offset)
 {
 	struct request_queue *t = disk->queue;
-	struct request_queue *b = bdev_get_queue(bdev);
 
 	if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
 		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
@@ -652,17 +656,6 @@
 		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
 		       top, bottom);
 	}
-
-	if (!t->queue_lock)
-		WARN_ON_ONCE(1);
-	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(t->queue_lock, flags);
-		if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
-			queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(t->queue_lock, flags);
-	}
 }
 EXPORT_SYMBOL(disk_stack_limits);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 013457f..41fb691 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -119,7 +119,7 @@
 
 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
 {
-	if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+	if (blk_queue_cluster(q))
 		return queue_var_show(queue_max_segment_size(q), (page));
 
 	return queue_var_show(PAGE_CACHE_SIZE, (page));
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 004be80..381b09b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -355,6 +355,12 @@
 			tg->slice_end[rw], jiffies);
 }
 
+static inline void throtl_set_slice_end(struct throtl_data *td,
+		struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
+{
+	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
+}
+
 static inline void throtl_extend_slice(struct throtl_data *td,
 		struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
 {
@@ -391,6 +397,16 @@
 	if (throtl_slice_used(td, tg, rw))
 		return;
 
+	/*
+	 * A bio has been dispatched. Also adjust slice_end. It might happen
+	 * that initially cgroup limit was very low resulting in high
+	 * slice_end, but later limit was bumped up and bio was dispached
+	 * sooner, then we need to reduce slice_end. A high bogus slice_end
+	 * is bad because it does not allow new slice to start.
+	 */
+
+	throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice);
+
 	time_elapsed = jiffies - tg->slice_start[rw];
 
 	nr_slices = time_elapsed / throtl_slice;
@@ -709,26 +725,21 @@
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	/*
-	 * Make sure atomic_inc() effects from
-	 * throtl_update_blkio_group_read_bps(), group of functions are
-	 * visible.
-	 * Is this required or smp_mb__after_atomic_inc() was suffcient
-	 * after the atomic_inc().
-	 */
-	smp_rmb();
 	if (!atomic_read(&td->limits_changed))
 		return;
 
 	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
 
-	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		/*
-		 * Do I need an smp_rmb() here to make sure tg->limits_changed
-		 * update is visible. I am relying on smp_rmb() at the
-		 * beginning of function and not putting a new one here.
-		 */
+	/*
+	 * Make sure updates from throtl_update_blkio_group_read_bps() group
+	 * of functions to tg->limits_changed are visible. We do not
+	 * want update td->limits_changed to be visible but update to
+	 * tg->limits_changed not being visible yet on this cpu. Hence
+	 * the read barrier.
+	 */
+	smp_rmb();
 
+	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
 		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
 			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
 				" riops=%u wiops=%u", tg->bps[READ],
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 660a272..0cac7ec 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -577,9 +577,7 @@
 	 * as possible (without an NMI being received in the middle of
 	 * this) - so disable NMIs and initialize the device:
 	 */
-	acpi_nmi_disable();
 	status = acpi_ns_evaluate(info);
-	acpi_nmi_enable();
 
 	if (ACPI_SUCCESS(status)) {
 		walk_info->num_INI++;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index f291587..8e0f925 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2834,6 +2834,8 @@
 	InquiryData_struct *inq_buff = NULL;
 
 	for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
+		if (!h->drv[logvol])
+			continue;
 		if (memcmp(h->drv[logvol]->LunID, drv->LunID,
 			sizeof(drv->LunID)) == 0) {
 			FOUND = 1;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 89d8a7c..24487d4 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3627,17 +3627,19 @@
 		}
 
 		shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
-		rv = drbd_recv(mdev, &header->h80.payload, shs);
-		if (unlikely(rv != shs)) {
-			dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
-			goto err_out;
-		}
-
 		if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
 			dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
 			goto err_out;
 		}
 
+		if (shs) {
+			rv = drbd_recv(mdev, &header->h80.payload, shs);
+			if (unlikely(rv != shs)) {
+				dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
+				goto err_out;
+			}
+		}
+
 		rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
 
 		if (unlikely(!rv)) {
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 181ea03..ab2bd09 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -339,7 +339,8 @@
 }
 
 /* completion of master bio is outside of spinlock.
- * If you need it irqsave, do it your self! */
+ * If you need it irqsave, do it your self!
+ * Which means: don't use from bio endio callback. */
 static inline int req_mod(struct drbd_request *req,
 		enum drbd_req_event what)
 {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 47d223c..34f224b 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -193,8 +193,10 @@
  */
 void drbd_endio_pri(struct bio *bio, int error)
 {
+	unsigned long flags;
 	struct drbd_request *req = bio->bi_private;
 	struct drbd_conf *mdev = req->mdev;
+	struct bio_and_error m;
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
@@ -220,7 +222,13 @@
 	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(error);
 
-	req_mod(req, what);
+	/* not req_mod(), we need irqsave here! */
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	__req_mod(req, what, &m);
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	if (m.bio)
+		complete_master_bio(mdev, &m);
 }
 
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index d68d3aa..f975d24 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -283,16 +283,21 @@
 	} while (delay);
 }
 
+static void __sh_cmt_set_next(struct sh_cmt_priv *p, unsigned long delta)
+{
+	if (delta > p->max_match_value)
+		dev_warn(&p->pdev->dev, "delta out of range\n");
+
+	p->next_match_value = delta;
+	sh_cmt_clock_event_program_verify(p, 0);
+}
+
 static void sh_cmt_set_next(struct sh_cmt_priv *p, unsigned long delta)
 {
 	unsigned long flags;
 
-	if (delta > p->max_match_value)
-		dev_warn(&p->pdev->dev, "delta out of range\n");
-
 	spin_lock_irqsave(&p->lock, flags);
-	p->next_match_value = delta;
-	sh_cmt_clock_event_program_verify(p, 0);
+	__sh_cmt_set_next(p, delta);
 	spin_unlock_irqrestore(&p->lock, flags);
 }
 
@@ -359,7 +364,7 @@
 
 	/* setup timeout if no clockevent */
 	if ((flag == FLAG_CLOCKSOURCE) && (!(p->flags & FLAG_CLOCKEVENT)))
-		sh_cmt_set_next(p, p->max_match_value);
+		__sh_cmt_set_next(p, p->max_match_value);
  out:
 	spin_unlock_irqrestore(&p->lock, flags);
 
@@ -381,7 +386,7 @@
 
 	/* adjust the timeout to maximum if only clocksource left */
 	if ((flag == FLAG_CLOCKEVENT) && (p->flags & FLAG_CLOCKSOURCE))
-		sh_cmt_set_next(p, p->max_match_value);
+		__sh_cmt_set_next(p, p->max_match_value);
 
 	spin_unlock_irqrestore(&p->lock, flags);
 }
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index e3f7fc6..68f09a8 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -534,76 +534,73 @@
 }
 #undef OLD_KEY_MAX
 
-static int evdev_handle_get_keycode(struct input_dev *dev,
-				    void __user *p, size_t size)
+static int evdev_handle_get_keycode(struct input_dev *dev, void __user *p)
+{
+	struct input_keymap_entry ke = {
+		.len	= sizeof(unsigned int),
+		.flags	= 0,
+	};
+	int __user *ip = (int __user *)p;
+	int error;
+
+	/* legacy case */
+	if (copy_from_user(ke.scancode, p, sizeof(unsigned int)))
+		return -EFAULT;
+
+	error = input_get_keycode(dev, &ke);
+	if (error)
+		return error;
+
+	if (put_user(ke.keycode, ip + 1))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int evdev_handle_get_keycode_v2(struct input_dev *dev, void __user *p)
 {
 	struct input_keymap_entry ke;
 	int error;
 
-	memset(&ke, 0, sizeof(ke));
+	if (copy_from_user(&ke, p, sizeof(ke)))
+		return -EFAULT;
 
-	if (size == sizeof(unsigned int[2])) {
-		/* legacy case */
-		int __user *ip = (int __user *)p;
+	error = input_get_keycode(dev, &ke);
+	if (error)
+		return error;
 
-		if (copy_from_user(ke.scancode, p, sizeof(unsigned int)))
-			return -EFAULT;
+	if (copy_to_user(p, &ke, sizeof(ke)))
+		return -EFAULT;
 
-		ke.len = sizeof(unsigned int);
-		ke.flags = 0;
-
-		error = input_get_keycode(dev, &ke);
-		if (error)
-			return error;
-
-		if (put_user(ke.keycode, ip + 1))
-			return -EFAULT;
-
-	} else {
-		size = min(size, sizeof(ke));
-
-		if (copy_from_user(&ke, p, size))
-			return -EFAULT;
-
-		error = input_get_keycode(dev, &ke);
-		if (error)
-			return error;
-
-		if (copy_to_user(p, &ke, size))
-			return -EFAULT;
-	}
 	return 0;
 }
 
-static int evdev_handle_set_keycode(struct input_dev *dev,
-				    void __user *p, size_t size)
+static int evdev_handle_set_keycode(struct input_dev *dev, void __user *p)
+{
+	struct input_keymap_entry ke = {
+		.len	= sizeof(unsigned int),
+		.flags	= 0,
+	};
+	int __user *ip = (int __user *)p;
+
+	if (copy_from_user(ke.scancode, p, sizeof(unsigned int)))
+		return -EFAULT;
+
+	if (get_user(ke.keycode, ip + 1))
+		return -EFAULT;
+
+	return input_set_keycode(dev, &ke);
+}
+
+static int evdev_handle_set_keycode_v2(struct input_dev *dev, void __user *p)
 {
 	struct input_keymap_entry ke;
 
-	memset(&ke, 0, sizeof(ke));
+	if (copy_from_user(&ke, p, sizeof(ke)))
+		return -EFAULT;
 
-	if (size == sizeof(unsigned int[2])) {
-		/* legacy case */
-		int __user *ip = (int __user *)p;
-
-		if (copy_from_user(ke.scancode, p, sizeof(unsigned int)))
-			return -EFAULT;
-
-		if (get_user(ke.keycode, ip + 1))
-			return -EFAULT;
-
-		ke.len = sizeof(unsigned int);
-		ke.flags = 0;
-
-	} else {
-		size = min(size, sizeof(ke));
-
-		if (copy_from_user(&ke, p, size))
-			return -EFAULT;
-
-		if (ke.len > sizeof(ke.scancode))
-			return -EINVAL;
-	}
+	if (ke.len > sizeof(ke.scancode))
+		return -EINVAL;
 
 	return input_set_keycode(dev, &ke);
 }
@@ -669,6 +666,18 @@
 			return evdev_grab(evdev, client);
 		else
 			return evdev_ungrab(evdev, client);
+
+	case EVIOCGKEYCODE:
+		return evdev_handle_get_keycode(dev, p);
+
+	case EVIOCSKEYCODE:
+		return evdev_handle_set_keycode(dev, p);
+
+	case EVIOCGKEYCODE_V2:
+		return evdev_handle_get_keycode_v2(dev, p);
+
+	case EVIOCSKEYCODE_V2:
+		return evdev_handle_set_keycode_v2(dev, p);
 	}
 
 	size = _IOC_SIZE(cmd);
@@ -708,12 +717,6 @@
 			return -EFAULT;
 
 		return error;
-
-	case EVIOC_MASK_SIZE(EVIOCGKEYCODE):
-		return evdev_handle_get_keycode(dev, p, size);
-
-	case EVIOC_MASK_SIZE(EVIOCSKEYCODE):
-		return evdev_handle_set_keycode(dev, p, size);
 	}
 
 	/* Multi-number variable-length handlers */
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index 4852b44..435b0af 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -1436,6 +1436,8 @@
 	{ "Wacom Bamboo Craft",   WACOM_PKGLEN_BBFUN,     14720,  9200, 1023, 63, BAMBOO_PT };
 static struct wacom_features wacom_features_0xD3 =
 	{ "Wacom Bamboo 2FG 6x8", WACOM_PKGLEN_BBFUN,     21648, 13530, 1023, 63, BAMBOO_PT };
+static const struct wacom_features wacom_features_0xD4 =
+	{ "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200,  255, 63, BAMBOO_PT };
 static struct wacom_features wacom_features_0xD8 =
 	{ "Wacom Bamboo Comic 2FG", WACOM_PKGLEN_BBFUN,   21648, 13530, 1023, 63, BAMBOO_PT };
 static struct wacom_features wacom_features_0xDA =
@@ -1510,6 +1512,7 @@
 	{ USB_DEVICE_WACOM(0xD1) },
 	{ USB_DEVICE_WACOM(0xD2) },
 	{ USB_DEVICE_WACOM(0xD3) },
+	{ USB_DEVICE_WACOM(0xD4) },
 	{ USB_DEVICE_WACOM(0xD8) },
 	{ USB_DEVICE_WACOM(0xDA) },
 	{ USB_DEVICE_WACOM(0xDB) },
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 90267f8..4d705ce 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -517,9 +517,8 @@
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		limits->max_sectors =
-			min_not_zero(limits->max_sectors,
-				     (unsigned int) (PAGE_SIZE >> 9));
+		blk_limits_max_hw_sectors(limits,
+					  (unsigned int) (PAGE_SIZE >> 9));
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
@@ -1131,11 +1130,6 @@
 	 */
 	q->limits = *limits;
 
-	if (limits->no_cluster)
-		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
-	else
-		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
-
 	if (!dm_table_supports_discards(t))
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
 	else
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e71c5fa..175c424 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4295,9 +4295,6 @@
 		goto abort;
 	mddev->queue->queuedata = mddev;
 
-	/* Can be unlocked because the queue is new: no concurrency */
-	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
-
 	blk_queue_make_request(mddev->queue, md_make_request);
 
 	disk = alloc_disk(1 << shift);
diff --git a/drivers/media/common/saa7146_hlp.c b/drivers/media/common/saa7146_hlp.c
index 05bde9c..1d1d8d2 100644
--- a/drivers/media/common/saa7146_hlp.c
+++ b/drivers/media/common/saa7146_hlp.c
@@ -558,7 +558,7 @@
 static void saa7146_set_position(struct saa7146_dev *dev, int w_x, int w_y, int w_height, enum v4l2_field field, u32 pixelformat)
 {
 	struct saa7146_vv *vv = dev->vv_data;
-	struct saa7146_format *sfmt = format_by_fourcc(dev, pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev, pixelformat);
 
 	int b_depth = vv->ov_fmt->depth;
 	int b_bpl = vv->ov_fb.fmt.bytesperline;
@@ -702,7 +702,7 @@
 	struct saa7146_vv *vv = dev->vv_data;
 	struct saa7146_video_dma vdma1;
 
-	struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 
 	int width = buf->fmt->width;
 	int height = buf->fmt->height;
@@ -827,7 +827,7 @@
 	struct saa7146_video_dma vdma2;
 	struct saa7146_video_dma vdma3;
 
-	struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 
 	int width = buf->fmt->width;
 	int height = buf->fmt->height;
@@ -994,7 +994,7 @@
 
 void saa7146_set_capture(struct saa7146_dev *dev, struct saa7146_buf *buf, struct saa7146_buf *next)
 {
-	struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 	struct saa7146_vv *vv = dev->vv_data;
 	u32 vdma1_prot_addr;
 
diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c
index 741c573..d246910 100644
--- a/drivers/media/common/saa7146_video.c
+++ b/drivers/media/common/saa7146_video.c
@@ -84,7 +84,7 @@
 
 static int NUM_FORMATS = sizeof(formats)/sizeof(struct saa7146_format);
 
-struct saa7146_format* format_by_fourcc(struct saa7146_dev *dev, int fourcc)
+struct saa7146_format* saa7146_format_by_fourcc(struct saa7146_dev *dev, int fourcc)
 {
 	int i, j = NUM_FORMATS;
 
@@ -266,7 +266,7 @@
 	struct videobuf_dmabuf *dma=videobuf_to_dma(&buf->vb);
 	struct scatterlist *list = dma->sglist;
 	int length = dma->sglen;
-	struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 
 	DEB_EE(("dev:%p, buf:%p, sg_len:%d\n",dev,buf,length));
 
@@ -408,7 +408,7 @@
 		}
 	}
 
-	fmt = format_by_fourcc(dev,fh->video_fmt.pixelformat);
+	fmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat);
 	/* we need to have a valid format set here */
 	BUG_ON(NULL == fmt);
 
@@ -460,7 +460,7 @@
 		return -EBUSY;
 	}
 
-	fmt = format_by_fourcc(dev,fh->video_fmt.pixelformat);
+	fmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat);
 	/* we need to have a valid format set here */
 	BUG_ON(NULL == fmt);
 
@@ -536,7 +536,7 @@
 		return -EPERM;
 
 	/* check args */
-	fmt = format_by_fourcc(dev, fb->fmt.pixelformat);
+	fmt = saa7146_format_by_fourcc(dev, fb->fmt.pixelformat);
 	if (NULL == fmt)
 		return -EINVAL;
 
@@ -760,7 +760,7 @@
 
 	DEB_EE(("V4L2_BUF_TYPE_VIDEO_CAPTURE: dev:%p, fh:%p\n", dev, fh));
 
-	fmt = format_by_fourcc(dev, f->fmt.pix.pixelformat);
+	fmt = saa7146_format_by_fourcc(dev, f->fmt.pix.pixelformat);
 	if (NULL == fmt)
 		return -EINVAL;
 
@@ -1264,7 +1264,7 @@
 		buf->fmt       = &fh->video_fmt;
 		buf->vb.field  = fh->video_fmt.field;
 
-		sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+		sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 
 		release_all_pagetables(dev, buf);
 		if( 0 != IS_PLANAR(sfmt->trans)) {
@@ -1378,7 +1378,7 @@
 	fh->video_fmt.pixelformat = V4L2_PIX_FMT_BGR24;
 	fh->video_fmt.bytesperline = 0;
 	fh->video_fmt.field = V4L2_FIELD_ANY;
-	sfmt = format_by_fourcc(dev,fh->video_fmt.pixelformat);
+	sfmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat);
 	fh->video_fmt.sizeimage = (fh->video_fmt.width * fh->video_fmt.height * sfmt->depth)/8;
 
 	videobuf_queue_sg_init(&fh->video_q, &video_qops,
diff --git a/drivers/media/radio/radio-aimslab.c b/drivers/media/radio/radio-aimslab.c
index 5bf4985..05e832f 100644
--- a/drivers/media/radio/radio-aimslab.c
+++ b/drivers/media/radio/radio-aimslab.c
@@ -361,7 +361,7 @@
 
 static const struct v4l2_file_operations rtrack_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops rtrack_ioctl_ops = {
@@ -412,13 +412,6 @@
 	rt->vdev.release = video_device_release_empty;
 	video_set_drvdata(&rt->vdev, rt);
 
-	if (video_register_device(&rt->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
-		v4l2_device_unregister(&rt->v4l2_dev);
-		release_region(rt->io, 2);
-		return -EINVAL;
-	}
-	v4l2_info(v4l2_dev, "AIMSlab RadioTrack/RadioReveal card driver.\n");
-
 	/* Set up the I/O locking */
 
 	mutex_init(&rt->lock);
@@ -430,6 +423,13 @@
 	sleep_delay(2000000);	/* make sure it's totally down	*/
 	outb(0xc0, rt->io);		/* steady volume, mute card	*/
 
+	if (video_register_device(&rt->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
+		v4l2_device_unregister(&rt->v4l2_dev);
+		release_region(rt->io, 2);
+		return -EINVAL;
+	}
+	v4l2_info(v4l2_dev, "AIMSlab RadioTrack/RadioReveal card driver.\n");
+
 	return 0;
 }
 
diff --git a/drivers/media/radio/radio-aztech.c b/drivers/media/radio/radio-aztech.c
index c223113..dd8a6ab 100644
--- a/drivers/media/radio/radio-aztech.c
+++ b/drivers/media/radio/radio-aztech.c
@@ -324,7 +324,7 @@
 
 static const struct v4l2_file_operations aztech_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops aztech_ioctl_ops = {
@@ -375,6 +375,8 @@
 	az->vdev.ioctl_ops = &aztech_ioctl_ops;
 	az->vdev.release = video_device_release_empty;
 	video_set_drvdata(&az->vdev, az);
+	/* mute card - prevents noisy bootups */
+	outb(0, az->io);
 
 	if (video_register_device(&az->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
 		v4l2_device_unregister(v4l2_dev);
@@ -383,8 +385,6 @@
 	}
 
 	v4l2_info(v4l2_dev, "Aztech radio card driver v1.00/19990224 rkroll@exploits.org\n");
-	/* mute card - prevents noisy bootups */
-	outb(0, az->io);
 	return 0;
 }
 
diff --git a/drivers/media/radio/radio-cadet.c b/drivers/media/radio/radio-cadet.c
index b701ea6..bc9ad08 100644
--- a/drivers/media/radio/radio-cadet.c
+++ b/drivers/media/radio/radio-cadet.c
@@ -328,11 +328,10 @@
 	unsigned char readbuf[RDS_BUFFER];
 	int i = 0;
 
+	mutex_lock(&dev->lock);
 	if (dev->rdsstat == 0) {
-		mutex_lock(&dev->lock);
 		dev->rdsstat = 1;
 		outb(0x80, dev->io);        /* Select RDS fifo */
-		mutex_unlock(&dev->lock);
 		init_timer(&dev->readtimer);
 		dev->readtimer.function = cadet_handler;
 		dev->readtimer.data = (unsigned long)dev;
@@ -340,12 +339,15 @@
 		add_timer(&dev->readtimer);
 	}
 	if (dev->rdsin == dev->rdsout) {
+		mutex_unlock(&dev->lock);
 		if (file->f_flags & O_NONBLOCK)
 			return -EWOULDBLOCK;
 		interruptible_sleep_on(&dev->read_queue);
+		mutex_lock(&dev->lock);
 	}
 	while (i < count && dev->rdsin != dev->rdsout)
 		readbuf[i++] = dev->rdsbuf[dev->rdsout++];
+	mutex_unlock(&dev->lock);
 
 	if (copy_to_user(data, readbuf, i))
 		return -EFAULT;
@@ -525,9 +527,11 @@
 {
 	struct cadet *dev = video_drvdata(file);
 
+	mutex_lock(&dev->lock);
 	dev->users++;
 	if (1 == dev->users)
 		init_waitqueue_head(&dev->read_queue);
+	mutex_unlock(&dev->lock);
 	return 0;
 }
 
@@ -535,11 +539,13 @@
 {
 	struct cadet *dev = video_drvdata(file);
 
+	mutex_lock(&dev->lock);
 	dev->users--;
 	if (0 == dev->users) {
 		del_timer_sync(&dev->readtimer);
 		dev->rdsstat = 0;
 	}
+	mutex_unlock(&dev->lock);
 	return 0;
 }
 
@@ -559,7 +565,7 @@
 	.open		= cadet_open,
 	.release       	= cadet_release,
 	.read		= cadet_read,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 	.poll		= cadet_poll,
 };
 
diff --git a/drivers/media/radio/radio-gemtek-pci.c b/drivers/media/radio/radio-gemtek-pci.c
index 7903967..28fa85b 100644
--- a/drivers/media/radio/radio-gemtek-pci.c
+++ b/drivers/media/radio/radio-gemtek-pci.c
@@ -361,7 +361,7 @@
 
 static const struct v4l2_file_operations gemtek_pci_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops gemtek_pci_ioctl_ops = {
@@ -422,11 +422,11 @@
 	card->vdev.release = video_device_release_empty;
 	video_set_drvdata(&card->vdev, card);
 
+	gemtek_pci_mute(card);
+
 	if (video_register_device(&card->vdev, VFL_TYPE_RADIO, nr_radio) < 0)
 		goto err_video;
 
-	gemtek_pci_mute(card);
-
 	v4l2_info(v4l2_dev, "Gemtek PCI Radio (rev. %d) found at 0x%04x-0x%04x.\n",
 		pdev->revision, card->iobase, card->iobase + card->length - 1);
 
diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c
index 73985f6..2599364 100644
--- a/drivers/media/radio/radio-gemtek.c
+++ b/drivers/media/radio/radio-gemtek.c
@@ -378,7 +378,7 @@
 
 static const struct v4l2_file_operations gemtek_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static int vidioc_querycap(struct file *file, void *priv,
@@ -577,12 +577,6 @@
 	gt->vdev.release = video_device_release_empty;
 	video_set_drvdata(&gt->vdev, gt);
 
-	if (video_register_device(&gt->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
-		v4l2_device_unregister(v4l2_dev);
-		release_region(gt->io, 1);
-		return -EBUSY;
-	}
-
 	/* Set defaults */
 	gt->lastfreq = GEMTEK_LOWFREQ;
 	gt->bu2614data = 0;
@@ -590,6 +584,12 @@
 	if (initmute)
 		gemtek_mute(gt);
 
+	if (video_register_device(&gt->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
+		v4l2_device_unregister(v4l2_dev);
+		release_region(gt->io, 1);
+		return -EBUSY;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/media/radio/radio-maestro.c b/drivers/media/radio/radio-maestro.c
index 08f1051..6af61bf 100644
--- a/drivers/media/radio/radio-maestro.c
+++ b/drivers/media/radio/radio-maestro.c
@@ -299,7 +299,7 @@
 
 static const struct v4l2_file_operations maestro_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops maestro_ioctl_ops = {
@@ -383,22 +383,20 @@
 	dev->vdev.release = video_device_release_empty;
 	video_set_drvdata(&dev->vdev, dev);
 
+	if (!radio_power_on(dev)) {
+		retval = -EIO;
+		goto errfr1;
+	}
+
 	retval = video_register_device(&dev->vdev, VFL_TYPE_RADIO, radio_nr);
 	if (retval) {
 		v4l2_err(v4l2_dev, "can't register video device!\n");
 		goto errfr1;
 	}
 
-	if (!radio_power_on(dev)) {
-		retval = -EIO;
-		goto errunr;
-	}
-
 	v4l2_info(v4l2_dev, "version " DRIVER_VERSION "\n");
 
 	return 0;
-errunr:
-	video_unregister_device(&dev->vdev);
 errfr1:
 	v4l2_device_unregister(v4l2_dev);
 errfr:
diff --git a/drivers/media/radio/radio-maxiradio.c b/drivers/media/radio/radio-maxiradio.c
index 255d40d..6459a22 100644
--- a/drivers/media/radio/radio-maxiradio.c
+++ b/drivers/media/radio/radio-maxiradio.c
@@ -346,7 +346,7 @@
 
 static const struct v4l2_file_operations maxiradio_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl          = video_ioctl2,
+	.unlocked_ioctl = video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops maxiradio_ioctl_ops = {
diff --git a/drivers/media/radio/radio-miropcm20.c b/drivers/media/radio/radio-miropcm20.c
index 4ff8854..3fb76e3 100644
--- a/drivers/media/radio/radio-miropcm20.c
+++ b/drivers/media/radio/radio-miropcm20.c
@@ -33,6 +33,7 @@
 	unsigned long freq;
 	int muted;
 	struct snd_miro_aci *aci;
+	struct mutex lock;
 };
 
 static struct pcm20 pcm20_card = {
@@ -72,7 +73,7 @@
 
 static const struct v4l2_file_operations pcm20_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static int vidioc_querycap(struct file *file, void *priv,
@@ -229,7 +230,7 @@
 		return -ENODEV;
 	}
 	strlcpy(v4l2_dev->name, "miropcm20", sizeof(v4l2_dev->name));
-
+	mutex_init(&dev->lock);
 
 	res = v4l2_device_register(NULL, v4l2_dev);
 	if (res < 0) {
@@ -242,6 +243,7 @@
 	dev->vdev.fops = &pcm20_fops;
 	dev->vdev.ioctl_ops = &pcm20_ioctl_ops;
 	dev->vdev.release = video_device_release_empty;
+	dev->vdev.lock = &dev->lock;
 	video_set_drvdata(&dev->vdev, dev);
 
 	if (video_register_device(&dev->vdev, VFL_TYPE_RADIO, radio_nr) < 0)
diff --git a/drivers/media/radio/radio-rtrack2.c b/drivers/media/radio/radio-rtrack2.c
index a79296a..8d6ea59 100644
--- a/drivers/media/radio/radio-rtrack2.c
+++ b/drivers/media/radio/radio-rtrack2.c
@@ -266,7 +266,7 @@
 
 static const struct v4l2_file_operations rtrack2_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops rtrack2_ioctl_ops = {
@@ -315,6 +315,10 @@
 	dev->vdev.release = video_device_release_empty;
 	video_set_drvdata(&dev->vdev, dev);
 
+	/* mute card - prevents noisy bootups */
+	outb(1, dev->io);
+	dev->muted = 1;
+
 	mutex_init(&dev->lock);
 	if (video_register_device(&dev->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
 		v4l2_device_unregister(v4l2_dev);
@@ -324,10 +328,6 @@
 
 	v4l2_info(v4l2_dev, "AIMSlab Radiotrack II card driver.\n");
 
-	/* mute card - prevents noisy bootups */
-	outb(1, dev->io);
-	dev->muted = 1;
-
 	return 0;
 }
 
diff --git a/drivers/media/radio/radio-sf16fmi.c b/drivers/media/radio/radio-sf16fmi.c
index 985359d..b5a5f89 100644
--- a/drivers/media/radio/radio-sf16fmi.c
+++ b/drivers/media/radio/radio-sf16fmi.c
@@ -260,7 +260,7 @@
 
 static const struct v4l2_file_operations fmi_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops fmi_ioctl_ops = {
@@ -382,6 +382,9 @@
 
 	mutex_init(&fmi->lock);
 
+	/* mute card - prevents noisy bootups */
+	fmi_mute(fmi);
+
 	if (video_register_device(&fmi->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
 		v4l2_device_unregister(v4l2_dev);
 		release_region(fmi->io, 2);
@@ -391,8 +394,6 @@
 	}
 
 	v4l2_info(v4l2_dev, "card driver at 0x%x\n", fmi->io);
-	/* mute card - prevents noisy bootups */
-	fmi_mute(fmi);
 	return 0;
 }
 
diff --git a/drivers/media/radio/radio-sf16fmr2.c b/drivers/media/radio/radio-sf16fmr2.c
index 52c7bbb..dc3f04c 100644
--- a/drivers/media/radio/radio-sf16fmr2.c
+++ b/drivers/media/radio/radio-sf16fmr2.c
@@ -376,7 +376,7 @@
 
 static const struct v4l2_file_operations fmr2_fops = {
 	.owner          = THIS_MODULE,
-	.ioctl          = video_ioctl2,
+	.unlocked_ioctl = video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops fmr2_ioctl_ops = {
@@ -424,6 +424,10 @@
 	fmr2->vdev.release = video_device_release_empty;
 	video_set_drvdata(&fmr2->vdev, fmr2);
 
+	/* mute card - prevents noisy bootups */
+	fmr2_mute(fmr2->io);
+	fmr2_product_info(fmr2);
+
 	if (video_register_device(&fmr2->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
 		v4l2_device_unregister(v4l2_dev);
 		release_region(fmr2->io, 2);
@@ -431,11 +435,6 @@
 	}
 
 	v4l2_info(v4l2_dev, "SF16FMR2 radio card driver at 0x%x.\n", fmr2->io);
-	/* mute card - prevents noisy bootups */
-	mutex_lock(&fmr2->lock);
-	fmr2_mute(fmr2->io);
-	fmr2_product_info(fmr2);
-	mutex_unlock(&fmr2->lock);
 	debug_print((KERN_DEBUG "card_type %d\n", fmr2->card_type));
 	return 0;
 }
diff --git a/drivers/media/radio/radio-si4713.c b/drivers/media/radio/radio-si4713.c
index 03829e6..726d367 100644
--- a/drivers/media/radio/radio-si4713.c
+++ b/drivers/media/radio/radio-si4713.c
@@ -53,7 +53,8 @@
 /* radio_si4713_fops - file operations interface */
 static const struct v4l2_file_operations radio_si4713_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	/* Note: locking is done at the subdev level in the i2c driver. */
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 /* Video4Linux Interface */
diff --git a/drivers/media/radio/radio-tea5764.c b/drivers/media/radio/radio-tea5764.c
index 789d2ec..0e71d81 100644
--- a/drivers/media/radio/radio-tea5764.c
+++ b/drivers/media/radio/radio-tea5764.c
@@ -142,7 +142,6 @@
 	struct video_device		*videodev;
 	struct tea5764_regs		regs;
 	struct mutex			mutex;
-	int				users;
 };
 
 /* I2C code related */
@@ -458,41 +457,10 @@
 	return 0;
 }
 
-static int tea5764_open(struct file *file)
-{
-	/* Currently we support only one device */
-	struct tea5764_device *radio = video_drvdata(file);
-
-	mutex_lock(&radio->mutex);
-	/* Only exclusive access */
-	if (radio->users) {
-		mutex_unlock(&radio->mutex);
-		return -EBUSY;
-	}
-	radio->users++;
-	mutex_unlock(&radio->mutex);
-	file->private_data = radio;
-	return 0;
-}
-
-static int tea5764_close(struct file *file)
-{
-	struct tea5764_device *radio = video_drvdata(file);
-
-	if (!radio)
-		return -ENODEV;
-	mutex_lock(&radio->mutex);
-	radio->users--;
-	mutex_unlock(&radio->mutex);
-	return 0;
-}
-
 /* File system interface */
 static const struct v4l2_file_operations tea5764_fops = {
 	.owner		= THIS_MODULE,
-	.open           = tea5764_open,
-	.release        = tea5764_close,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops tea5764_ioctl_ops = {
@@ -527,7 +495,7 @@
 	int ret;
 
 	PDEBUG("probe");
-	radio = kmalloc(sizeof(struct tea5764_device), GFP_KERNEL);
+	radio = kzalloc(sizeof(struct tea5764_device), GFP_KERNEL);
 	if (!radio)
 		return -ENOMEM;
 
@@ -555,12 +523,7 @@
 
 	i2c_set_clientdata(client, radio);
 	video_set_drvdata(radio->videodev, radio);
-
-	ret = video_register_device(radio->videodev, VFL_TYPE_RADIO, radio_nr);
-	if (ret < 0) {
-		PWARN("Could not register video device!");
-		goto errrel;
-	}
+	radio->videodev->lock = &radio->mutex;
 
 	/* initialize and power off the chip */
 	tea5764_i2c_read(radio);
@@ -568,6 +531,12 @@
 	tea5764_mute(radio, 1);
 	tea5764_power_down(radio);
 
+	ret = video_register_device(radio->videodev, VFL_TYPE_RADIO, radio_nr);
+	if (ret < 0) {
+		PWARN("Could not register video device!");
+		goto errrel;
+	}
+
 	PINFO("registered.");
 	return 0;
 errrel:
diff --git a/drivers/media/radio/radio-terratec.c b/drivers/media/radio/radio-terratec.c
index fc1c860..a326639 100644
--- a/drivers/media/radio/radio-terratec.c
+++ b/drivers/media/radio/radio-terratec.c
@@ -338,7 +338,7 @@
 
 static const struct v4l2_file_operations terratec_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops terratec_ioctl_ops = {
@@ -389,6 +389,9 @@
 
 	mutex_init(&tt->lock);
 
+	/* mute card - prevents noisy bootups */
+	tt_write_vol(tt, 0);
+
 	if (video_register_device(&tt->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
 		v4l2_device_unregister(&tt->v4l2_dev);
 		release_region(tt->io, 2);
@@ -396,9 +399,6 @@
 	}
 
 	v4l2_info(v4l2_dev, "TERRATEC ActivRadio Standalone card driver.\n");
-
-	/* mute card - prevents noisy bootups */
-	tt_write_vol(tt, 0);
 	return 0;
 }
 
diff --git a/drivers/media/radio/radio-timb.c b/drivers/media/radio/radio-timb.c
index b8bb3ef..a185610 100644
--- a/drivers/media/radio/radio-timb.c
+++ b/drivers/media/radio/radio-timb.c
@@ -34,6 +34,7 @@
 	struct v4l2_subdev	*sd_dsp;
 	struct video_device	video_dev;
 	struct v4l2_device	v4l2_dev;
+	struct mutex		lock;
 };
 
 
@@ -142,7 +143,7 @@
 
 static const struct v4l2_file_operations timbradio_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static int __devinit timbradio_probe(struct platform_device *pdev)
@@ -164,6 +165,7 @@
 	}
 
 	tr->pdata = *pdata;
+	mutex_init(&tr->lock);
 
 	strlcpy(tr->video_dev.name, "Timberdale Radio",
 		sizeof(tr->video_dev.name));
@@ -171,6 +173,7 @@
 	tr->video_dev.ioctl_ops = &timbradio_ioctl_ops;
 	tr->video_dev.release = video_device_release_empty;
 	tr->video_dev.minor = -1;
+	tr->video_dev.lock = &tr->lock;
 
 	strlcpy(tr->v4l2_dev.name, DRIVER_NAME, sizeof(tr->v4l2_dev.name));
 	err = v4l2_device_register(NULL, &tr->v4l2_dev);
diff --git a/drivers/media/radio/radio-trust.c b/drivers/media/radio/radio-trust.c
index 9d6dcf8..22fa9cc 100644
--- a/drivers/media/radio/radio-trust.c
+++ b/drivers/media/radio/radio-trust.c
@@ -344,7 +344,7 @@
 
 static const struct v4l2_file_operations trust_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops trust_ioctl_ops = {
@@ -396,14 +396,6 @@
 	tr->vdev.release = video_device_release_empty;
 	video_set_drvdata(&tr->vdev, tr);
 
-	if (video_register_device(&tr->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
-		v4l2_device_unregister(v4l2_dev);
-		release_region(tr->io, 2);
-		return -EINVAL;
-	}
-
-	v4l2_info(v4l2_dev, "Trust FM Radio card driver v1.0.\n");
-
 	write_i2c(tr, 2, TDA7318_ADDR, 0x80);	/* speaker att. LF = 0 dB */
 	write_i2c(tr, 2, TDA7318_ADDR, 0xa0);	/* speaker att. RF = 0 dB */
 	write_i2c(tr, 2, TDA7318_ADDR, 0xc0);	/* speaker att. LR = 0 dB */
@@ -418,6 +410,14 @@
 	/* mute card - prevents noisy bootups */
 	tr_setmute(tr, 1);
 
+	if (video_register_device(&tr->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
+		v4l2_device_unregister(v4l2_dev);
+		release_region(tr->io, 2);
+		return -EINVAL;
+	}
+
+	v4l2_info(v4l2_dev, "Trust FM Radio card driver v1.0.\n");
+
 	return 0;
 }
 
diff --git a/drivers/media/radio/radio-typhoon.c b/drivers/media/radio/radio-typhoon.c
index b1f6305..8dbbf08 100644
--- a/drivers/media/radio/radio-typhoon.c
+++ b/drivers/media/radio/radio-typhoon.c
@@ -317,7 +317,7 @@
 
 static const struct v4l2_file_operations typhoon_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops typhoon_ioctl_ops = {
@@ -344,18 +344,18 @@
 
 	strlcpy(v4l2_dev->name, "typhoon", sizeof(v4l2_dev->name));
 	dev->io = io;
-	dev->curfreq = dev->mutefreq = mutefreq;
 
 	if (dev->io == -1) {
 		v4l2_err(v4l2_dev, "You must set an I/O address with io=0x316 or io=0x336\n");
 		return -EINVAL;
 	}
 
-	if (dev->mutefreq < 87000 || dev->mutefreq > 108500) {
+	if (mutefreq < 87000 || mutefreq > 108500) {
 		v4l2_err(v4l2_dev, "You must set a frequency (in kHz) used when muting the card,\n");
 		v4l2_err(v4l2_dev, "e.g. with \"mutefreq=87500\" (87000 <= mutefreq <= 108500)\n");
 		return -EINVAL;
 	}
+	dev->curfreq = dev->mutefreq = mutefreq << 4;
 
 	mutex_init(&dev->lock);
 	if (!request_region(dev->io, 8, "typhoon")) {
@@ -378,17 +378,17 @@
 	dev->vdev.ioctl_ops = &typhoon_ioctl_ops;
 	dev->vdev.release = video_device_release_empty;
 	video_set_drvdata(&dev->vdev, dev);
+
+	/* mute card - prevents noisy bootups */
+	typhoon_mute(dev);
+
 	if (video_register_device(&dev->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
 		v4l2_device_unregister(&dev->v4l2_dev);
 		release_region(dev->io, 8);
 		return -EINVAL;
 	}
 	v4l2_info(v4l2_dev, "port 0x%x.\n", dev->io);
-	v4l2_info(v4l2_dev, "mute frequency is %lu kHz.\n", dev->mutefreq);
-	dev->mutefreq <<= 4;
-
-	/* mute card - prevents noisy bootups */
-	typhoon_mute(dev);
+	v4l2_info(v4l2_dev, "mute frequency is %lu kHz.\n", mutefreq);
 
 	return 0;
 }
diff --git a/drivers/media/radio/radio-zoltrix.c b/drivers/media/radio/radio-zoltrix.c
index f31eab9..af99c5b 100644
--- a/drivers/media/radio/radio-zoltrix.c
+++ b/drivers/media/radio/radio-zoltrix.c
@@ -377,7 +377,7 @@
 static const struct v4l2_file_operations zoltrix_fops =
 {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops zoltrix_ioctl_ops = {
@@ -424,20 +424,6 @@
 		return res;
 	}
 
-	strlcpy(zol->vdev.name, v4l2_dev->name, sizeof(zol->vdev.name));
-	zol->vdev.v4l2_dev = v4l2_dev;
-	zol->vdev.fops = &zoltrix_fops;
-	zol->vdev.ioctl_ops = &zoltrix_ioctl_ops;
-	zol->vdev.release = video_device_release_empty;
-	video_set_drvdata(&zol->vdev, zol);
-
-	if (video_register_device(&zol->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
-		v4l2_device_unregister(v4l2_dev);
-		release_region(zol->io, 2);
-		return -EINVAL;
-	}
-	v4l2_info(v4l2_dev, "Zoltrix Radio Plus card driver.\n");
-
 	mutex_init(&zol->lock);
 
 	/* mute card - prevents noisy bootups */
@@ -452,6 +438,20 @@
 	zol->curvol = 0;
 	zol->stereo = 1;
 
+	strlcpy(zol->vdev.name, v4l2_dev->name, sizeof(zol->vdev.name));
+	zol->vdev.v4l2_dev = v4l2_dev;
+	zol->vdev.fops = &zoltrix_fops;
+	zol->vdev.ioctl_ops = &zoltrix_ioctl_ops;
+	zol->vdev.release = video_device_release_empty;
+	video_set_drvdata(&zol->vdev, zol);
+
+	if (video_register_device(&zol->vdev, VFL_TYPE_RADIO, radio_nr) < 0) {
+		v4l2_device_unregister(v4l2_dev);
+		release_region(zol->io, 2);
+		return -EINVAL;
+	}
+	v4l2_info(v4l2_dev, "Zoltrix Radio Plus card driver.\n");
+
 	return 0;
 }
 
diff --git a/drivers/media/video/arv.c b/drivers/media/video/arv.c
index 31e7a12..f989f28 100644
--- a/drivers/media/video/arv.c
+++ b/drivers/media/video/arv.c
@@ -712,7 +712,7 @@
 static const struct v4l2_file_operations ar_fops = {
 	.owner		= THIS_MODULE,
 	.read		= ar_read,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops ar_ioctl_ops = {
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index a529619..0902ec0 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -854,7 +854,6 @@
 		xbits |= RESOURCE_VIDEO_READ | RESOURCE_VIDEO_STREAM;
 
 	/* is it free? */
-	mutex_lock(&btv->lock);
 	if (btv->resources & xbits) {
 		/* no, someone else uses it */
 		goto fail;
@@ -884,11 +883,9 @@
 	/* it's free, grab it */
 	fh->resources  |= bit;
 	btv->resources |= bit;
-	mutex_unlock(&btv->lock);
 	return 1;
 
  fail:
-	mutex_unlock(&btv->lock);
 	return 0;
 }
 
@@ -940,7 +937,6 @@
 		/* trying to free ressources not allocated by us ... */
 		printk("bttv: BUG! (btres)\n");
 	}
-	mutex_lock(&btv->lock);
 	fh->resources  &= ~bits;
 	btv->resources &= ~bits;
 
@@ -951,8 +947,6 @@
 
 	if (0 == (bits & VBI_RESOURCES))
 		disclaim_vbi_lines(btv);
-
-	mutex_unlock(&btv->lock);
 }
 
 /* ----------------------------------------------------------------------- */
@@ -1713,28 +1707,20 @@
 
 		/* Make sure tvnorm and vbi_end remain consistent
 		   until we're done. */
-		mutex_lock(&btv->lock);
 
 		norm = btv->tvnorm;
 
 		/* In this mode capturing always starts at defrect.top
 		   (default VDELAY), ignoring cropping parameters. */
 		if (btv->vbi_end > bttv_tvnorms[norm].cropcap.defrect.top) {
-			mutex_unlock(&btv->lock);
 			return -EINVAL;
 		}
 
-		mutex_unlock(&btv->lock);
-
 		c.rect = bttv_tvnorms[norm].cropcap.defrect;
 	} else {
-		mutex_lock(&btv->lock);
-
 		norm = btv->tvnorm;
 		c = btv->crop[!!fh->do_crop];
 
-		mutex_unlock(&btv->lock);
-
 		if (width < c.min_scaled_width ||
 		    width > c.max_scaled_width ||
 		    height < c.min_scaled_height)
@@ -1858,7 +1844,6 @@
 	unsigned int i;
 	int err;
 
-	mutex_lock(&btv->lock);
 	err = v4l2_prio_check(&btv->prio, fh->prio);
 	if (err)
 		goto err;
@@ -1874,7 +1859,6 @@
 	set_tvnorm(btv, i);
 
 err:
-	mutex_unlock(&btv->lock);
 
 	return err;
 }
@@ -1898,7 +1882,6 @@
 	struct bttv *btv = fh->btv;
 	int rc = 0;
 
-	mutex_lock(&btv->lock);
 	if (i->index >= bttv_tvcards[btv->c.type].video_inputs) {
 		rc = -EINVAL;
 		goto err;
@@ -1928,7 +1911,6 @@
 	i->std = BTTV_NORMS;
 
 err:
-	mutex_unlock(&btv->lock);
 
 	return rc;
 }
@@ -1938,9 +1920,7 @@
 	struct bttv_fh *fh = priv;
 	struct bttv *btv = fh->btv;
 
-	mutex_lock(&btv->lock);
 	*i = btv->input;
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -1952,7 +1932,6 @@
 
 	int err;
 
-	mutex_lock(&btv->lock);
 	err = v4l2_prio_check(&btv->prio, fh->prio);
 	if (unlikely(err))
 		goto err;
@@ -1965,7 +1944,6 @@
 	set_input(btv, i, btv->tvnorm);
 
 err:
-	mutex_unlock(&btv->lock);
 	return 0;
 }
 
@@ -1979,7 +1957,6 @@
 	if (unlikely(0 != t->index))
 		return -EINVAL;
 
-	mutex_lock(&btv->lock);
 	if (unlikely(btv->tuner_type == TUNER_ABSENT)) {
 		err = -EINVAL;
 		goto err;
@@ -1995,7 +1972,6 @@
 		btv->audio_mode_gpio(btv, t, 1);
 
 err:
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -2006,10 +1982,8 @@
 	struct bttv_fh *fh  = priv;
 	struct bttv *btv = fh->btv;
 
-	mutex_lock(&btv->lock);
 	f->type = btv->radio_user ? V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV;
 	f->frequency = btv->freq;
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -2024,7 +1998,6 @@
 	if (unlikely(f->tuner != 0))
 		return -EINVAL;
 
-	mutex_lock(&btv->lock);
 	err = v4l2_prio_check(&btv->prio, fh->prio);
 	if (unlikely(err))
 		goto err;
@@ -2039,7 +2012,6 @@
 	if (btv->has_matchbox && btv->radio_user)
 		tea5757_set_freq(btv, btv->freq);
 err:
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -2172,7 +2144,6 @@
 
 	/* Make sure tvnorm, vbi_end and the current cropping parameters
 	   remain consistent until we're done. */
-	mutex_lock(&btv->lock);
 
 	b = &bttv_tvnorms[btv->tvnorm].cropcap.bounds;
 
@@ -2250,7 +2221,6 @@
 	rc = 0; /* success */
 
  fail:
-	mutex_unlock(&btv->lock);
 
 	return rc;
 }
@@ -2282,9 +2252,7 @@
 	if (V4L2_FIELD_ANY == field) {
 		__s32 height2;
 
-		mutex_lock(&fh->btv->lock);
 		height2 = fh->btv->crop[!!fh->do_crop].rect.height >> 1;
-		mutex_unlock(&fh->btv->lock);
 		field = (win->w.height > height2)
 			? V4L2_FIELD_INTERLACED
 			: V4L2_FIELD_TOP;
@@ -2360,7 +2328,6 @@
 		}
 	}
 
-	mutex_lock(&fh->cap.vb_lock);
 	/* clip against screen */
 	if (NULL != btv->fbuf.base)
 		n = btcx_screen_clips(btv->fbuf.fmt.width, btv->fbuf.fmt.height,
@@ -2391,13 +2358,6 @@
 	fh->ov.field    = win->field;
 	fh->ov.setup_ok = 1;
 
-	/*
-	 * FIXME: btv is protected by btv->lock mutex, while btv->init
-	 *	  is protected by fh->cap.vb_lock. This seems to open the
-	 *	  possibility for some race situations. Maybe the better would
-	 *	  be to unify those locks or to use another way to store the
-	 *	  init values that will be consumed by videobuf callbacks
-	 */
 	btv->init.ov.w.width   = win->w.width;
 	btv->init.ov.w.height  = win->w.height;
 	btv->init.ov.field     = win->field;
@@ -2412,7 +2372,6 @@
 		bttv_overlay_risc(btv, &fh->ov, fh->ovfmt, new);
 		retval = bttv_switch_overlay(btv,fh,new);
 	}
-	mutex_unlock(&fh->cap.vb_lock);
 	return retval;
 }
 
@@ -2526,9 +2485,7 @@
 	if (V4L2_FIELD_ANY == field) {
 		__s32 height2;
 
-		mutex_lock(&btv->lock);
 		height2 = btv->crop[!!fh->do_crop].rect.height >> 1;
-		mutex_unlock(&btv->lock);
 		field = (f->fmt.pix.height > height2)
 			? V4L2_FIELD_INTERLACED
 			: V4L2_FIELD_BOTTOM;
@@ -2614,7 +2571,6 @@
 	fmt = format_by_fourcc(f->fmt.pix.pixelformat);
 
 	/* update our state informations */
-	mutex_lock(&fh->cap.vb_lock);
 	fh->fmt              = fmt;
 	fh->cap.field        = f->fmt.pix.field;
 	fh->cap.last         = V4L2_FIELD_NONE;
@@ -2623,7 +2579,6 @@
 	btv->init.fmt        = fmt;
 	btv->init.width      = f->fmt.pix.width;
 	btv->init.height     = f->fmt.pix.height;
-	mutex_unlock(&fh->cap.vb_lock);
 
 	return 0;
 }
@@ -2649,11 +2604,9 @@
 	unsigned int i;
 	struct bttv_fh *fh = priv;
 
-	mutex_lock(&fh->cap.vb_lock);
 	retval = __videobuf_mmap_setup(&fh->cap, gbuffers, gbufsize,
 				     V4L2_MEMORY_MMAP);
 	if (retval < 0) {
-		mutex_unlock(&fh->cap.vb_lock);
 		return retval;
 	}
 
@@ -2665,7 +2618,6 @@
 	for (i = 0; i < gbuffers; i++)
 		mbuf->offsets[i] = i * gbufsize;
 
-	mutex_unlock(&fh->cap.vb_lock);
 	return 0;
 }
 #endif
@@ -2775,10 +2727,8 @@
 	int retval = 0;
 
 	if (on) {
-		mutex_lock(&fh->cap.vb_lock);
 		/* verify args */
 		if (unlikely(!btv->fbuf.base)) {
-			mutex_unlock(&fh->cap.vb_lock);
 			return -EINVAL;
 		}
 		if (unlikely(!fh->ov.setup_ok)) {
@@ -2787,13 +2737,11 @@
 		}
 		if (retval)
 			return retval;
-		mutex_unlock(&fh->cap.vb_lock);
 	}
 
 	if (!check_alloc_btres_lock(btv, fh, RESOURCE_OVERLAY))
 		return -EBUSY;
 
-	mutex_lock(&fh->cap.vb_lock);
 	if (on) {
 		fh->ov.tvnorm = btv->tvnorm;
 		new = videobuf_sg_alloc(sizeof(*new));
@@ -2805,7 +2753,6 @@
 
 	/* switch over */
 	retval = bttv_switch_overlay(btv, fh, new);
-	mutex_unlock(&fh->cap.vb_lock);
 	return retval;
 }
 
@@ -2844,7 +2791,6 @@
 	}
 
 	/* ok, accept it */
-	mutex_lock(&fh->cap.vb_lock);
 	btv->fbuf.base       = fb->base;
 	btv->fbuf.fmt.width  = fb->fmt.width;
 	btv->fbuf.fmt.height = fb->fmt.height;
@@ -2876,7 +2822,6 @@
 			retval = bttv_switch_overlay(btv, fh, new);
 		}
 	}
-	mutex_unlock(&fh->cap.vb_lock);
 	return retval;
 }
 
@@ -2955,7 +2900,6 @@
 	     c->id >= V4L2_CID_PRIVATE_LASTP1))
 		return -EINVAL;
 
-	mutex_lock(&btv->lock);
 	if (!btv->volume_gpio && (c->id == V4L2_CID_AUDIO_VOLUME))
 		*c = no_ctl;
 	else {
@@ -2963,7 +2907,6 @@
 
 		*c = (NULL != ctrl) ? *ctrl : no_ctl;
 	}
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -2974,10 +2917,8 @@
 	struct bttv_fh *fh = f;
 	struct bttv *btv = fh->btv;
 
-	mutex_lock(&btv->lock);
 	v4l2_video_std_frame_period(bttv_tvnorms[btv->tvnorm].v4l2_id,
 				    &parm->parm.capture.timeperframe);
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -2993,7 +2934,6 @@
 	if (0 != t->index)
 		return -EINVAL;
 
-	mutex_lock(&btv->lock);
 	t->rxsubchans = V4L2_TUNER_SUB_MONO;
 	bttv_call_all(btv, tuner, g_tuner, t);
 	strcpy(t->name, "Television");
@@ -3005,7 +2945,6 @@
 	if (btv->audio_mode_gpio)
 		btv->audio_mode_gpio(btv, t, 0);
 
-	mutex_unlock(&btv->lock);
 	return 0;
 }
 
@@ -3014,9 +2953,7 @@
 	struct bttv_fh *fh = f;
 	struct bttv *btv = fh->btv;
 
-	mutex_lock(&btv->lock);
 	*p = v4l2_prio_max(&btv->prio);
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -3028,9 +2965,7 @@
 	struct bttv *btv = fh->btv;
 	int	rc;
 
-	mutex_lock(&btv->lock);
 	rc = v4l2_prio_change(&btv->prio, &fh->prio, prio);
-	mutex_unlock(&btv->lock);
 
 	return rc;
 }
@@ -3045,9 +2980,7 @@
 	    cap->type != V4L2_BUF_TYPE_VIDEO_OVERLAY)
 		return -EINVAL;
 
-	mutex_lock(&btv->lock);
 	*cap = bttv_tvnorms[btv->tvnorm].cropcap;
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -3065,9 +2998,7 @@
 	   inconsistent with fh->width or fh->height and apps
 	   do not expect a change here. */
 
-	mutex_lock(&btv->lock);
 	crop->c = btv->crop[!!fh->do_crop].rect;
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -3091,17 +3022,14 @@
 	/* Make sure tvnorm, vbi_end and the current cropping
 	   parameters remain consistent until we're done. Note
 	   read() may change vbi_end in check_alloc_btres_lock(). */
-	mutex_lock(&btv->lock);
 	retval = v4l2_prio_check(&btv->prio, fh->prio);
 	if (0 != retval) {
-		mutex_unlock(&btv->lock);
 		return retval;
 	}
 
 	retval = -EBUSY;
 
 	if (locked_btres(fh->btv, VIDEO_RESOURCES)) {
-		mutex_unlock(&btv->lock);
 		return retval;
 	}
 
@@ -3113,7 +3041,6 @@
 
 	b_top = max(b->top, btv->vbi_end);
 	if (b_top + 32 >= b_bottom) {
-		mutex_unlock(&btv->lock);
 		return retval;
 	}
 
@@ -3136,12 +3063,8 @@
 
 	btv->crop[1] = c;
 
-	mutex_unlock(&btv->lock);
-
 	fh->do_crop = 1;
 
-	mutex_lock(&fh->cap.vb_lock);
-
 	if (fh->width < c.min_scaled_width) {
 		fh->width = c.min_scaled_width;
 		btv->init.width = c.min_scaled_width;
@@ -3158,8 +3081,6 @@
 		btv->init.height = c.max_scaled_height;
 	}
 
-	mutex_unlock(&fh->cap.vb_lock);
-
 	return 0;
 }
 
@@ -3227,7 +3148,6 @@
 		return videobuf_poll_stream(file, &fh->vbi, wait);
 	}
 
-	mutex_lock(&fh->cap.vb_lock);
 	if (check_btres(fh,RESOURCE_VIDEO_STREAM)) {
 		/* streaming capture */
 		if (list_empty(&fh->cap.stream))
@@ -3262,7 +3182,6 @@
 	else
 		rc = 0;
 err:
-	mutex_unlock(&fh->cap.vb_lock);
 	return rc;
 }
 
@@ -3293,23 +3212,11 @@
 		return -ENOMEM;
 	file->private_data = fh;
 
-	/*
-	 * btv is protected by btv->lock mutex, while btv->init and other
-	 * streaming vars are protected by fh->cap.vb_lock. We need to take
-	 * care of both locks to avoid troubles. However, vb_lock is used also
-	 * inside videobuf, without calling buf->lock. So, it is a very bad
-	 * idea to hold both locks at the same time.
-	 * Let's first copy btv->init at fh, holding cap.vb_lock, and then work
-	 * with the rest of init, holding btv->lock.
-	 */
-	mutex_lock(&fh->cap.vb_lock);
 	*fh = btv->init;
-	mutex_unlock(&fh->cap.vb_lock);
 
 	fh->type = type;
 	fh->ov.setup_ok = 0;
 
-	mutex_lock(&btv->lock);
 	v4l2_prio_open(&btv->prio, &fh->prio);
 
 	videobuf_queue_sg_init(&fh->cap, &bttv_video_qops,
@@ -3317,13 +3224,13 @@
 			    V4L2_BUF_TYPE_VIDEO_CAPTURE,
 			    V4L2_FIELD_INTERLACED,
 			    sizeof(struct bttv_buffer),
-			    fh, NULL);
+			    fh, &btv->lock);
 	videobuf_queue_sg_init(&fh->vbi, &bttv_vbi_qops,
 			    &btv->c.pci->dev, &btv->s_lock,
 			    V4L2_BUF_TYPE_VBI_CAPTURE,
 			    V4L2_FIELD_SEQ_TB,
 			    sizeof(struct bttv_buffer),
-			    fh, NULL);
+			    fh, &btv->lock);
 	set_tvnorm(btv,btv->tvnorm);
 	set_input(btv, btv->input, btv->tvnorm);
 
@@ -3346,7 +3253,6 @@
 	bttv_vbi_fmt_reset(&fh->vbi_fmt, btv->tvnorm);
 
 	bttv_field_count(btv);
-	mutex_unlock(&btv->lock);
 	return 0;
 }
 
@@ -3355,7 +3261,6 @@
 	struct bttv_fh *fh = file->private_data;
 	struct bttv *btv = fh->btv;
 
-	mutex_lock(&btv->lock);
 	/* turn off overlay */
 	if (check_btres(fh, RESOURCE_OVERLAY))
 		bttv_switch_overlay(btv,fh,NULL);
@@ -3381,14 +3286,8 @@
 
 	/* free stuff */
 
-	/*
-	 * videobuf uses cap.vb_lock - we should avoid holding btv->lock,
-	 * otherwise we may have dead lock conditions
-	 */
-	mutex_unlock(&btv->lock);
 	videobuf_mmap_free(&fh->cap);
 	videobuf_mmap_free(&fh->vbi);
-	mutex_lock(&btv->lock);
 	v4l2_prio_close(&btv->prio, fh->prio);
 	file->private_data = NULL;
 	kfree(fh);
@@ -3398,7 +3297,6 @@
 
 	if (!btv->users)
 		audio_mute(btv, 1);
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -3502,11 +3400,8 @@
 	if (unlikely(!fh))
 		return -ENOMEM;
 	file->private_data = fh;
-	mutex_lock(&fh->cap.vb_lock);
 	*fh = btv->init;
-	mutex_unlock(&fh->cap.vb_lock);
 
-	mutex_lock(&btv->lock);
 	v4l2_prio_open(&btv->prio, &fh->prio);
 
 	btv->radio_user++;
@@ -3514,7 +3409,6 @@
 	bttv_call_all(btv, tuner, s_radio);
 	audio_input(btv,TVAUDIO_INPUT_RADIO);
 
-	mutex_unlock(&btv->lock);
 	return 0;
 }
 
@@ -3524,7 +3418,6 @@
 	struct bttv *btv = fh->btv;
 	struct rds_command cmd;
 
-	mutex_lock(&btv->lock);
 	v4l2_prio_close(&btv->prio, fh->prio);
 	file->private_data = NULL;
 	kfree(fh);
@@ -3532,7 +3425,6 @@
 	btv->radio_user--;
 
 	bttv_call_all(btv, core, ioctl, RDS_CMD_CLOSE, &cmd);
-	mutex_unlock(&btv->lock);
 
 	return 0;
 }
@@ -3561,7 +3453,6 @@
 		return -EINVAL;
 	if (0 != t->index)
 		return -EINVAL;
-	mutex_lock(&btv->lock);
 	strcpy(t->name, "Radio");
 	t->type = V4L2_TUNER_RADIO;
 
@@ -3570,8 +3461,6 @@
 	if (btv->audio_mode_gpio)
 		btv->audio_mode_gpio(btv, t, 0);
 
-	mutex_unlock(&btv->lock);
-
 	return 0;
 }
 
@@ -3692,7 +3581,7 @@
 	.open	  = radio_open,
 	.read     = radio_read,
 	.release  = radio_release,
-	.ioctl	  = video_ioctl2,
+	.unlocked_ioctl = video_ioctl2,
 	.poll     = radio_poll,
 };
 
diff --git a/drivers/media/video/bw-qcam.c b/drivers/media/video/bw-qcam.c
index 935e0c9..c119350 100644
--- a/drivers/media/video/bw-qcam.c
+++ b/drivers/media/video/bw-qcam.c
@@ -860,7 +860,7 @@
 
 static const struct v4l2_file_operations qcam_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl          = video_ioctl2,
+	.unlocked_ioctl = video_ioctl2,
 	.read		= qcam_read,
 };
 
diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c
index 6e4b196..24fc009 100644
--- a/drivers/media/video/c-qcam.c
+++ b/drivers/media/video/c-qcam.c
@@ -718,7 +718,7 @@
 
 static const struct v4l2_file_operations qcam_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 	.read		= qcam_read,
 };
 
diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c
index 260c666..0dfff50 100644
--- a/drivers/media/video/cafe_ccic.c
+++ b/drivers/media/video/cafe_ccic.c
@@ -1775,7 +1775,7 @@
 	.read = cafe_v4l_read,
 	.poll = cafe_v4l_poll,
 	.mmap = cafe_v4l_mmap,
-	.ioctl = video_ioctl2,
+	.unlocked_ioctl = video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops cafe_v4l_ioctl_ops = {
diff --git a/drivers/media/video/cx18/cx18-alsa-pcm.c b/drivers/media/video/cx18/cx18-alsa-pcm.c
index 8f55692..82d195b 100644
--- a/drivers/media/video/cx18/cx18-alsa-pcm.c
+++ b/drivers/media/video/cx18/cx18-alsa-pcm.c
@@ -218,7 +218,13 @@
 static int snd_cx18_pcm_ioctl(struct snd_pcm_substream *substream,
 		     unsigned int cmd, void *arg)
 {
-	return snd_pcm_lib_ioctl(substream, cmd, arg);
+	struct snd_cx18_card *cxsc = snd_pcm_substream_chip(substream);
+	int ret;
+
+	snd_cx18_lock(cxsc);
+	ret = snd_pcm_lib_ioctl(substream, cmd, arg);
+	snd_cx18_unlock(cxsc);
+	return ret;
 }
 
 
diff --git a/drivers/media/video/cx18/cx18-streams.c b/drivers/media/video/cx18/cx18-streams.c
index 9045f1e..ab461e2 100644
--- a/drivers/media/video/cx18/cx18-streams.c
+++ b/drivers/media/video/cx18/cx18-streams.c
@@ -41,7 +41,7 @@
 	.read = cx18_v4l2_read,
 	.open = cx18_v4l2_open,
 	/* FIXME change to video_ioctl2 if serialization lock can be removed */
-	.ioctl = cx18_v4l2_ioctl,
+	.unlocked_ioctl = cx18_v4l2_ioctl,
 	.release = cx18_v4l2_close,
 	.poll = cx18_v4l2_enc_poll,
 };
diff --git a/drivers/media/video/et61x251/et61x251_core.c b/drivers/media/video/et61x251/et61x251_core.c
index a5cfc76..bb16409 100644
--- a/drivers/media/video/et61x251/et61x251_core.c
+++ b/drivers/media/video/et61x251/et61x251_core.c
@@ -2530,7 +2530,7 @@
 	.owner = THIS_MODULE,
 	.open =    et61x251_open,
 	.release = et61x251_release,
-	.ioctl =   et61x251_ioctl,
+	.unlocked_ioctl =   et61x251_ioctl,
 	.read =    et61x251_read,
 	.poll =    et61x251_poll,
 	.mmap =    et61x251_mmap,
diff --git a/drivers/media/video/gspca/sonixj.c b/drivers/media/video/gspca/sonixj.c
index 330dadc..e23de57 100644
--- a/drivers/media/video/gspca/sonixj.c
+++ b/drivers/media/video/gspca/sonixj.c
@@ -63,7 +63,10 @@
 #define QUALITY_DEF 80
 	u8 jpegqual;			/* webcam quality */
 
+	u8 reg01;
+	u8 reg17;
 	u8 reg18;
+	u8 flags;
 
 	s8 ag_cnt;
 #define AG_CNT_START 13
@@ -96,6 +99,22 @@
 	SENSOR_SP80708,
 };
 
+/* device flags */
+#define PDN_INV	1		/* inverse pin S_PWR_DN / sn_xxx tables */
+
+/* sn9c1xx definitions */
+/* register 0x01 */
+#define S_PWR_DN	0x01	/* sensor power down */
+#define S_PDN_INV	0x02	/* inverse pin S_PWR_DN */
+#define V_TX_EN		0x04	/* video transfer enable */
+#define LED		0x08	/* output to pin LED */
+#define SCL_SEL_OD	0x20	/* open-drain mode */
+#define SYS_SEL_48M	0x40	/* system clock 0: 24MHz, 1: 48MHz */
+/* register 0x17 */
+#define MCK_SIZE_MASK	0x1f	/* sensor master clock */
+#define SEN_CLK_EN	0x20	/* enable sensor clock */
+#define DEF_EN		0x80	/* defect pixel by 0: soft, 1: hard */
+
 /* V4L2 controls supported by the driver */
 static void setbrightness(struct gspca_dev *gspca_dev);
 static void setcontrast(struct gspca_dev *gspca_dev);
@@ -1755,141 +1774,6 @@
 	}
 }
 
-static void bridge_init(struct gspca_dev *gspca_dev,
-			  const u8 *sn9c1xx)
-{
-	struct sd *sd = (struct sd *) gspca_dev;
-	u8 reg0102[2];
-	const u8 *reg9a;
-	static const u8 reg9a_def[] =
-		{0x00, 0x40, 0x20, 0x00, 0x00, 0x00};
-	static const u8 reg9a_spec[] =
-		{0x00, 0x40, 0x38, 0x30, 0x00, 0x20};
-	static const u8 regd4[] = {0x60, 0x00, 0x00};
-
-	/* sensor clock already enabled in sd_init */
-	/* reg_w1(gspca_dev, 0xf1, 0x00); */
-	reg_w1(gspca_dev, 0x01, sn9c1xx[1]);
-
-	/* configure gpio */
-	reg0102[0] = sn9c1xx[1];
-	reg0102[1] = sn9c1xx[2];
-	if (gspca_dev->audio)
-		reg0102[1] |= 0x04;	/* keep the audio connection */
-	reg_w(gspca_dev, 0x01, reg0102, 2);
-	reg_w(gspca_dev, 0x08, &sn9c1xx[8], 2);
-	reg_w(gspca_dev, 0x17, &sn9c1xx[0x17], 5);
-	switch (sd->sensor) {
-	case SENSOR_GC0307:
-	case SENSOR_OV7660:
-	case SENSOR_PO1030:
-	case SENSOR_PO2030N:
-	case SENSOR_SOI768:
-	case SENSOR_SP80708:
-		reg9a = reg9a_spec;
-		break;
-	default:
-		reg9a = reg9a_def;
-		break;
-	}
-	reg_w(gspca_dev, 0x9a, reg9a, 6);
-
-	reg_w(gspca_dev, 0xd4, regd4, sizeof regd4);
-
-	reg_w(gspca_dev, 0x03, &sn9c1xx[3], 0x0f);
-
-	switch (sd->sensor) {
-	case SENSOR_ADCM1700:
-		reg_w1(gspca_dev, 0x01, 0x43);
-		reg_w1(gspca_dev, 0x17, 0x62);
-		reg_w1(gspca_dev, 0x01, 0x42);
-		reg_w1(gspca_dev, 0x01, 0x42);
-		break;
-	case SENSOR_GC0307:
-		msleep(50);
-		reg_w1(gspca_dev, 0x01, 0x61);
-		reg_w1(gspca_dev, 0x17, 0x22);
-		reg_w1(gspca_dev, 0x01, 0x60);
-		reg_w1(gspca_dev, 0x01, 0x40);
-		msleep(50);
-		break;
-	case SENSOR_MI0360B:
-		reg_w1(gspca_dev, 0x01, 0x61);
-		reg_w1(gspca_dev, 0x17, 0x60);
-		reg_w1(gspca_dev, 0x01, 0x60);
-		reg_w1(gspca_dev, 0x01, 0x40);
-		break;
-	case SENSOR_MT9V111:
-		reg_w1(gspca_dev, 0x01, 0x61);
-		reg_w1(gspca_dev, 0x17, 0x61);
-		reg_w1(gspca_dev, 0x01, 0x60);
-		reg_w1(gspca_dev, 0x01, 0x40);
-		break;
-	case SENSOR_OM6802:
-		msleep(10);
-		reg_w1(gspca_dev, 0x02, 0x73);
-		reg_w1(gspca_dev, 0x17, 0x60);
-		reg_w1(gspca_dev, 0x01, 0x22);
-		msleep(100);
-		reg_w1(gspca_dev, 0x01, 0x62);
-		reg_w1(gspca_dev, 0x17, 0x64);
-		reg_w1(gspca_dev, 0x17, 0x64);
-		reg_w1(gspca_dev, 0x01, 0x42);
-		msleep(10);
-		reg_w1(gspca_dev, 0x01, 0x42);
-		i2c_w8(gspca_dev, om6802_init0[0]);
-		i2c_w8(gspca_dev, om6802_init0[1]);
-		msleep(15);
-		reg_w1(gspca_dev, 0x02, 0x71);
-		msleep(150);
-		break;
-	case SENSOR_OV7630:
-		reg_w1(gspca_dev, 0x01, 0x61);
-		reg_w1(gspca_dev, 0x17, 0xe2);
-		reg_w1(gspca_dev, 0x01, 0x60);
-		reg_w1(gspca_dev, 0x01, 0x40);
-		break;
-	case SENSOR_OV7648:
-		reg_w1(gspca_dev, 0x01, 0x63);
-		reg_w1(gspca_dev, 0x17, 0x20);
-		reg_w1(gspca_dev, 0x01, 0x62);
-		reg_w1(gspca_dev, 0x01, 0x42);
-		break;
-	case SENSOR_PO1030:
-	case SENSOR_SOI768:
-		reg_w1(gspca_dev, 0x01, 0x61);
-		reg_w1(gspca_dev, 0x17, 0x20);
-		reg_w1(gspca_dev, 0x01, 0x60);
-		reg_w1(gspca_dev, 0x01, 0x40);
-		break;
-	case SENSOR_PO2030N:
-	case SENSOR_OV7660:
-		reg_w1(gspca_dev, 0x01, 0x63);
-		reg_w1(gspca_dev, 0x17, 0x20);
-		reg_w1(gspca_dev, 0x01, 0x62);
-		reg_w1(gspca_dev, 0x01, 0x42);
-		break;
-	case SENSOR_SP80708:
-		reg_w1(gspca_dev, 0x01, 0x63);
-		reg_w1(gspca_dev, 0x17, 0x20);
-		reg_w1(gspca_dev, 0x01, 0x62);
-		reg_w1(gspca_dev, 0x01, 0x42);
-		msleep(100);
-		reg_w1(gspca_dev, 0x02, 0x62);
-		break;
-	default:
-/*	case SENSOR_HV7131R: */
-/*	case SENSOR_MI0360: */
-/*	case SENSOR_MO4000: */
-		reg_w1(gspca_dev, 0x01, 0x43);
-		reg_w1(gspca_dev, 0x17, 0x61);
-		reg_w1(gspca_dev, 0x01, 0x42);
-		if (sd->sensor == SENSOR_HV7131R)
-			hv7131r_probe(gspca_dev);
-		break;
-	}
-}
-
 /* this function is called at probe time */
 static int sd_config(struct gspca_dev *gspca_dev,
 			const struct usb_device_id *id)
@@ -1898,7 +1782,8 @@
 	struct cam *cam;
 
 	sd->bridge = id->driver_info >> 16;
-	sd->sensor = id->driver_info;
+	sd->sensor = id->driver_info >> 8;
+	sd->flags = id->driver_info;
 
 	cam = &gspca_dev->cam;
 	if (sd->sensor == SENSOR_ADCM1700) {
@@ -1929,7 +1814,7 @@
 	/* setup a selector by bridge */
 	reg_w1(gspca_dev, 0xf1, 0x01);
 	reg_r(gspca_dev, 0x00, 1);
-	reg_w1(gspca_dev, 0xf1, gspca_dev->usb_buf[0]);
+	reg_w1(gspca_dev, 0xf1, 0x00);
 	reg_r(gspca_dev, 0x00, 1);		/* get sonix chip id */
 	regF1 = gspca_dev->usb_buf[0];
 	if (gspca_dev->usb_err < 0)
@@ -2423,10 +2308,17 @@
 {
 	struct sd *sd = (struct sd *) gspca_dev;
 	int i;
-	u8 reg1, reg17;
+	u8 reg01, reg17;
+	u8 reg0102[2];
 	const u8 *sn9c1xx;
 	const u8 (*init)[8];
+	const u8 *reg9a;
 	int mode;
+	static const u8 reg9a_def[] =
+		{0x00, 0x40, 0x20, 0x00, 0x00, 0x00};
+	static const u8 reg9a_spec[] =
+		{0x00, 0x40, 0x38, 0x30, 0x00, 0x20};
+	static const u8 regd4[] = {0x60, 0x00, 0x00};
 	static const u8 C0[] = { 0x2d, 0x2d, 0x3a, 0x05, 0x04, 0x3f };
 	static const u8 CA[] = { 0x28, 0xd8, 0x14, 0xec };
 	static const u8 CA_adcm1700[] =
@@ -2448,7 +2340,85 @@
 
 	/* initialize the bridge */
 	sn9c1xx = sn_tb[sd->sensor];
-	bridge_init(gspca_dev, sn9c1xx);
+
+	/* sensor clock already enabled in sd_init */
+	/* reg_w1(gspca_dev, 0xf1, 0x00); */
+	reg01 = sn9c1xx[1];
+	if (sd->flags & PDN_INV)
+		reg01 ^= S_PDN_INV;		/* power down inverted */
+	reg_w1(gspca_dev, 0x01, reg01);
+
+	/* configure gpio */
+	reg0102[0] = reg01;
+	reg0102[1] = sn9c1xx[2];
+	if (gspca_dev->audio)
+		reg0102[1] |= 0x04;	/* keep the audio connection */
+	reg_w(gspca_dev, 0x01, reg0102, 2);
+	reg_w(gspca_dev, 0x08, &sn9c1xx[8], 2);
+	reg_w(gspca_dev, 0x17, &sn9c1xx[0x17], 5);
+	switch (sd->sensor) {
+	case SENSOR_GC0307:
+	case SENSOR_OV7660:
+	case SENSOR_PO1030:
+	case SENSOR_PO2030N:
+	case SENSOR_SOI768:
+	case SENSOR_SP80708:
+		reg9a = reg9a_spec;
+		break;
+	default:
+		reg9a = reg9a_def;
+		break;
+	}
+	reg_w(gspca_dev, 0x9a, reg9a, 6);
+
+	reg_w(gspca_dev, 0xd4, regd4, sizeof regd4);
+
+	reg_w(gspca_dev, 0x03, &sn9c1xx[3], 0x0f);
+
+	reg17 = sn9c1xx[0x17];
+	switch (sd->sensor) {
+	case SENSOR_GC0307:
+		msleep(50);		/*fixme: is it useful? */
+		break;
+	case SENSOR_OM6802:
+		msleep(10);
+		reg_w1(gspca_dev, 0x02, 0x73);
+		reg17 |= SEN_CLK_EN;
+		reg_w1(gspca_dev, 0x17, reg17);
+		reg_w1(gspca_dev, 0x01, 0x22);
+		msleep(100);
+		reg01 = SCL_SEL_OD | S_PDN_INV;
+		reg17 &= MCK_SIZE_MASK;
+		reg17 |= 0x04;		/* clock / 4 */
+		break;
+	}
+	reg01 |= SYS_SEL_48M;
+	reg_w1(gspca_dev, 0x01, reg01);
+	reg17 |= SEN_CLK_EN;
+	reg_w1(gspca_dev, 0x17, reg17);
+	reg01 &= ~S_PWR_DN;		/* sensor power on */
+	reg_w1(gspca_dev, 0x01, reg01);
+	reg01 &= ~SYS_SEL_48M;
+	reg_w1(gspca_dev, 0x01, reg01);
+
+	switch (sd->sensor) {
+	case SENSOR_HV7131R:
+		hv7131r_probe(gspca_dev);	/*fixme: is it useful? */
+		break;
+	case SENSOR_OM6802:
+		msleep(10);
+		reg_w1(gspca_dev, 0x01, reg01);
+		i2c_w8(gspca_dev, om6802_init0[0]);
+		i2c_w8(gspca_dev, om6802_init0[1]);
+		msleep(15);
+		reg_w1(gspca_dev, 0x02, 0x71);
+		msleep(150);
+		break;
+	case SENSOR_SP80708:
+		msleep(100);
+		reg_w1(gspca_dev, 0x02, 0x62);
+		break;
+	}
 
 	/* initialize the sensor */
 	i2c_w_seq(gspca_dev, sensor_init[sd->sensor]);
@@ -2476,30 +2446,11 @@
 	}
 	reg_w1(gspca_dev, 0x18, sn9c1xx[0x18]);
 	switch (sd->sensor) {
-	case SENSOR_GC0307:
-		reg17 = 0xa2;
-		break;
-	case SENSOR_MT9V111:
-	case SENSOR_MI0360B:
-		reg17 = 0xe0;
-		break;
-	case SENSOR_ADCM1700:
-	case SENSOR_OV7630:
-		reg17 = 0xe2;
-		break;
-	case SENSOR_OV7648:
-		reg17 = 0x20;
-		break;
-	case SENSOR_OV7660:
-	case SENSOR_SOI768:
-		reg17 = 0xa0;
-		break;
-	case SENSOR_PO1030:
-	case SENSOR_PO2030N:
-		reg17 = 0xa0;
+	case SENSOR_OM6802:
+/*	case SENSOR_OV7648:		* fixme: sometimes */
 		break;
 	default:
-		reg17 = 0x60;
+		reg17 |= DEF_EN;
 		break;
 	}
 	reg_w1(gspca_dev, 0x17, reg17);
@@ -2546,95 +2497,67 @@
 
 	init = NULL;
 	mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv;
-	if (mode)
-		reg1 = 0x46;	/* 320x240: clk 48Mhz, video trf enable */
-	else
-		reg1 = 0x06;	/* 640x480: clk 24Mhz, video trf enable */
-	reg17 = 0x61;		/* 0x:20: enable sensor clock */
+	reg01 |= SYS_SEL_48M | V_TX_EN;
+	reg17 &= ~MCK_SIZE_MASK;
+	reg17 |= 0x02;			/* clock / 2 */
 	switch (sd->sensor) {
 	case SENSOR_ADCM1700:
 		init = adcm1700_sensor_param1;
-		reg1 = 0x46;
-		reg17 = 0xe2;
 		break;
 	case SENSOR_GC0307:
 		init = gc0307_sensor_param1;
-		reg17 = 0xa2;
-		reg1 = 0x44;
+		break;
+	case SENSOR_HV7131R:
+	case SENSOR_MI0360:
+		if (mode)
+			reg01 |= SYS_SEL_48M;	/* 320x240: clk 48Mhz */
+		else
+			reg01 &= ~SYS_SEL_48M;	/* 640x480: clk 24Mhz */
+		reg17 &= ~MCK_SIZE_MASK;
+		reg17 |= 0x01;			/* clock / 1 */
 		break;
 	case SENSOR_MI0360B:
 		init = mi0360b_sensor_param1;
-		reg1 &= ~0x02;		/* don't inverse pin S_PWR_DN */
-		reg17 = 0xe2;
 		break;
 	case SENSOR_MO4000:
-		if (mode) {
-/*			reg1 = 0x46;	 * 320 clk 48Mhz 60fp/s */
-			reg1 = 0x06;	/* clk 24Mz */
-		} else {
-			reg17 = 0x22;	/* 640 MCKSIZE */
-/*			reg1 = 0x06;	 * 640 clk 24Mz (done) */
+		if (mode) {			/* if 320x240 */
+			reg01 &= ~SYS_SEL_48M;	/* clk 24Mz */
+			reg17 &= ~MCK_SIZE_MASK;
+			reg17 |= 0x01;		/* clock / 1 */
 		}
 		break;
 	case SENSOR_MT9V111:
 		init = mt9v111_sensor_param1;
-		if (mode) {
-			reg1 = 0x04;	/* 320 clk 48Mhz */
-		} else {
-/*			reg1 = 0x06;	 * 640 clk 24Mz (done) */
-			reg17 = 0xc2;
-		}
 		break;
 	case SENSOR_OM6802:
 		init = om6802_sensor_param1;
-		reg17 = 0x64;		/* 640 MCKSIZE */
+		if (!mode) {			/* if 640x480 */
+			reg17 &= ~MCK_SIZE_MASK;
+			reg17 |= 0x01;		/* clock / 4 */
+		}
 		break;
 	case SENSOR_OV7630:
 		init = ov7630_sensor_param1;
-		reg17 = 0xe2;
-		reg1 = 0x44;
 		break;
 	case SENSOR_OV7648:
 		init = ov7648_sensor_param1;
-		reg17 = 0x21;
-/*		reg1 = 0x42;		 * 42 - 46? */
+		reg17 &= ~MCK_SIZE_MASK;
+		reg17 |= 0x01;			/* clock / 1 */
 		break;
 	case SENSOR_OV7660:
 		init = ov7660_sensor_param1;
-		if (sd->bridge == BRIDGE_SN9C120) {
-			if (mode) {		/* 320x240 - 160x120 */
-				reg17 = 0xa2;
-				reg1 = 0x44;	/* 48 Mhz, video trf eneble */
-			}
-		} else {
-			reg17 = 0x22;
-			reg1 = 0x06;	/* 24 Mhz, video trf eneble
-					 * inverse power down */
-		}
 		break;
 	case SENSOR_PO1030:
 		init = po1030_sensor_param1;
-		reg17 = 0xa2;
-		reg1 = 0x44;
 		break;
 	case SENSOR_PO2030N:
 		init = po2030n_sensor_param1;
-		reg1 = 0x46;
-		reg17 = 0xa2;
 		break;
 	case SENSOR_SOI768:
 		init = soi768_sensor_param1;
-		reg1 = 0x44;
-		reg17 = 0xa2;
 		break;
 	case SENSOR_SP80708:
 		init = sp80708_sensor_param1;
-		if (mode) {
-/*??			reg1 = 0x04;	 * 320 clk 48Mhz */
-		} else {
-			reg1 = 0x46;	 /* 640 clk 48Mz */
-			reg17 = 0xa2;
-		}
 		break;
 	}
 
@@ -2684,7 +2607,9 @@
 	setjpegqual(gspca_dev);
 
 	reg_w1(gspca_dev, 0x17, reg17);
-	reg_w1(gspca_dev, 0x01, reg1);
+	reg_w1(gspca_dev, 0x01, reg01);
+	sd->reg01 = reg01;
+	sd->reg17 = reg17;
 
 	sethvflip(gspca_dev);
 	setbrightness(gspca_dev);
@@ -2706,41 +2631,64 @@
 		{ 0xa1, 0x21, 0x76, 0x20, 0x00, 0x00, 0x00, 0x10 };
 	static const u8 stopsoi768[] =
 		{ 0xa1, 0x21, 0x12, 0x80, 0x00, 0x00, 0x00, 0x10 };
-	u8 data;
-	const u8 *sn9c1xx;
+	u8 reg01;
+	u8 reg17;
 
-	data = 0x0b;
+	reg01 = sd->reg01;
+	reg17 = sd->reg17 & ~SEN_CLK_EN;
 	switch (sd->sensor) {
+	case SENSOR_ADCM1700:
 	case SENSOR_GC0307:
-		data = 0x29;
+	case SENSOR_PO2030N:
+	case SENSOR_SP80708:
+		reg01 |= LED;
+		reg_w1(gspca_dev, 0x01, reg01);
+		reg01 &= ~(LED | V_TX_EN);
+		reg_w1(gspca_dev, 0x01, reg01);
+/*		reg_w1(gspca_dev, 0x02, 0x??);	 * LED off ? */
 		break;
 	case SENSOR_HV7131R:
+		reg01 &= ~V_TX_EN;
+		reg_w1(gspca_dev, 0x01, reg01);
 		i2c_w8(gspca_dev, stophv7131);
-		data = 0x2b;
 		break;
 	case SENSOR_MI0360:
 	case SENSOR_MI0360B:
+		reg01 &= ~V_TX_EN;
+		reg_w1(gspca_dev, 0x01, reg01);
+/*		reg_w1(gspca_dev, 0x02, 0x40);	  * LED off ? */
 		i2c_w8(gspca_dev, stopmi0360);
-		data = 0x29;
 		break;
-	case SENSOR_OV7648:
-		i2c_w8(gspca_dev, stopov7648);
-		/* fall thru */
 	case SENSOR_MT9V111:
-	case SENSOR_OV7630:
+	case SENSOR_OM6802:
 	case SENSOR_PO1030:
-		data = 0x29;
+		reg01 &= ~V_TX_EN;
+		reg_w1(gspca_dev, 0x01, reg01);
+		break;
+	case SENSOR_OV7630:
+	case SENSOR_OV7648:
+		reg01 &= ~V_TX_EN;
+		reg_w1(gspca_dev, 0x01, reg01);
+		i2c_w8(gspca_dev, stopov7648);
+		break;
+	case SENSOR_OV7660:
+		reg01 &= ~V_TX_EN;
+		reg_w1(gspca_dev, 0x01, reg01);
 		break;
 	case SENSOR_SOI768:
 		i2c_w8(gspca_dev, stopsoi768);
-		data = 0x29;
 		break;
 	}
-	sn9c1xx = sn_tb[sd->sensor];
-	reg_w1(gspca_dev, 0x01, sn9c1xx[1]);
-	reg_w1(gspca_dev, 0x17, sn9c1xx[0x17]);
-	reg_w1(gspca_dev, 0x01, sn9c1xx[1]);
-	reg_w1(gspca_dev, 0x01, data);
+
+	reg01 |= SCL_SEL_OD;
+	reg_w1(gspca_dev, 0x01, reg01);
+	reg01 |= S_PWR_DN;		/* sensor power down */
+	reg_w1(gspca_dev, 0x01, reg01);
+	reg_w1(gspca_dev, 0x17, reg17);
+	reg01 &= ~SYS_SEL_48M;		/* clock 24MHz */
+	reg_w1(gspca_dev, 0x01, reg01);
+	reg01 |= LED;
+	reg_w1(gspca_dev, 0x01, reg01);
 	/* Don't disable sensor clock as that disables the button on the cam */
 	/* reg_w1(gspca_dev, 0xf1, 0x01); */
 }
@@ -2954,14 +2902,18 @@
 /* -- module initialisation -- */
 #define BS(bridge, sensor) \
 	.driver_info = (BRIDGE_ ## bridge << 16) \
-			| SENSOR_ ## sensor
+			| (SENSOR_ ## sensor << 8)
+#define BSF(bridge, sensor, flags) \
+	.driver_info = (BRIDGE_ ## bridge << 16) \
+			| (SENSOR_ ## sensor << 8) \
+			| (flags)
 static const __devinitdata struct usb_device_id device_table[] = {
 #if !defined CONFIG_USB_SN9C102 && !defined CONFIG_USB_SN9C102_MODULE
 	{USB_DEVICE(0x0458, 0x7025), BS(SN9C120, MI0360)},
 	{USB_DEVICE(0x0458, 0x702e), BS(SN9C120, OV7660)},
 #endif
-	{USB_DEVICE(0x045e, 0x00f5), BS(SN9C105, OV7660)},
-	{USB_DEVICE(0x045e, 0x00f7), BS(SN9C105, OV7660)},
+	{USB_DEVICE(0x045e, 0x00f5), BSF(SN9C105, OV7660, PDN_INV)},
+	{USB_DEVICE(0x045e, 0x00f7), BSF(SN9C105, OV7660, PDN_INV)},
 	{USB_DEVICE(0x0471, 0x0327), BS(SN9C105, MI0360)},
 	{USB_DEVICE(0x0471, 0x0328), BS(SN9C105, MI0360)},
 	{USB_DEVICE(0x0471, 0x0330), BS(SN9C105, MI0360)},
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 2be23bc..48d2c24 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -1659,7 +1659,7 @@
 	.open		= meye_open,
 	.release	= meye_release,
 	.mmap		= meye_mmap,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 	.poll		= meye_poll,
 };
 
@@ -1831,12 +1831,6 @@
 	msleep(1);
 	mchip_set(MCHIP_MM_INTA, MCHIP_MM_INTA_HIC_1_MASK);
 
-	if (video_register_device(meye.vdev, VFL_TYPE_GRABBER,
-				  video_nr) < 0) {
-		v4l2_err(v4l2_dev, "video_register_device failed\n");
-		goto outvideoreg;
-	}
-
 	mutex_init(&meye.lock);
 	init_waitqueue_head(&meye.proc_list);
 	meye.brightness = 32 << 10;
@@ -1858,6 +1852,12 @@
 	sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAPICTURE, 0);
 	sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAAGC, 48);
 
+	if (video_register_device(meye.vdev, VFL_TYPE_GRABBER,
+				  video_nr) < 0) {
+		v4l2_err(v4l2_dev, "video_register_device failed\n");
+		goto outvideoreg;
+	}
+
 	v4l2_info(v4l2_dev, "Motion Eye Camera Driver v%s.\n",
 	       MEYE_DRIVER_VERSION);
 	v4l2_info(v4l2_dev, "mchip KL5A72002 rev. %d, base %lx, irq %d\n",
diff --git a/drivers/media/video/pms.c b/drivers/media/video/pms.c
index 7129b50..7551907 100644
--- a/drivers/media/video/pms.c
+++ b/drivers/media/video/pms.c
@@ -932,7 +932,7 @@
 
 static const struct v4l2_file_operations pms_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 	.read           = pms_read,
 };
 
diff --git a/drivers/media/video/sh_vou.c b/drivers/media/video/sh_vou.c
index 4e5a8cf..07cf0c6 100644
--- a/drivers/media/video/sh_vou.c
+++ b/drivers/media/video/sh_vou.c
@@ -75,6 +75,7 @@
 	int pix_idx;
 	struct videobuf_buffer *active;
 	enum sh_vou_status status;
+	struct mutex fop_lock;
 };
 
 struct sh_vou_file {
@@ -235,7 +236,7 @@
 	vb->state = VIDEOBUF_NEEDS_INIT;
 }
 
-/* Locking: caller holds vq->vb_lock mutex */
+/* Locking: caller holds fop_lock mutex */
 static int sh_vou_buf_setup(struct videobuf_queue *vq, unsigned int *count,
 			    unsigned int *size)
 {
@@ -257,7 +258,7 @@
 	return 0;
 }
 
-/* Locking: caller holds vq->vb_lock mutex */
+/* Locking: caller holds fop_lock mutex */
 static int sh_vou_buf_prepare(struct videobuf_queue *vq,
 			      struct videobuf_buffer *vb,
 			      enum v4l2_field field)
@@ -306,7 +307,7 @@
 	return 0;
 }
 
-/* Locking: caller holds vq->vb_lock mutex and vq->irqlock spinlock */
+/* Locking: caller holds fop_lock mutex and vq->irqlock spinlock */
 static void sh_vou_buf_queue(struct videobuf_queue *vq,
 			     struct videobuf_buffer *vb)
 {
@@ -1190,7 +1191,7 @@
 				       V4L2_BUF_TYPE_VIDEO_OUTPUT,
 				       V4L2_FIELD_NONE,
 				       sizeof(struct videobuf_buffer), vdev,
-				       NULL);
+				       &vou_dev->fop_lock);
 
 	return 0;
 }
@@ -1292,7 +1293,7 @@
 	.owner		= THIS_MODULE,
 	.open		= sh_vou_open,
 	.release	= sh_vou_release,
-	.ioctl		= video_ioctl2,
+	.unlocked_ioctl	= video_ioctl2,
 	.mmap		= sh_vou_mmap,
 	.poll		= sh_vou_poll,
 };
@@ -1331,6 +1332,7 @@
 
 	INIT_LIST_HEAD(&vou_dev->queue);
 	spin_lock_init(&vou_dev->lock);
+	mutex_init(&vou_dev->fop_lock);
 	atomic_set(&vou_dev->use_count, 0);
 	vou_dev->pdata = vou_pdata;
 	vou_dev->status = SH_VOU_IDLE;
@@ -1388,6 +1390,7 @@
 		vdev->tvnorms |= V4L2_STD_PAL;
 	vdev->v4l2_dev = &vou_dev->v4l2_dev;
 	vdev->release = video_device_release;
+	vdev->lock = &vou_dev->fop_lock;
 
 	vou_dev->vdev = vdev;
 	video_set_drvdata(vdev, vou_dev);
diff --git a/drivers/media/video/sn9c102/sn9c102_core.c b/drivers/media/video/sn9c102/sn9c102_core.c
index 28e19da..f49fbfb 100644
--- a/drivers/media/video/sn9c102/sn9c102_core.c
+++ b/drivers/media/video/sn9c102/sn9c102_core.c
@@ -3238,7 +3238,7 @@
 	.owner = THIS_MODULE,
 	.open = sn9c102_open,
 	.release = sn9c102_release,
-	.ioctl = sn9c102_ioctl,
+	.unlocked_ioctl = sn9c102_ioctl,
 	.read = sn9c102_read,
 	.poll = sn9c102_poll,
 	.mmap = sn9c102_mmap,
diff --git a/drivers/media/video/uvc/uvc_ctrl.c b/drivers/media/video/uvc/uvc_ctrl.c
index f169f77..59f8a9a 100644
--- a/drivers/media/video/uvc/uvc_ctrl.c
+++ b/drivers/media/video/uvc/uvc_ctrl.c
@@ -785,7 +785,7 @@
 	}
 }
 
-struct uvc_control *uvc_find_control(struct uvc_video_chain *chain,
+static struct uvc_control *uvc_find_control(struct uvc_video_chain *chain,
 	__u32 v4l2_id, struct uvc_control_mapping **mapping)
 {
 	struct uvc_control *ctrl = NULL;
@@ -944,6 +944,52 @@
 	return ret;
 }
 
+/*
+ * Mapping V4L2 controls to UVC controls can be straighforward if done well.
+ * Most of the UVC controls exist in V4L2, and can be mapped directly. Some
+ * must be grouped (for instance the Red Balance, Blue Balance and Do White
+ * Balance V4L2 controls use the White Balance Component UVC control) or
+ * otherwise translated. The approach we take here is to use a translation
+ * table for the controls that can be mapped directly, and handle the others
+ * manually.
+ */
+int uvc_query_v4l2_menu(struct uvc_video_chain *chain,
+	struct v4l2_querymenu *query_menu)
+{
+	struct uvc_menu_info *menu_info;
+	struct uvc_control_mapping *mapping;
+	struct uvc_control *ctrl;
+	u32 index = query_menu->index;
+	u32 id = query_menu->id;
+	int ret;
+
+	memset(query_menu, 0, sizeof(*query_menu));
+	query_menu->id = id;
+	query_menu->index = index;
+
+	ret = mutex_lock_interruptible(&chain->ctrl_mutex);
+	if (ret < 0)
+		return -ERESTARTSYS;
+
+	ctrl = uvc_find_control(chain, query_menu->id, &mapping);
+	if (ctrl == NULL || mapping->v4l2_type != V4L2_CTRL_TYPE_MENU) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	if (query_menu->index >= mapping->menu_count) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	menu_info = &mapping->menu_info[query_menu->index];
+	strlcpy(query_menu->name, menu_info->name, sizeof query_menu->name);
+
+done:
+	mutex_unlock(&chain->ctrl_mutex);
+	return ret;
+}
+
 
 /* --------------------------------------------------------------------------
  * Control transactions
diff --git a/drivers/media/video/uvc/uvc_queue.c b/drivers/media/video/uvc/uvc_queue.c
index ed6d544..f14581b 100644
--- a/drivers/media/video/uvc/uvc_queue.c
+++ b/drivers/media/video/uvc/uvc_queue.c
@@ -90,6 +90,39 @@
 }
 
 /*
+ * Free the video buffers.
+ *
+ * This function must be called with the queue lock held.
+ */
+static int __uvc_free_buffers(struct uvc_video_queue *queue)
+{
+	unsigned int i;
+
+	for (i = 0; i < queue->count; ++i) {
+		if (queue->buffer[i].vma_use_count != 0)
+			return -EBUSY;
+	}
+
+	if (queue->count) {
+		vfree(queue->mem);
+		queue->count = 0;
+	}
+
+	return 0;
+}
+
+int uvc_free_buffers(struct uvc_video_queue *queue)
+{
+	int ret;
+
+	mutex_lock(&queue->mutex);
+	ret = __uvc_free_buffers(queue);
+	mutex_unlock(&queue->mutex);
+
+	return ret;
+}
+
+/*
  * Allocate the video buffers.
  *
  * Pages are reserved to make sure they will not be swapped, as they will be
@@ -110,7 +143,7 @@
 
 	mutex_lock(&queue->mutex);
 
-	if ((ret = uvc_free_buffers(queue)) < 0)
+	if ((ret = __uvc_free_buffers(queue)) < 0)
 		goto done;
 
 	/* Bail out if no buffers should be allocated. */
@@ -152,28 +185,6 @@
 }
 
 /*
- * Free the video buffers.
- *
- * This function must be called with the queue lock held.
- */
-int uvc_free_buffers(struct uvc_video_queue *queue)
-{
-	unsigned int i;
-
-	for (i = 0; i < queue->count; ++i) {
-		if (queue->buffer[i].vma_use_count != 0)
-			return -EBUSY;
-	}
-
-	if (queue->count) {
-		vfree(queue->mem);
-		queue->count = 0;
-	}
-
-	return 0;
-}
-
-/*
  * Check if buffers have been allocated.
  */
 int uvc_queue_allocated(struct uvc_video_queue *queue)
@@ -369,6 +380,82 @@
 }
 
 /*
+ * VMA operations.
+ */
+static void uvc_vm_open(struct vm_area_struct *vma)
+{
+	struct uvc_buffer *buffer = vma->vm_private_data;
+	buffer->vma_use_count++;
+}
+
+static void uvc_vm_close(struct vm_area_struct *vma)
+{
+	struct uvc_buffer *buffer = vma->vm_private_data;
+	buffer->vma_use_count--;
+}
+
+static const struct vm_operations_struct uvc_vm_ops = {
+	.open		= uvc_vm_open,
+	.close		= uvc_vm_close,
+};
+
+/*
+ * Memory-map a video buffer.
+ *
+ * This function implements video buffers memory mapping and is intended to be
+ * used by the device mmap handler.
+ */
+int uvc_queue_mmap(struct uvc_video_queue *queue, struct vm_area_struct *vma)
+{
+	struct uvc_buffer *uninitialized_var(buffer);
+	struct page *page;
+	unsigned long addr, start, size;
+	unsigned int i;
+	int ret = 0;
+
+	start = vma->vm_start;
+	size = vma->vm_end - vma->vm_start;
+
+	mutex_lock(&queue->mutex);
+
+	for (i = 0; i < queue->count; ++i) {
+		buffer = &queue->buffer[i];
+		if ((buffer->buf.m.offset >> PAGE_SHIFT) == vma->vm_pgoff)
+			break;
+	}
+
+	if (i == queue->count || size != queue->buf_size) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/*
+	 * VM_IO marks the area as being an mmaped region for I/O to a
+	 * device. It also prevents the region from being core dumped.
+	 */
+	vma->vm_flags |= VM_IO;
+
+	addr = (unsigned long)queue->mem + buffer->buf.m.offset;
+	while (size > 0) {
+		page = vmalloc_to_page((void *)addr);
+		if ((ret = vm_insert_page(vma, start, page)) < 0)
+			goto done;
+
+		start += PAGE_SIZE;
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	vma->vm_ops = &uvc_vm_ops;
+	vma->vm_private_data = buffer;
+	uvc_vm_open(vma);
+
+done:
+	mutex_unlock(&queue->mutex);
+	return ret;
+}
+
+/*
  * Poll the video queue.
  *
  * This function implements video queue polling and is intended to be used by
diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c
index 6d15de9..8cf61e8 100644
--- a/drivers/media/video/uvc/uvc_v4l2.c
+++ b/drivers/media/video/uvc/uvc_v4l2.c
@@ -101,40 +101,6 @@
  */
 
 /*
- * Mapping V4L2 controls to UVC controls can be straighforward if done well.
- * Most of the UVC controls exist in V4L2, and can be mapped directly. Some
- * must be grouped (for instance the Red Balance, Blue Balance and Do White
- * Balance V4L2 controls use the White Balance Component UVC control) or
- * otherwise translated. The approach we take here is to use a translation
- * table for the controls that can be mapped directly, and handle the others
- * manually.
- */
-static int uvc_v4l2_query_menu(struct uvc_video_chain *chain,
-	struct v4l2_querymenu *query_menu)
-{
-	struct uvc_menu_info *menu_info;
-	struct uvc_control_mapping *mapping;
-	struct uvc_control *ctrl;
-	u32 index = query_menu->index;
-	u32 id = query_menu->id;
-
-	ctrl = uvc_find_control(chain, query_menu->id, &mapping);
-	if (ctrl == NULL || mapping->v4l2_type != V4L2_CTRL_TYPE_MENU)
-		return -EINVAL;
-
-	if (query_menu->index >= mapping->menu_count)
-		return -EINVAL;
-
-	memset(query_menu, 0, sizeof(*query_menu));
-	query_menu->id = id;
-	query_menu->index = index;
-
-	menu_info = &mapping->menu_info[query_menu->index];
-	strlcpy(query_menu->name, menu_info->name, sizeof query_menu->name);
-	return 0;
-}
-
-/*
  * Find the frame interval closest to the requested frame interval for the
  * given frame format and size. This should be done by the device as part of
  * the Video Probe and Commit negotiation, but some hardware don't implement
@@ -260,12 +226,14 @@
 	 * developers test their webcams with the Linux driver as well as with
 	 * the Windows driver).
 	 */
+	mutex_lock(&stream->mutex);
 	if (stream->dev->quirks & UVC_QUIRK_PROBE_EXTRAFIELDS)
 		probe->dwMaxVideoFrameSize =
 			stream->ctrl.dwMaxVideoFrameSize;
 
 	/* Probe the device. */
 	ret = uvc_probe_video(stream, probe);
+	mutex_unlock(&stream->mutex);
 	if (ret < 0)
 		goto done;
 
@@ -289,14 +257,21 @@
 static int uvc_v4l2_get_format(struct uvc_streaming *stream,
 	struct v4l2_format *fmt)
 {
-	struct uvc_format *format = stream->cur_format;
-	struct uvc_frame *frame = stream->cur_frame;
+	struct uvc_format *format;
+	struct uvc_frame *frame;
+	int ret = 0;
 
 	if (fmt->type != stream->type)
 		return -EINVAL;
 
-	if (format == NULL || frame == NULL)
-		return -EINVAL;
+	mutex_lock(&stream->mutex);
+	format = stream->cur_format;
+	frame = stream->cur_frame;
+
+	if (format == NULL || frame == NULL) {
+		ret = -EINVAL;
+		goto done;
+	}
 
 	fmt->fmt.pix.pixelformat = format->fcc;
 	fmt->fmt.pix.width = frame->wWidth;
@@ -307,7 +282,9 @@
 	fmt->fmt.pix.colorspace = format->colorspace;
 	fmt->fmt.pix.priv = 0;
 
-	return 0;
+done:
+	mutex_unlock(&stream->mutex);
+	return ret;
 }
 
 static int uvc_v4l2_set_format(struct uvc_streaming *stream,
@@ -321,18 +298,24 @@
 	if (fmt->type != stream->type)
 		return -EINVAL;
 
-	if (uvc_queue_allocated(&stream->queue))
-		return -EBUSY;
-
 	ret = uvc_v4l2_try_format(stream, fmt, &probe, &format, &frame);
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&stream->mutex);
+
+	if (uvc_queue_allocated(&stream->queue)) {
+		ret = -EBUSY;
+		goto done;
+	}
+
 	memcpy(&stream->ctrl, &probe, sizeof probe);
 	stream->cur_format = format;
 	stream->cur_frame = frame;
 
-	return 0;
+done:
+	mutex_unlock(&stream->mutex);
+	return ret;
 }
 
 static int uvc_v4l2_get_streamparm(struct uvc_streaming *stream,
@@ -343,7 +326,10 @@
 	if (parm->type != stream->type)
 		return -EINVAL;
 
+	mutex_lock(&stream->mutex);
 	numerator = stream->ctrl.dwFrameInterval;
+	mutex_unlock(&stream->mutex);
+
 	denominator = 10000000;
 	uvc_simplify_fraction(&numerator, &denominator, 8, 333);
 
@@ -370,7 +356,6 @@
 static int uvc_v4l2_set_streamparm(struct uvc_streaming *stream,
 		struct v4l2_streamparm *parm)
 {
-	struct uvc_frame *frame = stream->cur_frame;
 	struct uvc_streaming_control probe;
 	struct v4l2_fract timeperframe;
 	uint32_t interval;
@@ -379,28 +364,36 @@
 	if (parm->type != stream->type)
 		return -EINVAL;
 
-	if (uvc_queue_streaming(&stream->queue))
-		return -EBUSY;
-
 	if (parm->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		timeperframe = parm->parm.capture.timeperframe;
 	else
 		timeperframe = parm->parm.output.timeperframe;
 
-	memcpy(&probe, &stream->ctrl, sizeof probe);
 	interval = uvc_fraction_to_interval(timeperframe.numerator,
 		timeperframe.denominator);
-
 	uvc_trace(UVC_TRACE_FORMAT, "Setting frame interval to %u/%u (%u).\n",
 		timeperframe.numerator, timeperframe.denominator, interval);
-	probe.dwFrameInterval = uvc_try_frame_interval(frame, interval);
+
+	mutex_lock(&stream->mutex);
+
+	if (uvc_queue_streaming(&stream->queue)) {
+		mutex_unlock(&stream->mutex);
+		return -EBUSY;
+	}
+
+	memcpy(&probe, &stream->ctrl, sizeof probe);
+	probe.dwFrameInterval =
+		uvc_try_frame_interval(stream->cur_frame, interval);
 
 	/* Probe the device with the new settings. */
 	ret = uvc_probe_video(stream, &probe);
-	if (ret < 0)
+	if (ret < 0) {
+		mutex_unlock(&stream->mutex);
 		return ret;
+	}
 
 	memcpy(&stream->ctrl, &probe, sizeof probe);
+	mutex_unlock(&stream->mutex);
 
 	/* Return the actual frame period. */
 	timeperframe.numerator = probe.dwFrameInterval;
@@ -528,11 +521,9 @@
 	if (uvc_has_privileges(handle)) {
 		uvc_video_enable(stream, 0);
 
-		mutex_lock(&stream->queue.mutex);
 		if (uvc_free_buffers(&stream->queue) < 0)
 			uvc_printk(KERN_ERR, "uvc_v4l2_release: Unable to "
 					"free buffers.\n");
-		mutex_unlock(&stream->queue.mutex);
 	}
 
 	/* Release the file handle. */
@@ -624,7 +615,7 @@
 	}
 
 	case VIDIOC_QUERYMENU:
-		return uvc_v4l2_query_menu(chain, arg);
+		return uvc_query_v4l2_menu(chain, arg);
 
 	case VIDIOC_G_EXT_CTRLS:
 	{
@@ -905,15 +896,17 @@
 	case VIDIOC_CROPCAP:
 	{
 		struct v4l2_cropcap *ccap = arg;
-		struct uvc_frame *frame = stream->cur_frame;
 
 		if (ccap->type != stream->type)
 			return -EINVAL;
 
 		ccap->bounds.left = 0;
 		ccap->bounds.top = 0;
-		ccap->bounds.width = frame->wWidth;
-		ccap->bounds.height = frame->wHeight;
+
+		mutex_lock(&stream->mutex);
+		ccap->bounds.width = stream->cur_frame->wWidth;
+		ccap->bounds.height = stream->cur_frame->wHeight;
+		mutex_unlock(&stream->mutex);
 
 		ccap->defrect = ccap->bounds;
 
@@ -930,8 +923,6 @@
 	case VIDIOC_REQBUFS:
 	{
 		struct v4l2_requestbuffers *rb = arg;
-		unsigned int bufsize =
-			stream->ctrl.dwMaxVideoFrameSize;
 
 		if (rb->type != stream->type ||
 		    rb->memory != V4L2_MEMORY_MMAP)
@@ -940,7 +931,10 @@
 		if ((ret = uvc_acquire_privileges(handle)) < 0)
 			return ret;
 
-		ret = uvc_alloc_buffers(&stream->queue, rb->count, bufsize);
+		mutex_lock(&stream->mutex);
+		ret = uvc_alloc_buffers(&stream->queue, rb->count,
+					stream->ctrl.dwMaxVideoFrameSize);
+		mutex_unlock(&stream->mutex);
 		if (ret < 0)
 			return ret;
 
@@ -988,7 +982,9 @@
 		if (!uvc_has_privileges(handle))
 			return -EBUSY;
 
+		mutex_lock(&stream->mutex);
 		ret = uvc_video_enable(stream, 1);
+		mutex_unlock(&stream->mutex);
 		if (ret < 0)
 			return ret;
 		break;
@@ -1068,79 +1064,14 @@
 	return -EINVAL;
 }
 
-/*
- * VMA operations.
- */
-static void uvc_vm_open(struct vm_area_struct *vma)
-{
-	struct uvc_buffer *buffer = vma->vm_private_data;
-	buffer->vma_use_count++;
-}
-
-static void uvc_vm_close(struct vm_area_struct *vma)
-{
-	struct uvc_buffer *buffer = vma->vm_private_data;
-	buffer->vma_use_count--;
-}
-
-static const struct vm_operations_struct uvc_vm_ops = {
-	.open		= uvc_vm_open,
-	.close		= uvc_vm_close,
-};
-
 static int uvc_v4l2_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct uvc_fh *handle = file->private_data;
 	struct uvc_streaming *stream = handle->stream;
-	struct uvc_video_queue *queue = &stream->queue;
-	struct uvc_buffer *uninitialized_var(buffer);
-	struct page *page;
-	unsigned long addr, start, size;
-	unsigned int i;
-	int ret = 0;
 
 	uvc_trace(UVC_TRACE_CALLS, "uvc_v4l2_mmap\n");
 
-	start = vma->vm_start;
-	size = vma->vm_end - vma->vm_start;
-
-	mutex_lock(&queue->mutex);
-
-	for (i = 0; i < queue->count; ++i) {
-		buffer = &queue->buffer[i];
-		if ((buffer->buf.m.offset >> PAGE_SHIFT) == vma->vm_pgoff)
-			break;
-	}
-
-	if (i == queue->count || size != queue->buf_size) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	/*
-	 * VM_IO marks the area as being an mmaped region for I/O to a
-	 * device. It also prevents the region from being core dumped.
-	 */
-	vma->vm_flags |= VM_IO;
-
-	addr = (unsigned long)queue->mem + buffer->buf.m.offset;
-	while (size > 0) {
-		page = vmalloc_to_page((void *)addr);
-		if ((ret = vm_insert_page(vma, start, page)) < 0)
-			goto done;
-
-		start += PAGE_SIZE;
-		addr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	vma->vm_ops = &uvc_vm_ops;
-	vma->vm_private_data = buffer;
-	uvc_vm_open(vma);
-
-done:
-	mutex_unlock(&queue->mutex);
-	return ret;
+	return uvc_queue_mmap(&stream->queue, vma);
 }
 
 static unsigned int uvc_v4l2_poll(struct file *file, poll_table *wait)
@@ -1157,7 +1088,7 @@
 	.owner		= THIS_MODULE,
 	.open		= uvc_v4l2_open,
 	.release	= uvc_v4l2_release,
-	.ioctl		= uvc_v4l2_ioctl,
+	.unlocked_ioctl	= uvc_v4l2_ioctl,
 	.read		= uvc_v4l2_read,
 	.mmap		= uvc_v4l2_mmap,
 	.poll		= uvc_v4l2_poll,
diff --git a/drivers/media/video/uvc/uvc_video.c b/drivers/media/video/uvc/uvc_video.c
index 5555f01..5673d67 100644
--- a/drivers/media/video/uvc/uvc_video.c
+++ b/drivers/media/video/uvc/uvc_video.c
@@ -293,8 +293,6 @@
 	unsigned int i;
 	int ret;
 
-	mutex_lock(&stream->mutex);
-
 	/* Perform probing. The device should adjust the requested values
 	 * according to its capabilities. However, some devices, namely the
 	 * first generation UVC Logitech webcams, don't implement the Video
@@ -346,7 +344,6 @@
 	}
 
 done:
-	mutex_unlock(&stream->mutex);
 	return ret;
 }
 
diff --git a/drivers/media/video/uvc/uvcvideo.h b/drivers/media/video/uvc/uvcvideo.h
index d97cf6d..45f01e7 100644
--- a/drivers/media/video/uvc/uvcvideo.h
+++ b/drivers/media/video/uvc/uvcvideo.h
@@ -436,7 +436,9 @@
 	struct uvc_streaming_control ctrl;
 	struct uvc_format *cur_format;
 	struct uvc_frame *cur_frame;
-
+	/* Protect access to ctrl, cur_format, cur_frame and hardware video
+	 * probe control.
+	 */
 	struct mutex mutex;
 
 	unsigned int frozen : 1;
@@ -574,6 +576,8 @@
 extern void uvc_queue_cancel(struct uvc_video_queue *queue, int disconnect);
 extern struct uvc_buffer *uvc_queue_next_buffer(struct uvc_video_queue *queue,
 		struct uvc_buffer *buf);
+extern int uvc_queue_mmap(struct uvc_video_queue *queue,
+		struct vm_area_struct *vma);
 extern unsigned int uvc_queue_poll(struct uvc_video_queue *queue,
 		struct file *file, poll_table *wait);
 extern int uvc_queue_allocated(struct uvc_video_queue *queue);
@@ -606,10 +610,10 @@
 extern int uvc_status_resume(struct uvc_device *dev);
 
 /* Controls */
-extern struct uvc_control *uvc_find_control(struct uvc_video_chain *chain,
-		__u32 v4l2_id, struct uvc_control_mapping **mapping);
 extern int uvc_query_v4l2_ctrl(struct uvc_video_chain *chain,
 		struct v4l2_queryctrl *v4l2_ctrl);
+extern int uvc_query_v4l2_menu(struct uvc_video_chain *chain,
+		struct v4l2_querymenu *query_menu);
 
 extern int uvc_ctrl_add_mapping(struct uvc_video_chain *chain,
 		const struct uvc_control_mapping *mapping);
diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index 03f7f46..359e232 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -186,12 +186,12 @@
 		size_t sz, loff_t *off)
 {
 	struct video_device *vdev = video_devdata(filp);
-	int ret = -EIO;
+	int ret = -ENODEV;
 
 	if (!vdev->fops->read)
 		return -EINVAL;
-	if (vdev->lock)
-		mutex_lock(vdev->lock);
+	if (vdev->lock && mutex_lock_interruptible(vdev->lock))
+		return -ERESTARTSYS;
 	if (video_is_registered(vdev))
 		ret = vdev->fops->read(filp, buf, sz, off);
 	if (vdev->lock)
@@ -203,12 +203,12 @@
 		size_t sz, loff_t *off)
 {
 	struct video_device *vdev = video_devdata(filp);
-	int ret = -EIO;
+	int ret = -ENODEV;
 
 	if (!vdev->fops->write)
 		return -EINVAL;
-	if (vdev->lock)
-		mutex_lock(vdev->lock);
+	if (vdev->lock && mutex_lock_interruptible(vdev->lock))
+		return -ERESTARTSYS;
 	if (video_is_registered(vdev))
 		ret = vdev->fops->write(filp, buf, sz, off);
 	if (vdev->lock)
@@ -219,10 +219,10 @@
 static unsigned int v4l2_poll(struct file *filp, struct poll_table_struct *poll)
 {
 	struct video_device *vdev = video_devdata(filp);
-	int ret = DEFAULT_POLLMASK;
+	int ret = POLLERR | POLLHUP;
 
 	if (!vdev->fops->poll)
-		return ret;
+		return DEFAULT_POLLMASK;
 	if (vdev->lock)
 		mutex_lock(vdev->lock);
 	if (video_is_registered(vdev))
@@ -238,20 +238,45 @@
 	int ret = -ENODEV;
 
 	if (vdev->fops->unlocked_ioctl) {
-		if (vdev->lock)
-			mutex_lock(vdev->lock);
+		if (vdev->lock && mutex_lock_interruptible(vdev->lock))
+			return -ERESTARTSYS;
 		if (video_is_registered(vdev))
 			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
 		if (vdev->lock)
 			mutex_unlock(vdev->lock);
 	} else if (vdev->fops->ioctl) {
-		/* TODO: convert all drivers to unlocked_ioctl */
+		/* This code path is a replacement for the BKL. It is a major
+		 * hack but it will have to do for those drivers that are not
+		 * yet converted to use unlocked_ioctl.
+		 *
+		 * There are two options: if the driver implements struct
+		 * v4l2_device, then the lock defined there is used to
+		 * serialize the ioctls. Otherwise the v4l2 core lock defined
+		 * below is used. This lock is really bad since it serializes
+		 * completely independent devices.
+		 *
+		 * Both variants suffer from the same problem: if the driver
+		 * sleeps, then it blocks all ioctls since the lock is still
+		 * held. This is very common for VIDIOC_DQBUF since that
+		 * normally waits for a frame to arrive. As a result any other
+		 * ioctl calls will proceed very, very slowly since each call
+		 * will have to wait for the VIDIOC_QBUF to finish. Things that
+		 * should take 0.01s may now take 10-20 seconds.
+		 *
+		 * The workaround is to *not* take the lock for VIDIOC_DQBUF.
+		 * This actually works OK for videobuf-based drivers, since
+		 * videobuf will take its own internal lock.
+		 */
 		static DEFINE_MUTEX(v4l2_ioctl_mutex);
+		struct mutex *m = vdev->v4l2_dev ?
+			&vdev->v4l2_dev->ioctl_lock : &v4l2_ioctl_mutex;
 
-		mutex_lock(&v4l2_ioctl_mutex);
+		if (cmd != VIDIOC_DQBUF && mutex_lock_interruptible(m))
+			return -ERESTARTSYS;
 		if (video_is_registered(vdev))
 			ret = vdev->fops->ioctl(filp, cmd, arg);
-		mutex_unlock(&v4l2_ioctl_mutex);
+		if (cmd != VIDIOC_DQBUF)
+			mutex_unlock(m);
 	} else
 		ret = -ENOTTY;
 
@@ -265,8 +290,8 @@
 
 	if (!vdev->fops->mmap)
 		return ret;
-	if (vdev->lock)
-		mutex_lock(vdev->lock);
+	if (vdev->lock && mutex_lock_interruptible(vdev->lock))
+		return -ERESTARTSYS;
 	if (video_is_registered(vdev))
 		ret = vdev->fops->mmap(filp, vm);
 	if (vdev->lock)
@@ -284,7 +309,7 @@
 	mutex_lock(&videodev_lock);
 	vdev = video_devdata(filp);
 	/* return ENODEV if the video device has already been removed. */
-	if (vdev == NULL) {
+	if (vdev == NULL || !video_is_registered(vdev)) {
 		mutex_unlock(&videodev_lock);
 		return -ENODEV;
 	}
@@ -292,8 +317,10 @@
 	video_get(vdev);
 	mutex_unlock(&videodev_lock);
 	if (vdev->fops->open) {
-		if (vdev->lock)
-			mutex_lock(vdev->lock);
+		if (vdev->lock && mutex_lock_interruptible(vdev->lock)) {
+			ret = -ERESTARTSYS;
+			goto err;
+		}
 		if (video_is_registered(vdev))
 			ret = vdev->fops->open(filp);
 		else
@@ -302,6 +329,7 @@
 			mutex_unlock(vdev->lock);
 	}
 
+err:
 	/* decrease the refcount in case of an error */
 	if (ret)
 		video_put(vdev);
@@ -596,7 +624,12 @@
 	if (!vdev || !video_is_registered(vdev))
 		return;
 
+	mutex_lock(&videodev_lock);
+	/* This must be in a critical section to prevent a race with v4l2_open.
+	 * Once this bit has been cleared video_get may never be called again.
+	 */
 	clear_bit(V4L2_FL_REGISTERED, &vdev->flags);
+	mutex_unlock(&videodev_lock);
 	device_unregister(&vdev->dev);
 }
 EXPORT_SYMBOL(video_unregister_device);
diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c
index 0b08f96..7fe6f92 100644
--- a/drivers/media/video/v4l2-device.c
+++ b/drivers/media/video/v4l2-device.c
@@ -35,6 +35,7 @@
 
 	INIT_LIST_HEAD(&v4l2_dev->subdevs);
 	spin_lock_init(&v4l2_dev->lock);
+	mutex_init(&v4l2_dev->ioctl_lock);
 	v4l2_dev->dev = dev;
 	if (dev == NULL) {
 		/* If dev == NULL, then name must be filled in by the caller */
diff --git a/drivers/media/video/w9966.c b/drivers/media/video/w9966.c
index 635420d..019ee20 100644
--- a/drivers/media/video/w9966.c
+++ b/drivers/media/video/w9966.c
@@ -815,7 +815,7 @@
 
 static const struct v4l2_file_operations w9966_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl          = video_ioctl2,
+	.unlocked_ioctl = video_ioctl2,
 	.read           = w9966_v4l_read,
 };
 
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 003170e..69546e9 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -64,77 +64,6 @@
 	}
 }
 
-static bool pci_bus_resource_better(struct resource *res1, bool pos1,
-				    struct resource *res2, bool pos2)
-{
-	/* If exactly one is positive decode, always prefer that one */
-	if (pos1 != pos2)
-		return pos1 ? true : false;
-
-	/* Prefer the one that contains the highest address */
-	if (res1->end != res2->end)
-		return (res1->end > res2->end) ? true : false;
-
-	/* Otherwise, prefer the one with highest "center of gravity" */
-	if (res1->start != res2->start)
-		return (res1->start > res2->start) ? true : false;
-
-	/* Otherwise, choose one arbitrarily (but consistently) */
-	return (res1 > res2) ? true : false;
-}
-
-static bool pci_bus_resource_positive(struct pci_bus *bus, struct resource *res)
-{
-	struct pci_bus_resource *bus_res;
-
-	/*
-	 * This relies on the fact that pci_bus.resource[] refers to P2P or
-	 * CardBus bridge base/limit registers, which are always positively
-	 * decoded.  The pci_bus.resources list contains host bridge or
-	 * subtractively decoded resources.
-	 */
-	list_for_each_entry(bus_res, &bus->resources, list) {
-		if (bus_res->res == res)
-			return (bus_res->flags & PCI_SUBTRACTIVE_DECODE) ?
-				false : true;
-	}
-	return true;
-}
-
-/*
- * Find the next-best bus resource after the cursor "res".  If the cursor is
- * NULL, return the best resource.  "Best" means that we prefer positive
- * decode regions over subtractive decode, then those at higher addresses.
- */
-static struct resource *pci_bus_find_resource_prev(struct pci_bus *bus,
-						   unsigned int type,
-						   struct resource *res)
-{
-	bool res_pos, r_pos, prev_pos = false;
-	struct resource *r, *prev = NULL;
-	int i;
-
-	res_pos = pci_bus_resource_positive(bus, res);
-	pci_bus_for_each_resource(bus, r, i) {
-		if (!r)
-			continue;
-
-		if ((r->flags & IORESOURCE_TYPE_BITS) != type)
-			continue;
-
-		r_pos = pci_bus_resource_positive(bus, r);
-		if (!res || pci_bus_resource_better(res, res_pos, r, r_pos)) {
-			if (!prev || pci_bus_resource_better(r, r_pos,
-							     prev, prev_pos)) {
-				prev = r;
-				prev_pos = r_pos;
-			}
-		}
-	}
-
-	return prev;
-}
-
 /**
  * pci_bus_alloc_resource - allocate a resource from a parent bus
  * @bus: PCI bus
@@ -160,10 +89,9 @@
 					  resource_size_t),
 		void *alignf_data)
 {
-	int ret = -ENOMEM;
+	int i, ret = -ENOMEM;
 	struct resource *r;
 	resource_size_t max = -1;
-	unsigned int type = res->flags & IORESOURCE_TYPE_BITS;
 
 	type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
 
@@ -171,9 +99,10 @@
 	if (!(res->flags & IORESOURCE_MEM_64))
 		max = PCIBIOS_MAX_MEM_32;
 
-	/* Look for space at highest addresses first */
-	r = pci_bus_find_resource_prev(bus, type, NULL);
-	for ( ; r; r = pci_bus_find_resource_prev(bus, type, r)) {
+	pci_bus_for_each_resource(bus, r, i) {
+		if (!r)
+			continue;
+
 		/* type_mask must match */
 		if ((res->flags ^ r->flags) & type_mask)
 			continue;
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 0157708..09933eb 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -1417,6 +1417,11 @@
 			       (unsigned long long)drhd->reg_base_addr, ret);
 			return -1;
 		}
+
+		/*
+		 * Clear any previous faults.
+		 */
+		dmar_fault(iommu->irq, iommu);
 	}
 
 	return 0;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6f9350c..53a786f 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2329,6 +2329,9 @@
 {
 	u32 cfg;
 
+	if (!pci_find_capability(dev, PCI_CAP_ID_HT))
+		return;
+
 	pci_read_config_dword(dev, 0x74, &cfg);
 
 	if (cfg & ((1 << 2) | (1 << 15))) {
@@ -2764,6 +2767,29 @@
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 #endif /*CONFIG_MMC_RICOH_MMC*/
 
+#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
+#define VTUNCERRMSK_REG	0x1ac
+#define VTD_MSK_SPEC_ERRORS	(1 << 31)
+/*
+ * This is a quirk for masking vt-d spec defined errors to platform error
+ * handling logic. With out this, platforms using Intel 7500, 5500 chipsets
+ * (and the derivative chipsets like X58 etc) seem to generate NMI/SMI (based
+ * on the RAS config settings of the platform) when a vt-d fault happens.
+ * The resulting SMI caused the system to hang.
+ *
+ * VT-d spec related errors are already handled by the VT-d OS code, so no
+ * need to report the same error through other channels.
+ */
+static void vtd_mask_spec_errors(struct pci_dev *dev)
+{
+	u32 word;
+
+	pci_read_config_dword(dev, VTUNCERRMSK_REG, &word);
+	pci_write_config_dword(dev, VTUNCERRMSK_REG, word | VTD_MSK_SPEC_ERRORS);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors);
+#endif
 
 static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
 			  struct pci_fixup *end)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 5b6bbae..4a38422 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1637,9 +1637,8 @@
 
 	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
-	/* New queue, no concurrency on queue_flags */
 	if (!shost->use_clustering)
-		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
+		q->limits.cluster = 0;
 
 	/*
 	 * set a reasonable default alignment on word boundaries: the
diff --git a/drivers/staging/cx25821/cx25821-video.c b/drivers/staging/cx25821/cx25821-video.c
index e7f1d57..5238930 100644
--- a/drivers/staging/cx25821/cx25821-video.c
+++ b/drivers/staging/cx25821/cx25821-video.c
@@ -92,7 +92,7 @@
 	return ARRAY_SIZE(formats);
 }
 
-struct cx25821_fmt *format_by_fourcc(unsigned int fourcc)
+struct cx25821_fmt *cx25821_format_by_fourcc(unsigned int fourcc)
 {
 	unsigned int i;
 
@@ -848,7 +848,7 @@
        pix_format =
 	   (dev->channels[ch_id].pixel_formats ==
 	    PIXEL_FRMT_411) ? V4L2_PIX_FMT_Y41P : V4L2_PIX_FMT_YUYV;
-       fh->fmt = format_by_fourcc(pix_format);
+       fh->fmt = cx25821_format_by_fourcc(pix_format);
 
        v4l2_prio_open(&dev->channels[ch_id].prio, &fh->prio);
 
@@ -1010,7 +1010,7 @@
        if (0 != err)
 	       return err;
 
-       fh->fmt = format_by_fourcc(f->fmt.pix.pixelformat);
+       fh->fmt = cx25821_format_by_fourcc(f->fmt.pix.pixelformat);
        fh->vidq.field = f->fmt.pix.field;
 
        /* check if width and height is valid based on set standard */
@@ -1119,7 +1119,7 @@
 	enum v4l2_field field;
 	unsigned int maxw, maxh;
 
-	fmt = format_by_fourcc(f->fmt.pix.pixelformat);
+	fmt = cx25821_format_by_fourcc(f->fmt.pix.pixelformat);
 	if (NULL == fmt)
 		return -EINVAL;
 
diff --git a/drivers/staging/cx25821/cx25821-video.h b/drivers/staging/cx25821/cx25821-video.h
index cc6034b..a2415d3 100644
--- a/drivers/staging/cx25821/cx25821-video.h
+++ b/drivers/staging/cx25821/cx25821-video.h
@@ -87,7 +87,7 @@
 
 #define FORMAT_FLAGS_PACKED       0x01
 extern struct cx25821_fmt formats[];
-extern struct cx25821_fmt *format_by_fourcc(unsigned int fourcc);
+extern struct cx25821_fmt *cx25821_format_by_fourcc(unsigned int fourcc);
 extern struct cx25821_data timeout_data[MAX_VID_CHANNEL_NUM];
 
 extern void cx25821_dump_video_queue(struct cx25821_dev *dev,
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 81b4658..c5f8e5b 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -716,8 +716,8 @@
 		if (msg->len < 128)
 			*--dp = (msg->len << 1) | EA;
 		else {
-			*--dp = ((msg->len & 127) << 1) | EA;
-			*--dp = (msg->len >> 6) & 0xfe;
+			*--dp = (msg->len >> 7);	/* bits 7 - 15 */
+			*--dp = (msg->len & 127) << 1;	/* bits 0 - 6 */
 		}
 	}
 
@@ -968,6 +968,8 @@
 {
 	struct gsm_msg *msg;
 	msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->ftype);
+	if (msg == NULL)
+		return;
 	msg->data[0] = (cmd & 0xFE) << 1 | EA;	/* Clear C/R */
 	msg->data[1] = (dlen << 1) | EA;
 	memcpy(msg->data + 2, data, dlen);
diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index 9eed5b5..bcc2477 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig
@@ -107,11 +107,19 @@
 	  If you are unsure about this, say N here.
 
 config USB_OTG
-	bool
+	bool "OTG support"
 	depends on USB && EXPERIMENTAL
 	depends on USB_SUSPEND
 	default n
+	help
+	  The most notable feature of USB OTG is support for a
+	  "Dual-Role" device, which can act as either a device
+	  or a host. The initial role is decided by the type of
+	  plug inserted and can be changed later when two dual
+	  role devices talk to each other.
 
+	  Select this only if your board has Mini-AB/Micro-AB
+	  connector.
 
 config USB_OTG_WHITELIST
 	bool "Rely on OTG Targeted Peripherals List"
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 7b5cc16..8572dad 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1047,9 +1047,9 @@
 		kfree(cdev->req->buf);
 		usb_ep_free_request(gadget->ep0, cdev->req);
 	}
+	device_remove_file(&gadget->dev, &dev_attr_suspended);
 	kfree(cdev);
 	set_gadget_data(gadget, NULL);
-	device_remove_file(&gadget->dev, &dev_attr_suspended);
 	composite = NULL;
 }
 
@@ -1107,14 +1107,6 @@
 	 */
 	usb_ep_autoconfig_reset(cdev->gadget);
 
-	/* standardized runtime overrides for device ID data */
-	if (idVendor)
-		cdev->desc.idVendor = cpu_to_le16(idVendor);
-	if (idProduct)
-		cdev->desc.idProduct = cpu_to_le16(idProduct);
-	if (bcdDevice)
-		cdev->desc.bcdDevice = cpu_to_le16(bcdDevice);
-
 	/* composite gadget needs to assign strings for whole device (like
 	 * serial number), register function drivers, potentially update
 	 * power state and consumption, etc
@@ -1126,6 +1118,14 @@
 	cdev->desc = *composite->dev;
 	cdev->desc.bMaxPacketSize0 = gadget->ep0->maxpacket;
 
+	/* standardized runtime overrides for device ID data */
+	if (idVendor)
+		cdev->desc.idVendor = cpu_to_le16(idVendor);
+	if (idProduct)
+		cdev->desc.idProduct = cpu_to_le16(idProduct);
+	if (bcdDevice)
+		cdev->desc.bcdDevice = cpu_to_le16(bcdDevice);
+
 	/* stirng overrides */
 	if (iManufacturer || !cdev->desc.iManufacturer) {
 		if (!iManufacturer && !composite->iManufacturer &&
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 0fae58e..1d0f45f 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1680,6 +1680,7 @@
 				xhci->port_array[i] = (u8) -1;
 			}
 			/* FIXME: Should we disable the port? */
+			continue;
 		}
 		xhci->port_array[i] = major_revision;
 		if (major_revision == 0x03)
@@ -1758,16 +1759,20 @@
 			return -ENOMEM;
 
 		port_index = 0;
-		for (i = 0; i < num_ports; i++)
-			if (xhci->port_array[i] != 0x03) {
-				xhci->usb2_ports[port_index] =
-					&xhci->op_regs->port_status_base +
-					NUM_PORT_REGS*i;
-				xhci_dbg(xhci, "USB 2.0 port at index %u, "
-						"addr = %p\n", i,
-						xhci->usb2_ports[port_index]);
-				port_index++;
-			}
+		for (i = 0; i < num_ports; i++) {
+			if (xhci->port_array[i] == 0x03 ||
+					xhci->port_array[i] == 0 ||
+					xhci->port_array[i] == -1)
+				continue;
+
+			xhci->usb2_ports[port_index] =
+				&xhci->op_regs->port_status_base +
+				NUM_PORT_REGS*i;
+			xhci_dbg(xhci, "USB 2.0 port at index %u, "
+					"addr = %p\n", i,
+					xhci->usb2_ports[port_index]);
+			port_index++;
+		}
 	}
 	if (xhci->num_usb3_ports) {
 		xhci->usb3_ports = kmalloc(sizeof(*xhci->usb3_ports)*
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index 796e2f6..4ff2158 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -3,7 +3,7 @@
 /*
  *	uss720.c  --  USS720 USB Parport Cable.
  *
- *	Copyright (C) 1999, 2005
+ *	Copyright (C) 1999, 2005, 2010
  *	    Thomas Sailer (t.sailer@alumni.ethz.ch)
  *
  *	This program is free software; you can redistribute it and/or modify
@@ -776,6 +776,8 @@
 	{ USB_DEVICE(0x0557, 0x2001) },
 	{ USB_DEVICE(0x0729, 0x1284) },
 	{ USB_DEVICE(0x1293, 0x0002) },
+	{ USB_DEVICE(0x1293, 0x0002) },
+	{ USB_DEVICE(0x050d, 0x0002) },
 	{ }						/* Terminating entry */
 };
 
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 6a50965..2dec500 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -796,6 +796,7 @@
 	{ USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_LOGBOOKML_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_LS_LOGBOOK_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_HS_LOGBOOK_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_DOTEC_PID) },
 	{ USB_DEVICE(QIHARDWARE_VID, MILKYMISTONE_JTAGSERIAL_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ },					/* Optional parameter entry */
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 1286f1e2..bf08672 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -1081,6 +1081,11 @@
 #define MJSG_HD_RADIO_PID	0x937C
 
 /*
+ * D.O.Tec products (http://www.directout.eu)
+ */
+#define FTDI_DOTEC_PID 0x9868
+
+/*
  * Xverve Signalyzer tools (http://www.signalyzer.com/)
  */
 #define XVERVE_SIGNALYZER_ST_PID	0xBCA0
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 6ccdd3d..fcc1e32 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -481,6 +481,13 @@
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_MAX_SECTORS_64),
 
+/* Reported by Vitaly Kuznetsov <vitty@altlinux.ru> */
+UNUSUAL_DEV(  0x04e8, 0x5122, 0x0000, 0x9999,
+		"Samsung",
+		"YP-CP3",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_MAX_SECTORS_64 | US_FL_BULK_IGNORE_TAG),
+
 /* Entry and supporting patch by Theodore Kilgore <kilgota@auburn.edu>.
  * Device uses standards-violating 32-byte Bulk Command Block Wrappers and
  * reports itself as "Proprietary SCSI Bulk." Cf. device entry 0x084d:0x0011.
diff --git a/drivers/video/omap/Kconfig b/drivers/video/omap/Kconfig
index 455c605..083c8fe 100644
--- a/drivers/video/omap/Kconfig
+++ b/drivers/video/omap/Kconfig
@@ -1,7 +1,7 @@
 config FB_OMAP
 	tristate "OMAP frame buffer support (EXPERIMENTAL)"
-	depends on FB && ARCH_OMAP && (OMAP2_DSS = "n")
-
+	depends on FB && (OMAP2_DSS = "n")
+	depends on ARCH_OMAP1 || ARCH_OMAP2 || ARCH_OMAP3
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c
index 2fd7e52..9441e2e 100644
--- a/drivers/video/omap2/vram.c
+++ b/drivers/video/omap2/vram.c
@@ -551,7 +551,7 @@
 	if (!size)
 		return;
 
-	size = PAGE_ALIGN(size);
+	size = ALIGN(size, SZ_2M);
 
 	if (paddr) {
 		if (paddr & ~PAGE_MASK) {
@@ -576,7 +576,7 @@
 			return;
 		}
 	} else {
-		paddr = memblock_alloc(size, PAGE_SIZE);
+		paddr = memblock_alloc(size, SZ_2M);
 	}
 
 	memblock_free(paddr, size);
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 3d77116..dea7b5b 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -642,19 +642,14 @@
  */
 
 #ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 {
 	/*
 	 * If nmi_watchdog is turned off then we can turn on
 	 * our nmi decoding capability.
 	 */
-	if (!nmi_watchdog_active())
-		hpwdt_nmi_decoding = 1;
-	else
-		dev_warn(&dev->dev, "NMI decoding is disabled. To enable this "
-			"functionality you must reboot with nmi_watchdog=0 "
-			"and load the hpwdt driver with priority=1.\n");
+	hpwdt_nmi_decoding = 1;
 }
 #else
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
@@ -662,7 +657,7 @@
 	dev_warn(&dev->dev, "NMI decoding is disabled. "
 		"Your kernel does not support a NMI Watchdog.\n");
 }
-#endif /* ARCH_HAS_NMI_WATCHDOG */
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 6f04444..659f532 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -166,7 +166,7 @@
 static struct dentry *btrfs_get_parent(struct dentry *child)
 {
 	struct inode *dir = child->d_inode;
-	static struct dentry *dentry;
+	struct dentry *dentry;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 158c700..d902948 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,7 +40,8 @@
 	if (dentry->d_fsdata)
 		return 0;
 
-	if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+	if (dentry->d_parent == NULL ||   /* nfs fh_to_dentry */
+	    ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
 		dentry->d_op = &ceph_dentry_ops;
 	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
 		dentry->d_op = &ceph_snapdir_dentry_ops;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8d79b89..7d0e4a8 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -282,7 +282,8 @@
 static int striped_read(struct inode *inode,
 			u64 off, u64 len,
 			struct page **pages, int num_pages,
-			int *checkeof, bool align_to_pages)
+			int *checkeof, bool align_to_pages,
+			unsigned long buf_align)
 {
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -307,7 +308,7 @@
 
 more:
 	if (align_to_pages)
-		page_align = (pos - io_align) & ~PAGE_MASK;
+		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
 	else
 		page_align = pos & ~PAGE_MASK;
 	this_len = left;
@@ -376,16 +377,18 @@
 	struct inode *inode = file->f_dentry->d_inode;
 	struct page **pages;
 	u64 off = *poff;
-	int num_pages = calc_pages_for(off, len);
-	int ret;
+	int num_pages, ret;
 
 	dout("sync_read on file %p %llu~%u %s\n", file, off, len,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
-	if (file->f_flags & O_DIRECT)
-		pages = ceph_get_direct_page_vector(data, num_pages);
-	else
+	if (file->f_flags & O_DIRECT) {
+		num_pages = calc_pages_for((unsigned long)data, len);
+		pages = ceph_get_direct_page_vector(data, num_pages, true);
+	} else {
+		num_pages = calc_pages_for(off, len);
 		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+	}
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
@@ -400,7 +403,8 @@
 		goto done;
 
 	ret = striped_read(inode, off, len, pages, num_pages, checkeof,
-			   file->f_flags & O_DIRECT);
+			   file->f_flags & O_DIRECT,
+			   (unsigned long)data & ~PAGE_MASK);
 
 	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
 		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -409,7 +413,7 @@
 
 done:
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages, true);
 	else
 		ceph_release_page_vector(pages, num_pages);
 	dout("sync_read result %d\n", ret);
@@ -456,6 +460,7 @@
 	int do_sync = 0;
 	int check_caps = 0;
 	int page_align, io_align;
+	unsigned long buf_align;
 	int ret;
 	struct timespec mtime = CURRENT_TIME;
 
@@ -471,6 +476,7 @@
 		pos = *offset;
 
 	io_align = pos & ~PAGE_MASK;
+	buf_align = (unsigned long)data & ~PAGE_MASK;
 
 	ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
 	if (ret < 0)
@@ -496,12 +502,15 @@
 	 */
 more:
 	len = left;
-	if (file->f_flags & O_DIRECT)
+	if (file->f_flags & O_DIRECT) {
 		/* write from beginning of first page, regardless of
 		   io alignment */
-		page_align = (pos - io_align) & ~PAGE_MASK;
-	else
+		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+		num_pages = calc_pages_for((unsigned long)data, len);
+	} else {
 		page_align = pos & ~PAGE_MASK;
+		num_pages = calc_pages_for(pos, len);
+	}
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
 				    ceph_vino(inode), pos, &len,
 				    CEPH_OSD_OP_WRITE, flags,
@@ -512,10 +521,8 @@
 	if (!req)
 		return -ENOMEM;
 
-	num_pages = calc_pages_for(pos, len);
-
 	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages);
+		pages = ceph_get_direct_page_vector(data, num_pages, false);
 		if (IS_ERR(pages)) {
 			ret = PTR_ERR(pages);
 			goto out;
@@ -565,7 +572,7 @@
 	}
 
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages, false);
 	else if (file->f_flags & O_SYNC)
 		ceph_release_page_vector(pages, num_pages);
 
diff --git a/fs/namei.c b/fs/namei.c
index 5362af9..4ff7ca5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1748,6 +1748,9 @@
 	if (!(open_flag & O_CREAT))
 		mode = 0;
 
+	/* Must never be set by userspace */
+	open_flag &= ~FMODE_NONOTIFY;
+
 	/*
 	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
 	 * check for O_DSYNC if the need any syncing at all we enforce it's
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 33ad25d..caf9a6a 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -176,7 +176,6 @@
 int nilfs_init_gcinode(struct inode *inode)
 {
 	struct nilfs_inode_info *ii = NILFS_I(inode);
-	struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
 
 	inode->i_mode = S_IFREG;
 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
@@ -186,14 +185,6 @@
 	ii->i_flags = 0;
 	nilfs_bmap_init_gc(ii->i_bmap);
 
-	/*
-	 * Add the inode to GC inode list. Garbage Collection
-	 * is serialized and no two processes manipulate the
-	 * list simultaneously.
-	 */
-	igrab(inode);
-	list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
-
 	return 0;
 }
 
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index e00d945..b185e93 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -337,6 +337,7 @@
 				   struct nilfs_argv *argv, void *buf)
 {
 	size_t nmembs = argv->v_nmembs;
+	struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
 	struct inode *inode;
 	struct nilfs_vdesc *vdesc;
 	struct buffer_head *bh, *n;
@@ -353,6 +354,17 @@
 			ret = PTR_ERR(inode);
 			goto failed;
 		}
+		if (list_empty(&NILFS_I(inode)->i_dirty)) {
+			/*
+			 * Add the inode to GC inode list. Garbage Collection
+			 * is serialized and no two processes manipulate the
+			 * list simultaneously.
+			 */
+			igrab(inode);
+			list_add(&NILFS_I(inode)->i_dirty,
+				 &nilfs->ns_gc_inodes);
+		}
+
 		do {
 			ret = nilfs_ioctl_move_inode_block(inode, vdesc,
 							   &buffers);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index b04f88e..f35794b 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,11 @@
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq, event->response);
+	wait_event(group->fanotify_data.access_waitq, event->response ||
+				atomic_read(&group->fanotify_data.bypass_perm));
+
+	if (!event->response) /* bypass_perm set */
+		return 0;
 
 	/* userspace responded, convert to something usable */
 	spin_lock(&event->lock);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 0632248..8b61220 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -106,20 +106,29 @@
 	return client_fd;
 }
 
-static ssize_t fill_event_metadata(struct fsnotify_group *group,
+static int fill_event_metadata(struct fsnotify_group *group,
 				   struct fanotify_event_metadata *metadata,
 				   struct fsnotify_event *event)
 {
+	int ret = 0;
+
 	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
 		 group, metadata, event);
 
 	metadata->event_len = FAN_EVENT_METADATA_LEN;
+	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
 	metadata->pid = pid_vnr(event->tgid);
-	metadata->fd = create_fd(group, event);
+	if (unlikely(event->mask & FAN_Q_OVERFLOW))
+		metadata->fd = FAN_NOFD;
+	else {
+		metadata->fd = create_fd(group, event);
+		if (metadata->fd < 0)
+			ret = metadata->fd;
+	}
 
-	return metadata->fd;
+	return ret;
 }
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -200,7 +209,7 @@
 
 	mutex_lock(&group->fanotify_data.access_mutex);
 
-	if (group->fanotify_data.bypass_perm) {
+	if (atomic_read(&group->fanotify_data.bypass_perm)) {
 		mutex_unlock(&group->fanotify_data.access_mutex);
 		kmem_cache_free(fanotify_response_event_cache, re);
 		event->response = FAN_ALLOW;
@@ -257,24 +266,34 @@
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	fd = fill_event_metadata(group, &fanotify_event_metadata, event);
-	if (fd < 0)
-		return fd;
+	ret = fill_event_metadata(group, &fanotify_event_metadata, event);
+	if (ret < 0)
+		goto out;
 
+	fd = fanotify_event_metadata.fd;
 	ret = prepare_for_access_response(group, event, fd);
 	if (ret)
 		goto out_close_fd;
 
 	ret = -EFAULT;
-	if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
+	if (copy_to_user(buf, &fanotify_event_metadata,
+			 fanotify_event_metadata.event_len))
 		goto out_kill_access_response;
 
-	return FAN_EVENT_METADATA_LEN;
+	return fanotify_event_metadata.event_len;
 
 out_kill_access_response:
 	remove_access_response(group, event, fd);
 out_close_fd:
-	sys_close(fd);
+	if (fd != FAN_NOFD)
+		sys_close(fd);
+out:
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (event->mask & FAN_ALL_PERM_EVENTS) {
+		event->response = FAN_DENY;
+		wake_up(&group->fanotify_data.access_waitq);
+	}
+#endif
 	return ret;
 }
 
@@ -382,7 +401,7 @@
 
 	mutex_lock(&group->fanotify_data.access_mutex);
 
-	group->fanotify_data.bypass_perm = true;
+	atomic_inc(&group->fanotify_data.bypass_perm);
 
 	list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
 		pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
@@ -586,11 +605,10 @@
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
+	int ret = 0;
 
 	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
 	if (!fsn_mark) {
-		int ret;
-
 		if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
 			return -ENOSPC;
 
@@ -600,17 +618,16 @@
 
 		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
-		if (ret) {
-			fanotify_free_mark(fsn_mark);
-			return ret;
-		}
+		if (ret)
+			goto err;
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	fsnotify_put_mark(fsn_mark);
+
 	if (added & ~mnt->mnt_fsnotify_mask)
 		fsnotify_recalc_vfsmount_mask(mnt);
-
-	return 0;
+err:
+	fsnotify_put_mark(fsn_mark);
+	return ret;
 }
 
 static int fanotify_add_inode_mark(struct fsnotify_group *group,
@@ -619,6 +636,7 @@
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
+	int ret = 0;
 
 	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 
@@ -634,8 +652,6 @@
 
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark) {
-		int ret;
-
 		if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
 			return -ENOSPC;
 
@@ -645,16 +661,16 @@
 
 		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
-		if (ret) {
-			fanotify_free_mark(fsn_mark);
-			return ret;
-		}
+		if (ret)
+			goto err;
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	fsnotify_put_mark(fsn_mark);
+
 	if (added & ~inode->i_fsnotify_mask)
 		fsnotify_recalc_inode_mask(inode);
-	return 0;
+err:
+	fsnotify_put_mark(fsn_mark);
+	return ret;
 }
 
 /* fanotify syscalls */
@@ -687,8 +703,10 @@
 
 	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
 	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
-	if (IS_ERR(group))
+	if (IS_ERR(group)) {
+		free_uid(user);
 		return PTR_ERR(group);
+	}
 
 	group->fanotify_data.user = user;
 	atomic_inc(&user->fanotify_listeners);
@@ -698,6 +716,7 @@
 	mutex_init(&group->fanotify_data.access_mutex);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+	atomic_set(&group->fanotify_data.bypass_perm, 0);
 #endif
 	switch (flags & FAN_ALL_CLASS_BITS) {
 	case FAN_CLASS_NOTIF:
@@ -764,8 +783,10 @@
 	if (flags & ~FAN_ALL_MARK_FLAGS)
 		return -EINVAL;
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
-	case FAN_MARK_ADD:
+	case FAN_MARK_ADD:		/* fallthrough */
 	case FAN_MARK_REMOVE:
+		if (!mask)
+			return -EINVAL;
 	case FAN_MARK_FLUSH:
 		break;
 	default:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 444c305..4cd5d5d 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -752,6 +752,7 @@
 	if (ret >= 0)
 		return ret;
 
+	fsnotify_put_group(group);
 	atomic_dec(&user->inotify_devs);
 out_free_uid:
 	free_uid(user);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aae86fd..36ab42c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -250,7 +250,7 @@
 
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
-	unsigned char		no_cluster;
+	unsigned char		cluster;
 	signed char		discard_zeroes_data;
 };
 
@@ -380,7 +380,6 @@
 #endif
 };
 
-#define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
 #define	QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
@@ -403,7 +402,6 @@
 #define QUEUE_FLAG_SECDISCARD  19	/* supports SECDISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
-				 (1 << QUEUE_FLAG_CLUSTER) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_ADD_RANDOM))
@@ -510,6 +508,11 @@
 
 #define rq_data_dir(rq)		((rq)->cmd_flags & 1)
 
+static inline unsigned int blk_queue_cluster(struct request_queue *q)
+{
+	return q->limits.cluster;
+}
+
 /*
  * We regard a request as sync, if either a read or a sync write
  */
@@ -805,6 +808,7 @@
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
+extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 266ab92..499dfe9 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -105,6 +105,8 @@
 
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_align(x, align) \
+	__alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_nopanic(x) \
 	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages(x) \
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 9e76d35..72c72bf 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -227,8 +227,10 @@
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 
 extern struct page **ceph_get_direct_page_vector(const char __user *data,
-						 int num_pages);
-extern void ceph_put_page_vector(struct page **pages, int num_pages);
+						 int num_pages,
+						 bool write_page);
+extern void ceph_put_page_vector(struct page **pages, int num_pages,
+				 bool dirty);
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_copy_user_to_page_vector(struct page **pages,
diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h
index 7605fdd..e3d8bf2 100644
--- a/include/linux/cnt32_to_63.h
+++ b/include/linux/cnt32_to_63.h
@@ -61,13 +61,31 @@
  *
  * 2) this code must not be preempted for a duration longer than the
  *    32-bit counter half period minus the longest period between two
- *    calls to this code.
+ *    calls to this code;
  *
  * Those requirements ensure proper update to the state bit in memory.
  * This is usually not a problem in practice, but if it is then a kernel
  * timer should be scheduled to manage for this code to be executed often
  * enough.
  *
+ * And finally:
+ *
+ * 3) the cnt_lo argument must be seen as a globally incrementing value,
+ *    meaning that it should be a direct reference to the counter data which
+ *    can be evaluated according to a specific ordering within the macro,
+ *    and not the result of a previous evaluation stored in a variable.
+ *
+ * For example, this is wrong:
+ *
+ *	u32 partial = get_hw_count();
+ *	u64 full = cnt32_to_63(partial);
+ *	return full;
+ *
+ * This is fine:
+ *
+ *	u64 full = cnt32_to_63(get_hw_count());
+ *	return full;
+ *
  * Note that the top bit (bit 63) in the returned value should be considered
  * as garbage.  It is not cleared here because callers are likely to use a
  * multiplier on the returned value which can get rid of the top bit
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 0f01214..6c6133f 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -83,11 +83,13 @@
 				 FAN_ALL_PERM_EVENTS |\
 				 FAN_Q_OVERFLOW)
 
-#define FANOTIFY_METADATA_VERSION	2
+#define FANOTIFY_METADATA_VERSION	3
 
 struct fanotify_event_metadata {
 	__u32 event_len;
-	__u32 vers;
+	__u8 vers;
+	__u8 reserved;
+	__u16 metadata_len;
 	__aligned_u64 mask;
 	__s32 fd;
 	__s32 pid;
@@ -96,11 +98,13 @@
 struct fanotify_response {
 	__s32 fd;
 	__u32 response;
-} __attribute__ ((packed));
+};
 
 /* Legit userspace responses to a _PERM event */
 #define FAN_ALLOW	0x01
 #define FAN_DENY	0x02
+/* No fd set in event */
+#define FAN_NOFD	-1
 
 /* Helper functions to deal with fanotify_event_metadata buffers */
 #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 5c185fa..b10bcde 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -235,9 +235,6 @@
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_ISDIR;
 
-	/* FMODE_NONOTIFY must never be set from user */
-	file->f_mode &= ~FMODE_NONOTIFY;
-
 	fsnotify_parent(path, NULL, mask);
 	fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0);
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0a68f92..7380763 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -166,7 +166,7 @@
 			struct mutex access_mutex;
 			struct list_head access_list;
 			wait_queue_head_t access_waitq;
-			bool bypass_perm; /* protected by access_mutex */
+			atomic_t bypass_perm;
 #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
 			int f_flags;
 			unsigned int max_marks;
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 8beabb9..47e3997 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -154,12 +154,14 @@
 	TRACE_EVENT_FL_ENABLED_BIT,
 	TRACE_EVENT_FL_FILTERED_BIT,
 	TRACE_EVENT_FL_RECORDED_CMD_BIT,
+	TRACE_EVENT_FL_CAP_ANY_BIT,
 };
 
 enum {
 	TRACE_EVENT_FL_ENABLED		= (1 << TRACE_EVENT_FL_ENABLED_BIT),
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
 	TRACE_EVENT_FL_RECORDED_CMD	= (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
+	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 };
 
 struct ftrace_event_call {
@@ -196,6 +198,14 @@
 #endif
 };
 
+#define __TRACE_EVENT_FLAGS(name, value)				\
+	static int __init trace_init_flags_##name(void)			\
+	{								\
+		event_##name.flags = value;				\
+		return 0;						\
+	}								\
+	early_initcall(trace_init_flags_##name);
+
 #define PERF_MAX_TRACE_SIZE	2048
 
 #define MAX_FILTER_PRED		32
@@ -215,6 +225,10 @@
 	FILTER_PTR_STRING,
 };
 
+#define EVENT_STORAGE_SIZE 128
+extern struct mutex event_storage_mutex;
+extern char event_storage[EVENT_STORAGE_SIZE];
+
 extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
diff --git a/include/linux/input.h b/include/linux/input.h
index a8af21d..9777668 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -104,8 +104,10 @@
 #define EVIOCGREP		_IOR('E', 0x03, unsigned int[2])	/* get repeat settings */
 #define EVIOCSREP		_IOW('E', 0x03, unsigned int[2])	/* set repeat settings */
 
-#define EVIOCGKEYCODE		_IOR('E', 0x04, struct input_keymap_entry)	/* get keycode */
-#define EVIOCSKEYCODE		_IOW('E', 0x04, struct input_keymap_entry)	/* set keycode */
+#define EVIOCGKEYCODE		_IOR('E', 0x04, unsigned int[2])        /* get keycode */
+#define EVIOCGKEYCODE_V2	_IOR('E', 0x04, struct input_keymap_entry)
+#define EVIOCSKEYCODE		_IOW('E', 0x04, unsigned int[2])        /* set keycode */
+#define EVIOCSKEYCODE_V2	_IOW('E', 0x04, struct input_keymap_entry)
 
 #define EVIOCGNAME(len)		_IOC(_IOC_READ, 'E', 0x06, len)		/* get device name */
 #define EVIOCGPHYS(len)		_IOC(_IOC_READ, 'E', 0x07, len)		/* get physical location */
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index d377ea8..e9bb22c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -112,7 +112,6 @@
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
-extern int resource_alloc_from_bottom;
 
 extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
 extern int request_resource(struct resource *root, struct resource *new);
@@ -124,6 +123,7 @@
 extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new);
 extern int insert_resource(struct resource *parent, struct resource *new);
 extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new);
+extern void arch_remove_reservations(struct resource *avail);
 extern int allocate_resource(struct resource *root, struct resource *new,
 			     resource_size_t size, resource_size_t min,
 			     resource_size_t max, resource_size_t align,
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e7d1b2e..b78edb5 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -275,7 +275,9 @@
 extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
 extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
 extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
-extern int  arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_optimize_kprobes(struct list_head *oplist);
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+				    struct list_head *done_list);
 extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
 extern kprobe_opcode_t *get_optinsn_slot(void);
 extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 06aab5e..c536f85 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -14,22 +14,14 @@
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 #include <asm/nmi.h>
 extern void touch_nmi_watchdog(void);
-extern void acpi_nmi_disable(void);
-extern void acpi_nmi_enable(void);
 #else
-#ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
 	touch_softlockup_watchdog();
 }
-#else
-extern void touch_nmi_watchdog(void);
-#endif
-static inline void acpi_nmi_disable(void) { }
-static inline void acpi_nmi_enable(void) { }
 #endif
 
 /*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index de2c417..dda5b0a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -215,8 +215,9 @@
 				 */
 				precise_ip     :  2, /* skid constraint       */
 				mmap_data      :  1, /* non-exec mmap data    */
+				sample_id_all  :  1, /* sample_type all events */
 
-				__reserved_1   : 46;
+				__reserved_1   : 45;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -327,6 +328,15 @@
 enum perf_event_type {
 
 	/*
+	 * If perf_event_attr.sample_id_all is set then all event types will
+	 * have the sample_type selected fields related to where/when
+	 * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID)
+	 * described in PERF_RECORD_SAMPLE below, it will be stashed just after
+	 * the perf_event_header and the fields already present for the existing
+	 * fields, i.e. at the end of the payload. That way a newer perf.data
+	 * file will be supported by older perf tools, with these new optional
+	 * fields being ignored.
+	 *
 	 * The MMAP events record the PROT_EXEC mappings so that we can
 	 * correlate userspace IPs to code. They have the following structure:
 	 *
@@ -578,6 +588,10 @@
 struct pmu {
 	struct list_head		entry;
 
+	struct device			*dev;
+	char				*name;
+	int				type;
+
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
 	int				task_ctx_nr;
@@ -758,6 +772,9 @@
 	u64				shadow_ctx_time;
 
 	struct perf_event_attr		attr;
+	u16				header_size;
+	u16				id_header_size;
+	u16				read_size;
 	struct hw_perf_event		hw;
 
 	struct perf_event_context	*ctx;
@@ -887,6 +904,7 @@
 	int				exclusive;
 	struct list_head		rotation_list;
 	int				jiffies_interval;
+	struct pmu			*active_pmu;
 };
 
 struct perf_output_handle {
@@ -902,7 +920,7 @@
 
 #ifdef CONFIG_PERF_EVENTS
 
-extern int perf_pmu_register(struct pmu *pmu);
+extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
 
 extern int perf_num_counters(void);
@@ -969,6 +987,11 @@
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);
 
+static inline bool is_sampling_event(struct perf_event *event)
+{
+	return event->attr.sample_period != 0;
+}
+
 /*
  * Return 1 for a software event, 0 for a hardware event
  */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 3ec2358..d19f1cc 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -77,7 +77,8 @@
 
 static inline bool pm_runtime_suspended(struct device *dev)
 {
-	return dev->power.runtime_status == RPM_SUSPENDED;
+	return dev->power.runtime_status == RPM_SUSPENDED
+		&& !dev->power.disable_depth;
 }
 
 static inline void pm_runtime_mark_last_busy(struct device *dev)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c79e92..a99d735 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -143,7 +143,7 @@
 extern unsigned long this_cpu_load(void);
 
 
-extern void calc_global_load(void);
+extern void calc_global_load(unsigned long ticks);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
@@ -316,6 +316,7 @@
 				  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
+void lockup_detector_init(void);
 #else
 static inline void touch_softlockup_watchdog(void)
 {
@@ -326,6 +327,9 @@
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
+static inline void lockup_detector_init(void)
+{
+}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h
index 942e387..eba52a1 100644
--- a/include/linux/ssb/ssb_driver_gige.h
+++ b/include/linux/ssb/ssb_driver_gige.h
@@ -96,16 +96,21 @@
 	return 0;
 }
 
-extern char * nvram_get(const char *name);
+#ifdef CONFIG_BCM47XX
+#include <asm/mach-bcm47xx/nvram.h>
 /* Get the device MAC address */
 static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 {
-#ifdef CONFIG_BCM47XX
-	char *res = nvram_get("et0macaddr");
-	if (res)
-		memcpy(macaddr, res, 6);
-#endif
+	char buf[20];
+	if (nvram_getenv("et0macaddr", buf, sizeof(buf)) < 0)
+		return;
+	nvram_parse_macaddr(buf, macaddr);
 }
+#else
+static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
+{
+}
+#endif
 
 extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev,
 					  struct pci_dev *pdev);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 51efbef..25310f1 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -2,6 +2,7 @@
 #define __LINUX_STACKTRACE_H
 
 struct task_struct;
+struct pt_regs;
 
 #ifdef CONFIG_STACKTRACE
 struct task_struct;
@@ -13,7 +14,8 @@
 };
 
 extern void save_stack_trace(struct stack_trace *trace);
-extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp);
+extern void save_stack_trace_regs(struct stack_trace *trace,
+				  struct pt_regs *regs);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
 				struct stack_trace *trace);
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cacc27a..18cd068 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -127,8 +127,6 @@
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
 	static struct syscall_metadata					\
 	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
-	static struct ftrace_event_call					\
-	__attribute__((__aligned__(4))) event_enter_##sname;		\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
@@ -137,13 +135,12 @@
 		.class			= &event_class_syscall_enter,	\
 		.event.funcs            = &enter_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-	}
+	};								\
+	__TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static struct syscall_metadata					\
 	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
-	static struct ftrace_event_call					\
-	__attribute__((__aligned__(4))) event_exit_##sname;		\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
@@ -152,7 +149,8 @@
 		.class			= &event_class_syscall_exit,	\
 		.event.funcs		= &exit_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-	}
+	};								\
+	__TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_METADATA(sname, nb)				\
 	SYSCALL_TRACE_ENTER_EVENT(sname);			\
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a4a90b6..d3e4f87 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -106,6 +106,7 @@
 
 #define TP_PROTO(args...)	args
 #define TP_ARGS(args...)	args
+#define TP_CONDITION(args...)	args
 
 #ifdef CONFIG_TRACEPOINTS
 
@@ -119,12 +120,14 @@
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args)					\
+#define __DO_TRACE(tp, proto, args, cond)				\
 	do {								\
 		struct tracepoint_func *it_func_ptr;			\
 		void *it_func;						\
 		void *__data;						\
 									\
+		if (!(cond))						\
+			return;						\
 		rcu_read_lock_sched_notrace();				\
 		it_func_ptr = rcu_dereference_sched((tp)->funcs);	\
 		if (it_func_ptr) {					\
@@ -142,7 +145,7 @@
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)	\
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)	\
 	extern struct tracepoint __tracepoint_##name;			\
 	static inline void trace_##name(proto)				\
 	{								\
@@ -151,7 +154,8 @@
 do_trace:								\
 			__DO_TRACE(&__tracepoint_##name,		\
 				TP_PROTO(data_proto),			\
-				TP_ARGS(data_args));			\
+				TP_ARGS(data_args),			\
+				TP_CONDITION(cond));			\
 	}								\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto), void *data)	\
@@ -186,7 +190,7 @@
 	EXPORT_SYMBOL(__tracepoint_##name)
 
 #else /* !CONFIG_TRACEPOINTS */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)	\
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)	\
 	static inline void trace_##name(proto)				\
 	{ }								\
 	static inline int						\
@@ -227,13 +231,20 @@
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)					\
-		__DECLARE_TRACE(name, void, , void *__data, __data)
+		__DECLARE_TRACE(name, void, , 1, void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)				\
-		__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),	\
+		__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,	\
 				PARAMS(void *__data, proto),		\
 				PARAMS(__data, args))
 
+#define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+			PARAMS(void *__data, proto),			\
+			PARAMS(__data, args))
+
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #endif /* DECLARE_TRACE */
 
 #ifndef TRACE_EVENT
@@ -347,11 +358,21 @@
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_CONDITION(template, name, proto,		\
+			       args, cond)			\
+	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
+				PARAMS(args), PARAMS(cond))
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,		\
 		assign, print, reg, unreg)			\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT_CONDITION(name, proto, args, cond,		\
+			      struct, assign, print)		\
+	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
+				PARAMS(args), PARAMS(cond))
+
+#define TRACE_EVENT_FLAGS(event, flag)
 
 #endif /* ifdef TRACE_EVENT (see note above) */
diff --git a/include/media/saa7146.h b/include/media/saa7146.h
index 7a9f76e..ac7ce00 100644
--- a/include/media/saa7146.h
+++ b/include/media/saa7146.h
@@ -161,7 +161,7 @@
 extern struct mutex saa7146_devices_lock;
 int saa7146_register_extension(struct saa7146_extension*);
 int saa7146_unregister_extension(struct saa7146_extension*);
-struct saa7146_format* format_by_fourcc(struct saa7146_dev *dev, int fourcc);
+struct saa7146_format* saa7146_format_by_fourcc(struct saa7146_dev *dev, int fourcc);
 int saa7146_pgtable_alloc(struct pci_dev *pci, struct saa7146_pgtable *pt);
 void saa7146_pgtable_free(struct pci_dev *pci, struct saa7146_pgtable *pt);
 int saa7146_pgtable_build_single(struct pci_dev *pci, struct saa7146_pgtable *pt, struct scatterlist *list, int length );
diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
index 6648036..b16f307 100644
--- a/include/media/v4l2-device.h
+++ b/include/media/v4l2-device.h
@@ -51,6 +51,8 @@
 			unsigned int notification, void *arg);
 	/* The control handler. May be NULL. */
 	struct v4l2_ctrl_handler *ctrl_handler;
+	/* BKL replacement mutex. Temporary solution only. */
+	struct mutex ioctl_lock;
 };
 
 /* Initialize v4l2_dev and make dev->driver_data point to v4l2_dev.
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 1dfab54..b0b4eb2 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -26,6 +26,15 @@
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
 	DEFINE_TRACE(name)
 
+#undef TRACE_EVENT_CONDITION
+#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
+	TRACE_EVENT(name,						\
+		PARAMS(proto),						\
+		PARAMS(args),						\
+		PARAMS(tstruct),					\
+		PARAMS(assign),						\
+		PARAMS(print))
+
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,		\
 		assign, print, reg, unreg)			\
@@ -39,6 +48,10 @@
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DEFINE_TRACE(name)
 
+#undef DEFINE_EVENT_CONDITION
+#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
@@ -75,9 +88,11 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef TRACE_EVENT_CONDITION
 #undef DECLARE_EVENT_CLASS
 #undef DEFINE_EVENT
 #undef DEFINE_EVENT_PRINT
+#undef DEFINE_EVENT_CONDITION
 #undef TRACE_HEADER_MULTI_READ
 #undef DECLARE_TRACE
 
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index fb726ac..5a4c04a 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -40,6 +40,8 @@
 	syscall_regfunc, syscall_unregfunc
 );
 
+TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
+
 TRACE_EVENT_FN(sys_exit,
 
 	TP_PROTO(struct pt_regs *regs, long ret),
@@ -62,6 +64,8 @@
 	syscall_regfunc, syscall_unregfunc
 );
 
+TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY)
+
 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
 
 #endif /* _TRACE_EVENTS_SYSCALLS_H */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a9377c0..e16610c 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -82,6 +82,10 @@
 	TRACE_EVENT(name, PARAMS(proto), PARAMS(args),			\
 		PARAMS(tstruct), PARAMS(assign), PARAMS(print))		\
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(name, value)					\
+	__TRACE_EVENT_FLAGS(name, value)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 
@@ -129,6 +133,9 @@
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -289,13 +296,19 @@
 
 #undef __array
 #define __array(type, item, len)					\
-	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
-	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+	do {								\
+		mutex_lock(&event_storage_mutex);			\
+		BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);			\
+		snprintf(event_storage, sizeof(event_storage),		\
+			 "%s[%d]", #type, len);				\
+		ret = trace_define_field(event_call, event_storage, #item, \
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
 				 is_signed_type(type), FILTER_OTHER);	\
-	if (ret)							\
-		return ret;
+		mutex_unlock(&event_storage_mutex);			\
+		if (ret)						\
+			return ret;					\
+	} while (0);
 
 #undef __dynamic_array
 #define __dynamic_array(type, item, len)				       \
diff --git a/init/main.c b/init/main.c
index 8646401..ea51770 100644
--- a/init/main.c
+++ b/init/main.c
@@ -67,6 +67,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
+#include <linux/perf_event.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -603,6 +604,8 @@
 				"enabled *very* early, fixing it\n");
 		local_irq_disable();
 	}
+	idr_init_cache();
+	perf_event_init();
 	rcu_init();
 	radix_tree_init();
 	/* init some links before init_ISA_irqs() */
@@ -658,7 +661,6 @@
 	enable_debug_pagealloc();
 	kmemleak_init();
 	debug_objects_mem_init();
-	idr_init_cache();
 	setup_per_cpu_pageset();
 	numa_policy_init();
 	if (late_time_init)
@@ -882,6 +884,7 @@
 	smp_prepare_cpus(setup_max_cpus);
 
 	do_pre_smp_initcalls();
+	lockup_detector_init();
 
 	smp_init();
 	sched_init_smp();
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5..5447dc7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -273,6 +273,7 @@
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
+	clear_tsk_need_resched(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index e532582..086adf2 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -641,7 +641,7 @@
 
 	constraints_initialized = 1;
 
-	perf_pmu_register(&perf_breakpoint);
+	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
 
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9737a76..7663e5d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -354,13 +354,20 @@
 	return p->pre_handler == aggr_pre_handler;
 }
 
+/* Return true(!0) if the kprobe is unused */
+static inline int kprobe_unused(struct kprobe *p)
+{
+	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
+	       list_empty(&p->list);
+}
+
 /*
  * Keep all fields in the kprobe consistent
  */
-static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
 {
-	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
-	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
+	memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
 }
 
 #ifdef CONFIG_OPTPROBES
@@ -384,6 +391,17 @@
 	}
 }
 
+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+	arch_remove_optimized_kprobe(op);
+	arch_remove_kprobe(p);
+	kfree(op);
+}
+
 /* Return true(!0) if the kprobe is ready for optimization. */
 static inline int kprobe_optready(struct kprobe *p)
 {
@@ -397,6 +415,33 @@
 	return 0;
 }
 
+/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
+static inline int kprobe_disarmed(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	/* If kprobe is not aggr/opt probe, just return kprobe is disabled */
+	if (!kprobe_aggrprobe(p))
+		return kprobe_disabled(p);
+
+	op = container_of(p, struct optimized_kprobe, kp);
+
+	return kprobe_disabled(p) && list_empty(&op->list);
+}
+
+/* Return true(!0) if the probe is queued on (un)optimizing lists */
+static int __kprobes kprobe_queued(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	if (kprobe_aggrprobe(p)) {
+		op = container_of(p, struct optimized_kprobe, kp);
+		if (!list_empty(&op->list))
+			return 1;
+	}
+	return 0;
+}
+
 /*
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including addr (exclude breakpoint).
@@ -422,30 +467,23 @@
 
 /* Optimization staging list, protected by kprobe_mutex */
 static LIST_HEAD(optimizing_list);
+static LIST_HEAD(unoptimizing_list);
 
 static void kprobe_optimizer(struct work_struct *work);
 static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+static DECLARE_COMPLETION(optimizer_comp);
 #define OPTIMIZE_DELAY 5
 
-/* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+/*
+ * Optimize (replace a breakpoint with a jump) kprobes listed on
+ * optimizing_list.
+ */
+static __kprobes void do_optimize_kprobes(void)
 {
-	struct optimized_kprobe *op, *tmp;
-
-	/* Lock modules while optimizing kprobes */
-	mutex_lock(&module_mutex);
-	mutex_lock(&kprobe_mutex);
-	if (kprobes_all_disarmed || !kprobes_allow_optimization)
-		goto end;
-
-	/*
-	 * Wait for quiesence period to ensure all running interrupts
-	 * are done. Because optprobe may modify multiple instructions
-	 * there is a chance that Nth instruction is interrupted. In that
-	 * case, running interrupt can return to 2nd-Nth byte of jump
-	 * instruction. This wait is for avoiding it.
-	 */
-	synchronize_sched();
+	/* Optimization never be done when disarmed */
+	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+	    list_empty(&optimizing_list))
+		return;
 
 	/*
 	 * The optimization/unoptimization refers online_cpus via
@@ -459,17 +497,111 @@
 	 */
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
-		WARN_ON(kprobe_disabled(&op->kp));
-		if (arch_optimize_kprobe(op) < 0)
-			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-		list_del_init(&op->list);
+	arch_optimize_kprobes(&optimizing_list);
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
+}
+
+/*
+ * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
+ * if need) kprobes listed on unoptimizing_list.
+ */
+static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
+{
+	struct optimized_kprobe *op, *tmp;
+
+	/* Unoptimization must be done anytime */
+	if (list_empty(&unoptimizing_list))
+		return;
+
+	/* Ditto to do_optimize_kprobes */
+	get_online_cpus();
+	mutex_lock(&text_mutex);
+	arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+	/* Loop free_list for disarming */
+	list_for_each_entry_safe(op, tmp, free_list, list) {
+		/* Disarm probes if marked disabled */
+		if (kprobe_disabled(&op->kp))
+			arch_disarm_kprobe(&op->kp);
+		if (kprobe_unused(&op->kp)) {
+			/*
+			 * Remove unused probes from hash list. After waiting
+			 * for synchronization, these probes are reclaimed.
+			 * (reclaiming is done by do_free_cleaned_kprobes.)
+			 */
+			hlist_del_rcu(&op->kp.hlist);
+		} else
+			list_del_init(&op->list);
 	}
 	mutex_unlock(&text_mutex);
 	put_online_cpus();
-end:
+}
+
+/* Reclaim all kprobes on the free_list */
+static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
+{
+	struct optimized_kprobe *op, *tmp;
+
+	list_for_each_entry_safe(op, tmp, free_list, list) {
+		BUG_ON(!kprobe_unused(&op->kp));
+		list_del_init(&op->list);
+		free_aggr_kprobe(&op->kp);
+	}
+}
+
+/* Start optimizer after OPTIMIZE_DELAY passed */
+static __kprobes void kick_kprobe_optimizer(void)
+{
+	if (!delayed_work_pending(&optimizing_work))
+		schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+	LIST_HEAD(free_list);
+
+	/* Lock modules while optimizing kprobes */
+	mutex_lock(&module_mutex);
+	mutex_lock(&kprobe_mutex);
+
+	/*
+	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
+	 * kprobes before waiting for quiesence period.
+	 */
+	do_unoptimize_kprobes(&free_list);
+
+	/*
+	 * Step 2: Wait for quiesence period to ensure all running interrupts
+	 * are done. Because optprobe may modify multiple instructions
+	 * there is a chance that Nth instruction is interrupted. In that
+	 * case, running interrupt can return to 2nd-Nth byte of jump
+	 * instruction. This wait is for avoiding it.
+	 */
+	synchronize_sched();
+
+	/* Step 3: Optimize kprobes after quiesence period */
+	do_optimize_kprobes();
+
+	/* Step 4: Free cleaned kprobes after quiesence period */
+	do_free_cleaned_kprobes(&free_list);
+
 	mutex_unlock(&kprobe_mutex);
 	mutex_unlock(&module_mutex);
+
+	/* Step 5: Kick optimizer again if needed */
+	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
+		kick_kprobe_optimizer();
+	else
+		/* Wake up all waiters */
+		complete_all(&optimizer_comp);
+}
+
+/* Wait for completing optimization and unoptimization */
+static __kprobes void wait_for_kprobe_optimizer(void)
+{
+	if (delayed_work_pending(&optimizing_work))
+		wait_for_completion(&optimizer_comp);
 }
 
 /* Optimize kprobe if p is ready to be optimized */
@@ -495,28 +627,85 @@
 	/* Check if it is already optimized. */
 	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
 		return;
-
 	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
-	list_add(&op->list, &optimizing_list);
-	if (!delayed_work_pending(&optimizing_work))
-		schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+
+	if (!list_empty(&op->list))
+		/* This is under unoptimizing. Just dequeue the probe */
+		list_del_init(&op->list);
+	else {
+		list_add(&op->list, &optimizing_list);
+		kick_kprobe_optimizer();
+	}
+}
+
+/* Short cut to direct unoptimizing */
+static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+	get_online_cpus();
+	arch_unoptimize_kprobe(op);
+	put_online_cpus();
+	if (kprobe_disabled(&op->kp))
+		arch_disarm_kprobe(&op->kp);
 }
 
 /* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p)
+static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
 {
 	struct optimized_kprobe *op;
 
-	if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
-		op = container_of(p, struct optimized_kprobe, kp);
-		if (!list_empty(&op->list))
-			/* Dequeue from the optimization queue */
+	if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
+		return; /* This is not an optprobe nor optimized */
+
+	op = container_of(p, struct optimized_kprobe, kp);
+	if (!kprobe_optimized(p)) {
+		/* Unoptimized or unoptimizing case */
+		if (force && !list_empty(&op->list)) {
+			/*
+			 * Only if this is unoptimizing kprobe and forced,
+			 * forcibly unoptimize it. (No need to unoptimize
+			 * unoptimized kprobe again :)
+			 */
 			list_del_init(&op->list);
-		else
-			/* Replace jump with break */
-			arch_unoptimize_kprobe(op);
-		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+			force_unoptimize_kprobe(op);
+		}
+		return;
 	}
+
+	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+	if (!list_empty(&op->list)) {
+		/* Dequeue from the optimization queue */
+		list_del_init(&op->list);
+		return;
+	}
+	/* Optimized kprobe case */
+	if (force)
+		/* Forcibly update the code: this is a special case */
+		force_unoptimize_kprobe(op);
+	else {
+		list_add(&op->list, &unoptimizing_list);
+		kick_kprobe_optimizer();
+	}
+}
+
+/* Cancel unoptimizing for reusing */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+	struct optimized_kprobe *op;
+
+	BUG_ON(!kprobe_unused(ap));
+	/*
+	 * Unused kprobe MUST be on the way of delayed unoptimizing (means
+	 * there is still a relative jump) and disabled.
+	 */
+	op = container_of(ap, struct optimized_kprobe, kp);
+	if (unlikely(list_empty(&op->list)))
+		printk(KERN_WARNING "Warning: found a stray unused "
+			"aggrprobe@%p\n", ap->addr);
+	/* Enable the probe again */
+	ap->flags &= ~KPROBE_FLAG_DISABLED;
+	/* Optimize it again (remove from op->list) */
+	BUG_ON(!kprobe_optready(ap));
+	optimize_kprobe(ap);
 }
 
 /* Remove optimized instructions */
@@ -525,12 +714,12 @@
 	struct optimized_kprobe *op;
 
 	op = container_of(p, struct optimized_kprobe, kp);
-	if (!list_empty(&op->list)) {
-		/* Dequeue from the optimization queue */
+	if (!list_empty(&op->list))
+		/* Dequeue from the (un)optimization queue */
 		list_del_init(&op->list);
-		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-	}
-	/* Don't unoptimize, because the target code will be freed. */
+
+	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+	/* Don't touch the code, because it is already freed. */
 	arch_remove_optimized_kprobe(op);
 }
 
@@ -543,16 +732,6 @@
 	arch_prepare_optimized_kprobe(op);
 }
 
-/* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
-{
-	struct optimized_kprobe *op;
-
-	op = container_of(p, struct optimized_kprobe, kp);
-	arch_remove_optimized_kprobe(op);
-	kfree(op);
-}
-
 /* Allocate new optimized_kprobe and try to prepare optimized instructions */
 static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
@@ -587,7 +766,8 @@
 	op = container_of(ap, struct optimized_kprobe, kp);
 	if (!arch_prepared_optinsn(&op->optinsn)) {
 		/* If failed to setup optimizing, fallback to kprobe */
-		free_aggr_kprobe(ap);
+		arch_remove_optimized_kprobe(op);
+		kfree(op);
 		return;
 	}
 
@@ -631,21 +811,16 @@
 		return;
 
 	kprobes_allow_optimization = false;
-	printk(KERN_INFO "Kprobes globally unoptimized\n");
-	get_online_cpus();	/* For avoiding text_mutex deadlock */
-	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist) {
 			if (!kprobe_disabled(p))
-				unoptimize_kprobe(p);
+				unoptimize_kprobe(p, false);
 		}
 	}
-
-	mutex_unlock(&text_mutex);
-	put_online_cpus();
-	/* Allow all currently running kprobes to complete */
-	synchronize_sched();
+	/* Wait for unoptimizing completion */
+	wait_for_kprobe_optimizer();
+	printk(KERN_INFO "Kprobes globally unoptimized\n");
 }
 
 int sysctl_kprobes_optimization;
@@ -669,44 +844,60 @@
 }
 #endif /* CONFIG_SYSCTL */
 
+/* Put a breakpoint for a probe. Must be called with text_mutex locked */
 static void __kprobes __arm_kprobe(struct kprobe *p)
 {
-	struct kprobe *old_p;
+	struct kprobe *_p;
 
 	/* Check collision with other optimized kprobes */
-	old_p = get_optimized_kprobe((unsigned long)p->addr);
-	if (unlikely(old_p))
-		unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+	_p = get_optimized_kprobe((unsigned long)p->addr);
+	if (unlikely(_p))
+		/* Fallback to unoptimized kprobe */
+		unoptimize_kprobe(_p, true);
 
 	arch_arm_kprobe(p);
 	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
 }
 
-static void __kprobes __disarm_kprobe(struct kprobe *p)
+/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
+static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
 {
-	struct kprobe *old_p;
+	struct kprobe *_p;
 
-	unoptimize_kprobe(p);	/* Try to unoptimize */
-	arch_disarm_kprobe(p);
+	unoptimize_kprobe(p, false);	/* Try to unoptimize */
 
-	/* If another kprobe was blocked, optimize it. */
-	old_p = get_optimized_kprobe((unsigned long)p->addr);
-	if (unlikely(old_p))
-		optimize_kprobe(old_p);
+	if (!kprobe_queued(p)) {
+		arch_disarm_kprobe(p);
+		/* If another kprobe was blocked, optimize it. */
+		_p = get_optimized_kprobe((unsigned long)p->addr);
+		if (unlikely(_p) && reopt)
+			optimize_kprobe(_p);
+	}
+	/* TODO: reoptimize others after unoptimized this probe */
 }
 
 #else /* !CONFIG_OPTPROBES */
 
 #define optimize_kprobe(p)			do {} while (0)
-#define unoptimize_kprobe(p)			do {} while (0)
+#define unoptimize_kprobe(p, f)			do {} while (0)
 #define kill_optimized_kprobe(p)		do {} while (0)
 #define prepare_optimized_kprobe(p)		do {} while (0)
 #define try_to_optimize_kprobe(p)		do {} while (0)
 #define __arm_kprobe(p)				arch_arm_kprobe(p)
-#define __disarm_kprobe(p)			arch_disarm_kprobe(p)
+#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p)
+#define kprobe_disarmed(p)			kprobe_disabled(p)
+#define wait_for_kprobe_optimizer()		do {} while (0)
+
+/* There should be no unused kprobes can be reused without optimization */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+	printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+	BUG_ON(kprobe_unused(ap));
+}
 
 static __kprobes void free_aggr_kprobe(struct kprobe *p)
 {
+	arch_remove_kprobe(p);
 	kfree(p);
 }
 
@@ -732,11 +923,10 @@
 /* Disarm a kprobe with text_mutex */
 static void __kprobes disarm_kprobe(struct kprobe *kp)
 {
-	get_online_cpus();	/* For avoiding text_mutex deadlock */
+	/* Ditto */
 	mutex_lock(&text_mutex);
-	__disarm_kprobe(kp);
+	__disarm_kprobe(kp, true);
 	mutex_unlock(&text_mutex);
-	put_online_cpus();
 }
 
 /*
@@ -942,7 +1132,7 @@
 	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 
 	if (p->break_handler || p->post_handler)
-		unoptimize_kprobe(ap);	/* Fall back to normal kprobe */
+		unoptimize_kprobe(ap, true);	/* Fall back to normal kprobe */
 
 	if (p->break_handler) {
 		if (ap->break_handler)
@@ -993,19 +1183,21 @@
  * This is the second or subsequent kprobe at the address - handle
  * the intricacies
  */
-static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
+static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
 					  struct kprobe *p)
 {
 	int ret = 0;
-	struct kprobe *ap = old_p;
+	struct kprobe *ap = orig_p;
 
-	if (!kprobe_aggrprobe(old_p)) {
-		/* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
-		ap = alloc_aggr_kprobe(old_p);
+	if (!kprobe_aggrprobe(orig_p)) {
+		/* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
+		ap = alloc_aggr_kprobe(orig_p);
 		if (!ap)
 			return -ENOMEM;
-		init_aggr_kprobe(ap, old_p);
-	}
+		init_aggr_kprobe(ap, orig_p);
+	} else if (kprobe_unused(ap))
+		/* This probe is going to die. Rescue it */
+		reuse_unused_kprobe(ap);
 
 	if (kprobe_gone(ap)) {
 		/*
@@ -1039,23 +1231,6 @@
 	return add_new_kprobe(ap, p);
 }
 
-/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
-static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
-{
-	struct kprobe *kp;
-
-	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (!kprobe_disabled(kp))
-			/*
-			 * There is an active probe on the list.
-			 * We can't disable aggr_kprobe.
-			 */
-			return 0;
-	}
-	p->flags |= KPROBE_FLAG_DISABLED;
-	return 1;
-}
-
 static int __kprobes in_kprobes_functions(unsigned long addr)
 {
 	struct kprobe_blackpoint *kb;
@@ -1098,34 +1273,33 @@
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
 static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
 {
-	struct kprobe *old_p, *list_p;
+	struct kprobe *ap, *list_p;
 
-	old_p = get_kprobe(p->addr);
-	if (unlikely(!old_p))
+	ap = get_kprobe(p->addr);
+	if (unlikely(!ap))
 		return NULL;
 
-	if (p != old_p) {
-		list_for_each_entry_rcu(list_p, &old_p->list, list)
+	if (p != ap) {
+		list_for_each_entry_rcu(list_p, &ap->list, list)
 			if (list_p == p)
 			/* kprobe p is a valid probe */
 				goto valid;
 		return NULL;
 	}
 valid:
-	return old_p;
+	return ap;
 }
 
 /* Return error if the kprobe is being re-registered */
 static inline int check_kprobe_rereg(struct kprobe *p)
 {
 	int ret = 0;
-	struct kprobe *old_p;
 
 	mutex_lock(&kprobe_mutex);
-	old_p = __get_valid_kprobe(p);
-	if (old_p)
+	if (__get_valid_kprobe(p))
 		ret = -EINVAL;
 	mutex_unlock(&kprobe_mutex);
+
 	return ret;
 }
 
@@ -1229,67 +1403,121 @@
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
+/* Check if all probes on the aggrprobe are disabled */
+static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+{
+	struct kprobe *kp;
+
+	list_for_each_entry_rcu(kp, &ap->list, list)
+		if (!kprobe_disabled(kp))
+			/*
+			 * There is an active probe on the list.
+			 * We can't disable this ap.
+			 */
+			return 0;
+
+	return 1;
+}
+
+/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
+static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+{
+	struct kprobe *orig_p;
+
+	/* Get an original kprobe for return */
+	orig_p = __get_valid_kprobe(p);
+	if (unlikely(orig_p == NULL))
+		return NULL;
+
+	if (!kprobe_disabled(p)) {
+		/* Disable probe if it is a child probe */
+		if (p != orig_p)
+			p->flags |= KPROBE_FLAG_DISABLED;
+
+		/* Try to disarm and disable this/parent probe */
+		if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+			disarm_kprobe(orig_p);
+			orig_p->flags |= KPROBE_FLAG_DISABLED;
+		}
+	}
+
+	return orig_p;
+}
+
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
 static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 {
-	struct kprobe *old_p, *list_p;
+	struct kprobe *ap, *list_p;
 
-	old_p = __get_valid_kprobe(p);
-	if (old_p == NULL)
+	/* Disable kprobe. This will disarm it if needed. */
+	ap = __disable_kprobe(p);
+	if (ap == NULL)
 		return -EINVAL;
 
-	if (old_p == p ||
-	    (kprobe_aggrprobe(old_p) &&
-	     list_is_singular(&old_p->list))) {
+	if (ap == p)
 		/*
-		 * Only probe on the hash list. Disarm only if kprobes are
-		 * enabled and not gone - otherwise, the breakpoint would
-		 * already have been removed. We save on flushing icache.
+		 * This probe is an independent(and non-optimized) kprobe
+		 * (not an aggrprobe). Remove from the hash list.
 		 */
-		if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
-			disarm_kprobe(old_p);
-		hlist_del_rcu(&old_p->hlist);
-	} else {
+		goto disarmed;
+
+	/* Following process expects this probe is an aggrprobe */
+	WARN_ON(!kprobe_aggrprobe(ap));
+
+	if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
+		/*
+		 * !disarmed could be happen if the probe is under delayed
+		 * unoptimizing.
+		 */
+		goto disarmed;
+	else {
+		/* If disabling probe has special handlers, update aggrprobe */
 		if (p->break_handler && !kprobe_gone(p))
-			old_p->break_handler = NULL;
+			ap->break_handler = NULL;
 		if (p->post_handler && !kprobe_gone(p)) {
-			list_for_each_entry_rcu(list_p, &old_p->list, list) {
+			list_for_each_entry_rcu(list_p, &ap->list, list) {
 				if ((list_p != p) && (list_p->post_handler))
 					goto noclean;
 			}
-			old_p->post_handler = NULL;
+			ap->post_handler = NULL;
 		}
 noclean:
+		/*
+		 * Remove from the aggrprobe: this path will do nothing in
+		 * __unregister_kprobe_bottom().
+		 */
 		list_del_rcu(&p->list);
-		if (!kprobe_disabled(old_p)) {
-			try_to_disable_aggr_kprobe(old_p);
-			if (!kprobes_all_disarmed) {
-				if (kprobe_disabled(old_p))
-					disarm_kprobe(old_p);
-				else
-					/* Try to optimize this probe again */
-					optimize_kprobe(old_p);
-			}
-		}
+		if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
+			/*
+			 * Try to optimize this probe again, because post
+			 * handler may have been changed.
+			 */
+			optimize_kprobe(ap);
 	}
 	return 0;
+
+disarmed:
+	BUG_ON(!kprobe_disarmed(ap));
+	hlist_del_rcu(&ap->hlist);
+	return 0;
 }
 
 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 {
-	struct kprobe *old_p;
+	struct kprobe *ap;
 
 	if (list_empty(&p->list))
+		/* This is an independent kprobe */
 		arch_remove_kprobe(p);
 	else if (list_is_singular(&p->list)) {
-		/* "p" is the last child of an aggr_kprobe */
-		old_p = list_entry(p->list.next, struct kprobe, list);
+		/* This is the last child of an aggrprobe */
+		ap = list_entry(p->list.next, struct kprobe, list);
 		list_del(&p->list);
-		arch_remove_kprobe(old_p);
-		free_aggr_kprobe(old_p);
+		free_aggr_kprobe(ap);
 	}
+	/* Otherwise, do nothing. */
 }
 
 int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1607,29 +1835,13 @@
 int __kprobes disable_kprobe(struct kprobe *kp)
 {
 	int ret = 0;
-	struct kprobe *p;
 
 	mutex_lock(&kprobe_mutex);
 
-	/* Check whether specified probe is valid. */
-	p = __get_valid_kprobe(kp);
-	if (unlikely(p == NULL)) {
+	/* Disable this kprobe */
+	if (__disable_kprobe(kp) == NULL)
 		ret = -EINVAL;
-		goto out;
-	}
 
-	/* If the probe is already disabled (or gone), just return */
-	if (kprobe_disabled(kp))
-		goto out;
-
-	kp->flags |= KPROBE_FLAG_DISABLED;
-	if (p != kp)
-		/* When kp != p, p is always enabled. */
-		try_to_disable_aggr_kprobe(p);
-
-	if (!kprobes_all_disarmed && kprobe_disabled(p))
-		disarm_kprobe(p);
-out:
 	mutex_unlock(&kprobe_mutex);
 	return ret;
 }
@@ -1927,36 +2139,27 @@
 	mutex_lock(&kprobe_mutex);
 
 	/* If kprobes are already disarmed, just return */
-	if (kprobes_all_disarmed)
-		goto already_disabled;
+	if (kprobes_all_disarmed) {
+		mutex_unlock(&kprobe_mutex);
+		return;
+	}
 
 	kprobes_all_disarmed = true;
 	printk(KERN_INFO "Kprobes globally disabled\n");
 
-	/*
-	 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
-	 * because disarming may also unoptimize kprobes.
-	 */
-	get_online_cpus();
 	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist) {
 			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-				__disarm_kprobe(p);
+				__disarm_kprobe(p, false);
 		}
 	}
-
 	mutex_unlock(&text_mutex);
-	put_online_cpus();
 	mutex_unlock(&kprobe_mutex);
-	/* Allow all currently running kprobes to complete */
-	synchronize_sched();
-	return;
 
-already_disabled:
-	mutex_unlock(&kprobe_mutex);
-	return;
+	/* Wait for disarming all kprobes by optimizer */
+	wait_for_kprobe_optimizer();
 }
 
 /*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index eac7e33..11847bf 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/idr.h>
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
@@ -21,7 +22,9 @@
 #include <linux/dcache.h>
 #include <linux/percpu.h>
 #include <linux/ptrace.h>
+#include <linux/reboot.h>
 #include <linux/vmstat.h>
+#include <linux/device.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
@@ -133,6 +136,28 @@
 	}
 }
 
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+{
+	/*
+	 * only top level events have the pid namespace they were created in
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	return task_tgid_nr_ns(p, event->ns);
+}
+
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+	/*
+	 * only top level events have the pid namespace they were created in
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	return task_pid_nr_ns(p, event->ns);
+}
+
 /*
  * If we inherit events we want to return the parent event id
  * to userspace.
@@ -312,9 +337,84 @@
 		ctx->nr_stat++;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_GROUP) {
+		nr += event->group_leader->nr_siblings;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+	event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+	struct perf_sample_data *data;
+	u64 sample_type = event->attr.sample_type;
+	u16 size = 0;
+
+	perf_event__read_size(event);
+
+	if (sample_type & PERF_SAMPLE_IP)
+		size += sizeof(data->ip);
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		size += sizeof(data->addr);
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		size += sizeof(data->period);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		size += event->read_size;
+
+	event->header_size = size;
+}
+
+static void perf_event__id_header_size(struct perf_event *event)
+{
+	struct perf_sample_data *data;
+	u64 sample_type = event->attr.sample_type;
+	u16 size = 0;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		size += sizeof(data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		size += sizeof(data->time);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		size += sizeof(data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		size += sizeof(data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		size += sizeof(data->cpu_entry);
+
+	event->id_header_size = size;
+}
+
 static void perf_group_attach(struct perf_event *event)
 {
-	struct perf_event *group_leader = event->group_leader;
+	struct perf_event *group_leader = event->group_leader, *pos;
 
 	/*
 	 * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +433,11 @@
 
 	list_add_tail(&event->group_entry, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
+
+	perf_event__header_size(group_leader);
+
+	list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+		perf_event__header_size(pos);
 }
 
 /*
@@ -391,7 +496,7 @@
 	if (event->group_leader != event) {
 		list_del_init(&event->group_entry);
 		event->group_leader->nr_siblings--;
-		return;
+		goto out;
 	}
 
 	if (!list_empty(&event->group_entry))
@@ -410,6 +515,12 @@
 		/* Inherit group flags from the previous leader */
 		sibling->group_flags = event->group_flags;
 	}
+
+out:
+	perf_event__header_size(event->group_leader);
+
+	list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+		perf_event__header_size(tmp);
 }
 
 static inline int
@@ -1073,7 +1184,7 @@
 	/*
 	 * not supported on inherited events
 	 */
-	if (event->attr.inherit)
+	if (event->attr.inherit || !is_sampling_event(event))
 		return -EINVAL;
 
 	atomic_add(refresh, &event->event_limit);
@@ -2289,31 +2400,6 @@
 	return perf_event_release_kernel(event);
 }
 
-static int perf_event_read_size(struct perf_event *event)
-{
-	int entry = sizeof(u64); /* value */
-	int size = 0;
-	int nr = 1;
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_ID)
-		entry += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_GROUP) {
-		nr += event->group_leader->nr_siblings;
-		size += sizeof(u64);
-	}
-
-	size += entry * nr;
-
-	return size;
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -2428,7 +2514,7 @@
 	if (event->state == PERF_EVENT_STATE_ERROR)
 		return 0;
 
-	if (count < perf_event_read_size(event))
+	if (count < event->read_size)
 		return -ENOSPC;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2600,7 @@
 	int ret = 0;
 	u64 value;
 
-	if (!event->attr.sample_period)
+	if (!is_sampling_event(event))
 		return -EINVAL;
 
 	if (copy_from_user(&value, arg, sizeof(value)))
@@ -3305,6 +3391,73 @@
 	} while (len);
 }
 
+static void __perf_event_header__init_id(struct perf_event_header *header,
+					 struct perf_sample_data *data,
+					 struct perf_event *event)
+{
+	u64 sample_type = event->attr.sample_type;
+
+	data->type = sample_type;
+	header->size += event->id_header_size;
+
+	if (sample_type & PERF_SAMPLE_TID) {
+		/* namespace issues */
+		data->tid_entry.pid = perf_event_pid(event, current);
+		data->tid_entry.tid = perf_event_tid(event, current);
+	}
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		data->time = perf_clock();
+
+	if (sample_type & PERF_SAMPLE_ID)
+		data->id = primary_event_id(event);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		data->stream_id = event->id;
+
+	if (sample_type & PERF_SAMPLE_CPU) {
+		data->cpu_entry.cpu	 = raw_smp_processor_id();
+		data->cpu_entry.reserved = 0;
+	}
+}
+
+static void perf_event_header__init_id(struct perf_event_header *header,
+				       struct perf_sample_data *data,
+				       struct perf_event *event)
+{
+	if (event->attr.sample_id_all)
+		__perf_event_header__init_id(header, data, event);
+}
+
+static void __perf_event__output_id_sample(struct perf_output_handle *handle,
+					   struct perf_sample_data *data)
+{
+	u64 sample_type = data->type;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		perf_output_put(handle, data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		perf_output_put(handle, data->time);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		perf_output_put(handle, data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		perf_output_put(handle, data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		perf_output_put(handle, data->cpu_entry);
+}
+
+static void perf_event__output_id_sample(struct perf_event *event,
+					 struct perf_output_handle *handle,
+					 struct perf_sample_data *sample)
+{
+	if (event->attr.sample_id_all)
+		__perf_event__output_id_sample(handle, sample);
+}
+
 int perf_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event, unsigned int size,
 		      int nmi, int sample)
@@ -3312,6 +3465,7 @@
 	struct perf_buffer *buffer;
 	unsigned long tail, offset, head;
 	int have_lost;
+	struct perf_sample_data sample_data;
 	struct {
 		struct perf_event_header header;
 		u64			 id;
@@ -3338,8 +3492,12 @@
 		goto out;
 
 	have_lost = local_read(&buffer->lost);
-	if (have_lost)
-		size += sizeof(lost_event);
+	if (have_lost) {
+		lost_event.header.size = sizeof(lost_event);
+		perf_event_header__init_id(&lost_event.header, &sample_data,
+					   event);
+		size += lost_event.header.size;
+	}
 
 	perf_output_get_handle(handle);
 
@@ -3370,11 +3528,11 @@
 	if (have_lost) {
 		lost_event.header.type = PERF_RECORD_LOST;
 		lost_event.header.misc = 0;
-		lost_event.header.size = sizeof(lost_event);
 		lost_event.id          = event->id;
 		lost_event.lost        = local_xchg(&buffer->lost, 0);
 
 		perf_output_put(handle, lost_event);
+		perf_event__output_id_sample(event, handle, &sample_data);
 	}
 
 	return 0;
@@ -3407,28 +3565,6 @@
 	rcu_read_unlock();
 }
 
-static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
-{
-	/*
-	 * only top level events have the pid namespace they were created in
-	 */
-	if (event->parent)
-		event = event->parent;
-
-	return task_tgid_nr_ns(p, event->ns);
-}
-
-static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
-{
-	/*
-	 * only top level events have the pid namespace they were created in
-	 */
-	if (event->parent)
-		event = event->parent;
-
-	return task_pid_nr_ns(p, event->ns);
-}
-
 static void perf_output_read_one(struct perf_output_handle *handle,
 				 struct perf_event *event,
 				 u64 enabled, u64 running)
@@ -3603,62 +3739,17 @@
 {
 	u64 sample_type = event->attr.sample_type;
 
-	data->type = sample_type;
-
 	header->type = PERF_RECORD_SAMPLE;
-	header->size = sizeof(*header);
+	header->size = sizeof(*header) + event->header_size;
 
 	header->misc = 0;
 	header->misc |= perf_misc_flags(regs);
 
-	if (sample_type & PERF_SAMPLE_IP) {
+	__perf_event_header__init_id(header, data, event);
+
+	if (sample_type & PERF_SAMPLE_IP)
 		data->ip = perf_instruction_pointer(regs);
 
-		header->size += sizeof(data->ip);
-	}
-
-	if (sample_type & PERF_SAMPLE_TID) {
-		/* namespace issues */
-		data->tid_entry.pid = perf_event_pid(event, current);
-		data->tid_entry.tid = perf_event_tid(event, current);
-
-		header->size += sizeof(data->tid_entry);
-	}
-
-	if (sample_type & PERF_SAMPLE_TIME) {
-		data->time = perf_clock();
-
-		header->size += sizeof(data->time);
-	}
-
-	if (sample_type & PERF_SAMPLE_ADDR)
-		header->size += sizeof(data->addr);
-
-	if (sample_type & PERF_SAMPLE_ID) {
-		data->id = primary_event_id(event);
-
-		header->size += sizeof(data->id);
-	}
-
-	if (sample_type & PERF_SAMPLE_STREAM_ID) {
-		data->stream_id = event->id;
-
-		header->size += sizeof(data->stream_id);
-	}
-
-	if (sample_type & PERF_SAMPLE_CPU) {
-		data->cpu_entry.cpu		= raw_smp_processor_id();
-		data->cpu_entry.reserved	= 0;
-
-		header->size += sizeof(data->cpu_entry);
-	}
-
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		header->size += sizeof(data->period);
-
-	if (sample_type & PERF_SAMPLE_READ)
-		header->size += perf_event_read_size(event);
-
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
@@ -3722,23 +3813,26 @@
 			struct task_struct *task)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	struct perf_read_event read_event = {
 		.header = {
 			.type = PERF_RECORD_READ,
 			.misc = 0,
-			.size = sizeof(read_event) + perf_event_read_size(event),
+			.size = sizeof(read_event) + event->read_size,
 		},
 		.pid = perf_event_pid(event, task),
 		.tid = perf_event_tid(event, task),
 	};
 	int ret;
 
+	perf_event_header__init_id(&read_event.header, &sample, event);
 	ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
 	if (ret)
 		return;
 
 	perf_output_put(&handle, read_event);
 	perf_output_read(&handle, event);
+	perf_event__output_id_sample(event, &handle, &sample);
 
 	perf_output_end(&handle);
 }
@@ -3768,14 +3862,16 @@
 				     struct perf_task_event *task_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data	sample;
 	struct task_struct *task = task_event->task;
-	int size, ret;
+	int ret, size = task_event->event_id.header.size;
 
-	size  = task_event->event_id.header.size;
-	ret = perf_output_begin(&handle, event, size, 0, 0);
+	perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
+	ret = perf_output_begin(&handle, event,
+				task_event->event_id.header.size, 0, 0);
 	if (ret)
-		return;
+		goto out;
 
 	task_event->event_id.pid = perf_event_pid(event, task);
 	task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3785,7 +3881,11 @@
 
 	perf_output_put(&handle, task_event->event_id);
 
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	task_event->event_id.header.size = size;
 }
 
 static int perf_event_task_match(struct perf_event *event)
@@ -3824,6 +3924,8 @@
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_task_ctx(&cpuctx->ctx, task_event);
 
 		ctx = task_event->task_ctx;
@@ -3898,11 +4000,16 @@
 				     struct perf_comm_event *comm_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int size = comm_event->event_id.header.size;
-	int ret = perf_output_begin(&handle, event, size, 0, 0);
+	int ret;
+
+	perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
+	ret = perf_output_begin(&handle, event,
+				comm_event->event_id.header.size, 0, 0);
 
 	if (ret)
-		return;
+		goto out;
 
 	comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
 	comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3910,7 +4017,12 @@
 	perf_output_put(&handle, comm_event->event_id);
 	perf_output_copy(&handle, comm_event->comm,
 				   comm_event->comm_size);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	comm_event->event_id.header.size = size;
 }
 
 static int perf_event_comm_match(struct perf_event *event)
@@ -3955,10 +4067,11 @@
 	comm_event->comm_size = size;
 
 	comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
 		ctxn = pmu->task_ctx_nr;
@@ -4034,11 +4147,15 @@
 				     struct perf_mmap_event *mmap_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int size = mmap_event->event_id.header.size;
-	int ret = perf_output_begin(&handle, event, size, 0, 0);
+	int ret;
 
+	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
+	ret = perf_output_begin(&handle, event,
+				mmap_event->event_id.header.size, 0, 0);
 	if (ret)
-		return;
+		goto out;
 
 	mmap_event->event_id.pid = perf_event_pid(event, current);
 	mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4046,7 +4163,12 @@
 	perf_output_put(&handle, mmap_event->event_id);
 	perf_output_copy(&handle, mmap_event->file_name,
 				   mmap_event->file_size);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	mmap_event->event_id.header.size = size;
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
@@ -4144,6 +4266,8 @@
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
 					vma->vm_flags & VM_EXEC);
 
@@ -4199,6 +4323,7 @@
 static void perf_log_throttle(struct perf_event *event, int enable)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int ret;
 
 	struct {
@@ -4220,11 +4345,15 @@
 	if (enable)
 		throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
 
-	ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0);
+	perf_event_header__init_id(&throttle_event.header, &sample, event);
+
+	ret = perf_output_begin(&handle, event,
+				throttle_event.header.size, 1, 0);
 	if (ret)
 		return;
 
 	perf_output_put(&handle, throttle_event);
+	perf_event__output_id_sample(event, &handle, &sample);
 	perf_output_end(&handle);
 }
 
@@ -4240,6 +4369,13 @@
 	struct hw_perf_event *hwc = &event->hw;
 	int ret = 0;
 
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
 	if (!throttle) {
 		hwc->interrupts++;
 	} else {
@@ -4385,7 +4521,7 @@
 	if (!regs)
 		return;
 
-	if (!hwc->sample_period)
+	if (!is_sampling_event(event))
 		return;
 
 	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4548,7 +4684,7 @@
 	struct hw_perf_event *hwc = &event->hw;
 	struct hlist_head *head;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		hwc->last_period = hwc->sample_period;
 		perf_swevent_set_period(event);
 	}
@@ -4713,7 +4849,7 @@
 		break;
 	}
 
-	if (event_id > PERF_COUNT_SW_MAX)
+	if (event_id >= PERF_COUNT_SW_MAX)
 		return -ENOENT;
 
 	if (!event->parent) {
@@ -4805,15 +4941,6 @@
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -ENOENT;
 
-	/*
-	 * Raw tracepoint data is a severe data leak, only allow root to
-	 * have these.
-	 */
-	if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
-			perf_paranoid_tracepoint_raw() &&
-			!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	err = perf_trace_init(event);
 	if (err)
 		return err;
@@ -4836,7 +4963,7 @@
 
 static inline void perf_tp_register(void)
 {
-	perf_pmu_register(&perf_tracepoint);
+	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
 }
 
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4926,31 +5053,33 @@
 static void perf_swevent_start_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	s64 period;
+
+	if (!is_sampling_event(event))
+		return;
 
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
-	if (hwc->sample_period) {
-		s64 period = local64_read(&hwc->period_left);
 
-		if (period) {
-			if (period < 0)
-				period = 10000;
+	period = local64_read(&hwc->period_left);
+	if (period) {
+		if (period < 0)
+			period = 10000;
 
-			local64_set(&hwc->period_left, 0);
-		} else {
-			period = max_t(u64, 10000, hwc->sample_period);
-		}
-		__hrtimer_start_range_ns(&hwc->hrtimer,
+		local64_set(&hwc->period_left, 0);
+	} else {
+		period = max_t(u64, 10000, hwc->sample_period);
+	}
+	__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL_PINNED, 0);
-	}
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
 		local64_set(&hwc->period_left, ktime_to_ns(remaining));
 
@@ -5145,25 +5274,94 @@
 	return NULL;
 }
 
-static void free_pmu_context(void * __percpu cpu_context)
+static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
 {
-	struct pmu *pmu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_cpu_context *cpuctx;
+
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+
+		if (cpuctx->active_pmu == old_pmu)
+			cpuctx->active_pmu = pmu;
+	}
+}
+
+static void free_pmu_context(struct pmu *pmu)
+{
+	struct pmu *i;
 
 	mutex_lock(&pmus_lock);
 	/*
 	 * Like a real lame refcount.
 	 */
-	list_for_each_entry(pmu, &pmus, entry) {
-		if (pmu->pmu_cpu_context == cpu_context)
+	list_for_each_entry(i, &pmus, entry) {
+		if (i->pmu_cpu_context == pmu->pmu_cpu_context) {
+			update_pmu_context(i, pmu);
 			goto out;
+		}
 	}
 
-	free_percpu(cpu_context);
+	free_percpu(pmu->pmu_cpu_context);
 out:
 	mutex_unlock(&pmus_lock);
 }
+static struct idr pmu_idr;
 
-int perf_pmu_register(struct pmu *pmu)
+static ssize_t
+type_show(struct device *dev, struct device_attribute *attr, char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+
+	return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
+}
+
+static struct device_attribute pmu_dev_attrs[] = {
+       __ATTR_RO(type),
+       __ATTR_NULL,
+};
+
+static int pmu_bus_running;
+static struct bus_type pmu_bus = {
+	.name		= "event_source",
+	.dev_attrs	= pmu_dev_attrs,
+};
+
+static void pmu_dev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+static int pmu_dev_alloc(struct pmu *pmu)
+{
+	int ret = -ENOMEM;
+
+	pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!pmu->dev)
+		goto out;
+
+	device_initialize(pmu->dev);
+	ret = dev_set_name(pmu->dev, "%s", pmu->name);
+	if (ret)
+		goto free_dev;
+
+	dev_set_drvdata(pmu->dev, pmu);
+	pmu->dev->bus = &pmu_bus;
+	pmu->dev->release = pmu_dev_release;
+	ret = device_add(pmu->dev);
+	if (ret)
+		goto free_dev;
+
+out:
+	return ret;
+
+free_dev:
+	put_device(pmu->dev);
+	goto out;
+}
+
+int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
 	int cpu, ret;
 
@@ -5173,13 +5371,38 @@
 	if (!pmu->pmu_disable_count)
 		goto unlock;
 
+	pmu->type = -1;
+	if (!name)
+		goto skip_type;
+	pmu->name = name;
+
+	if (type < 0) {
+		int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
+		if (!err)
+			goto free_pdc;
+
+		err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
+		if (err) {
+			ret = err;
+			goto free_pdc;
+		}
+	}
+	pmu->type = type;
+
+	if (pmu_bus_running) {
+		ret = pmu_dev_alloc(pmu);
+		if (ret)
+			goto free_idr;
+	}
+
+skip_type:
 	pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
 	if (pmu->pmu_cpu_context)
 		goto got_cpu_context;
 
 	pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
 	if (!pmu->pmu_cpu_context)
-		goto free_pdc;
+		goto free_dev;
 
 	for_each_possible_cpu(cpu) {
 		struct perf_cpu_context *cpuctx;
@@ -5190,6 +5413,7 @@
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->jiffies_interval = 1;
 		INIT_LIST_HEAD(&cpuctx->rotation_list);
+		cpuctx->active_pmu = pmu;
 	}
 
 got_cpu_context:
@@ -5222,6 +5446,14 @@
 
 	return ret;
 
+free_dev:
+	device_del(pmu->dev);
+	put_device(pmu->dev);
+
+free_idr:
+	if (pmu->type >= PERF_TYPE_MAX)
+		idr_remove(&pmu_idr, pmu->type);
+
 free_pdc:
 	free_percpu(pmu->pmu_disable_count);
 	goto unlock;
@@ -5241,7 +5473,11 @@
 	synchronize_rcu();
 
 	free_percpu(pmu->pmu_disable_count);
-	free_pmu_context(pmu->pmu_cpu_context);
+	if (pmu->type >= PERF_TYPE_MAX)
+		idr_remove(&pmu_idr, pmu->type);
+	device_del(pmu->dev);
+	put_device(pmu->dev);
+	free_pmu_context(pmu);
 }
 
 struct pmu *perf_init_event(struct perf_event *event)
@@ -5250,6 +5486,13 @@
 	int idx;
 
 	idx = srcu_read_lock(&pmus_srcu);
+
+	rcu_read_lock();
+	pmu = idr_find(&pmu_idr, event->attr.type);
+	rcu_read_unlock();
+	if (pmu)
+		goto unlock;
+
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		int ret = pmu->event_init(event);
 		if (!ret)
@@ -5715,6 +5958,12 @@
 	mutex_unlock(&current->perf_event_mutex);
 
 	/*
+	 * Precalculate sample_data sizes
+	 */
+	perf_event__header_size(event);
+	perf_event__id_header_size(event);
+
+	/*
 	 * Drop the reference on the group_event after placing the
 	 * new event on the sibling_list. This ensures destruction
 	 * of the group leader will find the pointer to itself in
@@ -6067,6 +6316,12 @@
 	child_event->overflow_handler = parent_event->overflow_handler;
 
 	/*
+	 * Precalculate sample_data sizes
+	 */
+	perf_event__header_size(child_event);
+	perf_event__id_header_size(child_event);
+
+	/*
 	 * Link it up in the child's context:
 	 */
 	raw_spin_lock_irqsave(&child_ctx->lock, flags);
@@ -6297,7 +6552,7 @@
 	mutex_unlock(&swhash->hlist_mutex);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
 static void perf_pmu_rotate_stop(struct pmu *pmu)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -6351,6 +6606,26 @@
 static inline void perf_event_exit_cpu(int cpu) { }
 #endif
 
+static int
+perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		perf_event_exit_cpu(cpu);
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Run the perf reboot notifier at the very last possible moment so that
+ * the generic watchdog code runs as long as possible.
+ */
+static struct notifier_block perf_reboot_notifier = {
+	.notifier_call = perf_reboot,
+	.priority = INT_MIN,
+};
+
 static int __cpuinit
 perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
@@ -6379,14 +6654,45 @@
 {
 	int ret;
 
+	idr_init(&pmu_idr);
+
 	perf_event_init_all_cpus();
 	init_srcu_struct(&pmus_srcu);
-	perf_pmu_register(&perf_swevent);
-	perf_pmu_register(&perf_cpu_clock);
-	perf_pmu_register(&perf_task_clock);
+	perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
+	perf_pmu_register(&perf_cpu_clock, NULL, -1);
+	perf_pmu_register(&perf_task_clock, NULL, -1);
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
+	register_reboot_notifier(&perf_reboot_notifier);
 
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
+
+static int __init perf_event_sysfs_init(void)
+{
+	struct pmu *pmu;
+	int ret;
+
+	mutex_lock(&pmus_lock);
+
+	ret = bus_register(&pmu_bus);
+	if (ret)
+		goto unlock;
+
+	list_for_each_entry(pmu, &pmus, entry) {
+		if (!pmu->name || pmu->type < 0)
+			continue;
+
+		ret = pmu_dev_alloc(pmu);
+		WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
+	}
+	pmu_bus_running = 1;
+	ret = 0;
+
+unlock:
+	mutex_unlock(&pmus_lock);
+
+	return ret;
+}
+device_initcall(perf_event_sysfs_init);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index baf667b..8c7e483 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -30,7 +30,7 @@
 
 #include "power.h"
 
-#define HIBERNATE_SIG	"LINHIB0001"
+#define HIBERNATE_SIG	"S1SUSPEND"
 
 /*
  *	The swap map is a data structure used for keeping track of each page
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 1b2ea31..c36c3b9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -137,7 +137,7 @@
 	free_all_swap_pages(data->swap);
 	if (data->frozen)
 		thaw_processes();
-	pm_notifier_call_chain(data->mode == O_WRONLY ?
+	pm_notifier_call_chain(data->mode == O_RDONLY ?
 			PM_POST_HIBERNATION : PM_POST_RESTORE);
 	atomic_inc(&snapshot_device_available);
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 9fad33e..798e2fa 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -40,23 +40,6 @@
 
 static DEFINE_RWLOCK(resource_lock);
 
-/*
- * By default, we allocate free space bottom-up.  The architecture can request
- * top-down by clearing this flag.  The user can override the architecture's
- * choice with the "resource_alloc_from_bottom" kernel boot option, but that
- * should only be a debugging tool.
- */
-int resource_alloc_from_bottom = 1;
-
-static __init int setup_alloc_from_bottom(char *s)
-{
-	printk(KERN_INFO
-	       "resource: allocating from bottom-up; please report a bug\n");
-	resource_alloc_from_bottom = 1;
-	return 0;
-}
-early_param("resource_alloc_from_bottom", setup_alloc_from_bottom);
-
 static void *r_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct resource *p = v;
@@ -374,6 +357,10 @@
 	return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
 }
 
+void __weak arch_remove_reservations(struct resource *avail)
+{
+}
+
 static resource_size_t simple_align_resource(void *data,
 					     const struct resource *avail,
 					     resource_size_t size,
@@ -397,74 +384,7 @@
 }
 
 /*
- * Find the resource before "child" in the sibling list of "root" children.
- */
-static struct resource *find_sibling_prev(struct resource *root, struct resource *child)
-{
-	struct resource *this;
-
-	for (this = root->child; this; this = this->sibling)
-		if (this->sibling == child)
-			return this;
-
-	return NULL;
-}
-
-/*
  * Find empty slot in the resource tree given range and alignment.
- * This version allocates from the end of the root resource first.
- */
-static int find_resource_from_top(struct resource *root, struct resource *new,
-				  resource_size_t size, resource_size_t min,
-				  resource_size_t max, resource_size_t align,
-				  resource_size_t (*alignf)(void *,
-						   const struct resource *,
-						   resource_size_t,
-						   resource_size_t),
-				  void *alignf_data)
-{
-	struct resource *this;
-	struct resource tmp, avail, alloc;
-
-	tmp.start = root->end;
-	tmp.end = root->end;
-
-	this = find_sibling_prev(root, NULL);
-	for (;;) {
-		if (this) {
-			if (this->end < root->end)
-				tmp.start = this->end + 1;
-		} else
-			tmp.start = root->start;
-
-		resource_clip(&tmp, min, max);
-
-		/* Check for overflow after ALIGN() */
-		avail = *new;
-		avail.start = ALIGN(tmp.start, align);
-		avail.end = tmp.end;
-		if (avail.start >= tmp.start) {
-			alloc.start = alignf(alignf_data, &avail, size, align);
-			alloc.end = alloc.start + size - 1;
-			if (resource_contains(&avail, &alloc)) {
-				new->start = alloc.start;
-				new->end = alloc.end;
-				return 0;
-			}
-		}
-
-		if (!this || this->start == root->start)
-			break;
-
-		tmp.end = this->start - 1;
-		this = find_sibling_prev(root, this);
-	}
-	return -EBUSY;
-}
-
-/*
- * Find empty slot in the resource tree given range and alignment.
- * This version allocates from the beginning of the root resource first.
  */
 static int find_resource(struct resource *root, struct resource *new,
 			 resource_size_t size, resource_size_t min,
@@ -478,23 +398,24 @@
 	struct resource *this = root->child;
 	struct resource tmp = *new, avail, alloc;
 
+	tmp.flags = new->flags;
 	tmp.start = root->start;
 	/*
-	 * Skip past an allocated resource that starts at 0, since the
-	 * assignment of this->start - 1 to tmp->end below would cause an
-	 * underflow.
+	 * Skip past an allocated resource that starts at 0, since the assignment
+	 * of this->start - 1 to tmp->end below would cause an underflow.
 	 */
 	if (this && this->start == 0) {
 		tmp.start = this->end + 1;
 		this = this->sibling;
 	}
-	for (;;) {
+	for(;;) {
 		if (this)
 			tmp.end = this->start - 1;
 		else
 			tmp.end = root->end;
 
 		resource_clip(&tmp, min, max);
+		arch_remove_reservations(&tmp);
 
 		/* Check for overflow after ALIGN() */
 		avail = *new;
@@ -509,10 +430,8 @@
 				return 0;
 			}
 		}
-
 		if (!this)
 			break;
-
 		tmp.start = this->end + 1;
 		this = this->sibling;
 	}
@@ -545,10 +464,7 @@
 		alignf = simple_align_resource;
 
 	write_lock(&resource_lock);
-	if (resource_alloc_from_bottom)
-		err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
-	else
-		err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data);
+	err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
 	if (err >= 0 && __request_resource(root, new))
 		err = -EBUSY;
 	write_unlock(&resource_lock);
diff --git a/kernel/sched.c b/kernel/sched.c
index dc91a4d..c68cead 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -636,22 +636,18 @@
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-static u64 irq_time_cpu(int cpu);
-static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
+static void update_rq_clock_task(struct rq *rq, s64 delta);
 
-inline void update_rq_clock(struct rq *rq)
+static void update_rq_clock(struct rq *rq)
 {
-	if (!rq->skip_clock_update) {
-		int cpu = cpu_of(rq);
-		u64 irq_time;
+	s64 delta;
 
-		rq->clock = sched_clock_cpu(cpu);
-		irq_time = irq_time_cpu(cpu);
-		if (rq->clock - irq_time > rq->clock_task)
-			rq->clock_task = rq->clock - irq_time;
+	if (rq->skip_clock_update)
+		return;
 
-		sched_irq_time_avg_update(rq, irq_time);
-	}
+	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+	rq->clock += delta;
+	update_rq_clock_task(rq, delta);
 }
 
 /*
@@ -1924,10 +1920,9 @@
  * They are read and saved off onto struct rq in update_rq_clock().
  * This may result in other CPU reading this CPU's irq time and can
  * race with irq/account_system_vtime on this CPU. We would either get old
- * or new value (or semi updated value on 32 bit) with a side effect of
- * accounting a slice of irq time to wrong task when irq is in progress
- * while we read rq->clock. That is a worthy compromise in place of having
- * locks on each irq in account_system_time.
+ * or new value with a side effect of accounting a slice of irq time to wrong
+ * task when irq is in progress while we read rq->clock. That is a worthy
+ * compromise in place of having locks on each irq in account_system_time.
  */
 static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
@@ -1945,19 +1940,58 @@
 	sched_clock_irqtime = 0;
 }
 
-static u64 irq_time_cpu(int cpu)
-{
-	if (!sched_clock_irqtime)
-		return 0;
+#ifndef CONFIG_64BIT
+static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
 
-	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+static inline void irq_time_write_begin(void)
+{
+	__this_cpu_inc(irq_time_seq.sequence);
+	smp_wmb();
 }
 
+static inline void irq_time_write_end(void)
+{
+	smp_wmb();
+	__this_cpu_inc(irq_time_seq.sequence);
+}
+
+static inline u64 irq_time_read(int cpu)
+{
+	u64 irq_time;
+	unsigned seq;
+
+	do {
+		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+		irq_time = per_cpu(cpu_softirq_time, cpu) +
+			   per_cpu(cpu_hardirq_time, cpu);
+	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+
+	return irq_time;
+}
+#else /* CONFIG_64BIT */
+static inline void irq_time_write_begin(void)
+{
+}
+
+static inline void irq_time_write_end(void)
+{
+}
+
+static inline u64 irq_time_read(int cpu)
+{
+	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+#endif /* CONFIG_64BIT */
+
+/*
+ * Called before incrementing preempt_count on {soft,}irq_enter
+ * and before decrementing preempt_count on {soft,}irq_exit.
+ */
 void account_system_vtime(struct task_struct *curr)
 {
 	unsigned long flags;
+	s64 delta;
 	int cpu;
-	u64 now, delta;
 
 	if (!sched_clock_irqtime)
 		return;
@@ -1965,9 +1999,10 @@
 	local_irq_save(flags);
 
 	cpu = smp_processor_id();
-	now = sched_clock_cpu(cpu);
-	delta = now - per_cpu(irq_start_time, cpu);
-	per_cpu(irq_start_time, cpu) = now;
+	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
+	__this_cpu_add(irq_start_time, delta);
+
+	irq_time_write_begin();
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
@@ -1975,33 +2010,55 @@
 	 * that do not consume any time, but still wants to run.
 	 */
 	if (hardirq_count())
-		per_cpu(cpu_hardirq_time, cpu) += delta;
+		__this_cpu_add(cpu_hardirq_time, delta);
 	else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
-		per_cpu(cpu_softirq_time, cpu) += delta;
+		__this_cpu_add(cpu_softirq_time, delta);
 
+	irq_time_write_end();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
+static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
-	if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
-		u64 delta_irq = curr_irq_time - rq->prev_irq_time;
-		rq->prev_irq_time = curr_irq_time;
-		sched_rt_avg_update(rq, delta_irq);
-	}
+	s64 irq_delta;
+
+	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+
+	/*
+	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
+	 * this case when a previous update_rq_clock() happened inside a
+	 * {soft,}irq region.
+	 *
+	 * When this happens, we stop ->clock_task and only update the
+	 * prev_irq_time stamp to account for the part that fit, so that a next
+	 * update will consume the rest. This ensures ->clock_task is
+	 * monotonic.
+	 *
+	 * It does however cause some slight miss-attribution of {soft,}irq
+	 * time, a more accurate solution would be to update the irq_time using
+	 * the current rq->clock timestamp, except that would require using
+	 * atomic ops.
+	 */
+	if (irq_delta > delta)
+		irq_delta = delta;
+
+	rq->prev_irq_time += irq_delta;
+	delta -= irq_delta;
+	rq->clock_task += delta;
+
+	if (irq_delta && sched_feat(NONIRQ_POWER))
+		sched_rt_avg_update(rq, irq_delta);
 }
 
-#else
+#else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
-static u64 irq_time_cpu(int cpu)
+static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
-	return 0;
+	rq->clock_task += delta;
 }
 
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
-
-#endif
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -2129,7 +2186,7 @@
 	 * A queue event has occurred, and we're going to schedule.  In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (test_tsk_need_resched(rq->curr))
+	if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
 		rq->skip_clock_update = 1;
 }
 
@@ -3119,6 +3176,15 @@
 	return delta;
 }
 
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	load += 1UL << (FSHIFT - 1);
+	return load >> FSHIFT;
+}
+
 #ifdef CONFIG_NO_HZ
 /*
  * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -3148,6 +3214,128 @@
 
 	return delta;
 }
+
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x:         base of the power
+ * @frac_bits: fractional bits of @x
+ * @n:         power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+	unsigned long result = 1UL << frac_bits;
+
+	if (n) for (;;) {
+		if (n & 1) {
+			result *= x;
+			result += 1UL << (frac_bits - 1);
+			result >>= frac_bits;
+		}
+		n >>= 1;
+		if (!n)
+			break;
+		x *= x;
+		x += 1UL << (frac_bits - 1);
+		x >>= frac_bits;
+	}
+
+	return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ *    = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ *  ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ *    = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ *              n         1 - x^(n+1)
+ *     S_n := \Sum x^i = -------------
+ *             i=0          1 - x
+ */
+static unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+	    unsigned long active, unsigned int n)
+{
+
+	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
+/*
+ * NO_HZ can leave us missing all per-cpu ticks calling
+ * calc_load_account_active(), but since an idle CPU folds its delta into
+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
+ * in the pending idle delta if our idle period crossed a load cycle boundary.
+ *
+ * Once we've updated the global active value, we need to apply the exponential
+ * weights adjusted to the number of cycles missed.
+ */
+static void calc_global_nohz(unsigned long ticks)
+{
+	long delta, active, n;
+
+	if (time_before(jiffies, calc_load_update))
+		return;
+
+	/*
+	 * If we crossed a calc_load_update boundary, make sure to fold
+	 * any pending idle changes, the respective CPUs might have
+	 * missed the tick driven calc_load_account_active() update
+	 * due to NO_HZ.
+	 */
+	delta = calc_load_fold_idle();
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
+
+	/*
+	 * If we were idle for multiple load cycles, apply them.
+	 */
+	if (ticks >= LOAD_FREQ) {
+		n = ticks / LOAD_FREQ;
+
+		active = atomic_long_read(&calc_load_tasks);
+		active = active > 0 ? active * FIXED_1 : 0;
+
+		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+
+		calc_load_update += n * LOAD_FREQ;
+	}
+
+	/*
+	 * Its possible the remainder of the above division also crosses
+	 * a LOAD_FREQ period, the regular check in calc_global_load()
+	 * which comes after this will take care of that.
+	 *
+	 * Consider us being 11 ticks before a cycle completion, and us
+	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
+	 * age us 4 cycles, and the test in calc_global_load() will
+	 * pick up the final one.
+	 */
+}
 #else
 static void calc_load_account_idle(struct rq *this_rq)
 {
@@ -3157,6 +3345,10 @@
 {
 	return 0;
 }
+
+static void calc_global_nohz(unsigned long ticks)
+{
+}
 #endif
 
 /**
@@ -3174,24 +3366,17 @@
 	loads[2] = (avenrun[2] + offset) << shift;
 }
 
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
-	load *= exp;
-	load += active * (FIXED_1 - exp);
-	return load >> FSHIFT;
-}
-
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
  * CPUs have updated calc_load_tasks.
  */
-void calc_global_load(void)
+void calc_global_load(unsigned long ticks)
 {
-	unsigned long upd = calc_load_update + 10;
 	long active;
 
-	if (time_before(jiffies, upd))
+	calc_global_nohz(ticks);
+
+	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
 	active = atomic_long_read(&calc_load_tasks);
@@ -3845,7 +4030,6 @@
 {
 	if (prev->se.on_rq)
 		update_rq_clock(rq);
-	rq->skip_clock_update = 0;
 	prev->sched_class->put_prev_task(rq, prev);
 }
 
@@ -3903,7 +4087,6 @@
 		hrtick_clear(rq);
 
 	raw_spin_lock_irq(&rq->lock);
-	clear_tsk_need_resched(prev);
 
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -3935,6 +4118,8 @@
 
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
+	clear_tsk_need_resched(prev);
+	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
@@ -8108,8 +8293,6 @@
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-	perf_event_init();
-
 	scheduler_running = 1;
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5abfa15..4640441 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -745,8 +745,15 @@
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname       = "nmi_watchdog",
+		.data           = &watchdog_enabled,
+		.maxlen         = sizeof (int),
+		.mode           = 0644,
+		.proc_handler   = proc_dowatchdog_enabled,
+	},
 #endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname       = "unknown_nmi_panic",
 		.data           = &unknown_nmi_panic,
@@ -754,13 +761,6 @@
 		.mode           = 0644,
 		.proc_handler   = proc_dointvec,
 	},
-	{
-		.procname       = "nmi_watchdog",
-		.data           = &nmi_watchdog_enabled,
-		.maxlen         = sizeof (int),
-		.mode           = 0644,
-		.proc_handler   = proc_nmi_enabled,
-	},
 #endif
 #if defined(CONFIG_X86)
 	{
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 1357c57..4b2545a 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -136,7 +136,6 @@
 	{ CTL_INT,	KERN_IA64_UNALIGNED,		"ignore-unaligned-usertrap" },
 	{ CTL_INT,	KERN_COMPAT_LOG,		"compat-log" },
 	{ CTL_INT,	KERN_MAX_LOCK_DEPTH,		"max_lock_depth" },
-	{ CTL_INT,	KERN_NMI_WATCHDOG,		"nmi_watchdog" },
 	{ CTL_INT,	KERN_PANIC_ON_NMI,		"panic_on_unrecovered_nmi" },
 	{}
 };
diff --git a/kernel/timer.c b/kernel/timer.c
index 68a9ae7..353b922 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1252,6 +1252,12 @@
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 	unsigned long expires;
 
+	/*
+	 * Pretend that there is no timer pending if the cpu is offline.
+	 * Possible pending timers will be migrated later to an active cpu.
+	 */
+	if (cpu_is_offline(smp_processor_id()))
+		return now + NEXT_TIMER_MAX_DELTA;
 	spin_lock(&base->lock);
 	if (time_before_eq(base->next_timer, base->timer_jiffies))
 		base->next_timer = __next_timer_interrupt(base);
@@ -1319,7 +1325,7 @@
 {
 	jiffies_64 += ticks;
 	update_wall_time();
-	calc_global_load();
+	calc_global_load(ticks);
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c380612..f8cf959 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2338,11 +2338,19 @@
 	return count;
 }
 
+static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
+{
+	if (file->f_mode & FMODE_READ)
+		return seq_lseek(file, offset, origin);
+	else
+		return 0;
+}
+
 static const struct file_operations tracing_fops = {
 	.open		= tracing_open,
 	.read		= seq_read,
 	.write		= tracing_write_stub,
-	.llseek		= seq_lseek,
+	.llseek		= tracing_seek,
 	.release	= tracing_release,
 };
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 39c059c..19a359d 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -21,17 +21,46 @@
 /* Count the events in use (per event id, not per instance) */
 static int	total_ref_count;
 
+static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
+				 struct perf_event *p_event)
+{
+	/* No tracing, just counting, so no obvious leak */
+	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
+		return 0;
+
+	/* Some events are ok to be traced by non-root users... */
+	if (p_event->attach_state == PERF_ATTACH_TASK) {
+		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
+			return 0;
+	}
+
+	/*
+	 * ...otherwise raw tracepoint data can be a severe data leak,
+	 * only allow root to have these.
+	 */
+	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+
 static int perf_trace_event_init(struct ftrace_event_call *tp_event,
 				 struct perf_event *p_event)
 {
 	struct hlist_head __percpu *list;
-	int ret = -ENOMEM;
+	int ret;
 	int cpu;
 
+	ret = perf_trace_event_perm(tp_event, p_event);
+	if (ret)
+		return ret;
+
 	p_event->tp_event = tp_event;
 	if (tp_event->perf_refcount++ > 0)
 		return 0;
 
+	ret = -ENOMEM;
+
 	list = alloc_percpu(struct hlist_head);
 	if (!list)
 		goto fail;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0725eea..35fde09 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,6 +27,12 @@
 
 DEFINE_MUTEX(event_mutex);
 
+DEFINE_MUTEX(event_storage_mutex);
+EXPORT_SYMBOL_GPL(event_storage_mutex);
+
+char event_storage[EVENT_STORAGE_SIZE];
+EXPORT_SYMBOL_GPL(event_storage);
+
 LIST_HEAD(ftrace_events);
 LIST_HEAD(ftrace_common_fields);
 
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4ba44de..4b74d71 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -83,13 +83,19 @@
 
 #undef __array
 #define __array(type, item, len)					\
-	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
-	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+	do {								\
+		BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);			\
+		mutex_lock(&event_storage_mutex);			\
+		snprintf(event_storage, sizeof(event_storage),		\
+			 "%s[%d]", #type, len);				\
+		ret = trace_define_field(event_call, event_storage, #item, \
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
 				 is_signed_type(type), FILTER_OTHER);	\
-	if (ret)							\
-		return ret;
+		mutex_unlock(&event_storage_mutex);			\
+		if (ret)						\
+			return ret;					\
+	} while (0);
 
 #undef __array_desc
 #define __array_desc(type, container, item, len)			\
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a..eb17e14 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -57,6 +57,8 @@
 {
 	if (!strncmp(str, "panic", 5))
 		hardlockup_panic = 1;
+	else if (!strncmp(str, "0", 1))
+		no_watchdog = 1;
 	return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -547,13 +549,13 @@
 	.notifier_call = cpu_callback
 };
 
-static int __init spawn_watchdog_task(void)
+void __init lockup_detector_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
 	int err;
 
 	if (no_watchdog)
-		return 0;
+		return;
 
 	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	WARN_ON(notifier_to_errno(err));
@@ -561,6 +563,5 @@
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
 
-	return 0;
+	return;
 }
-early_initcall(spawn_watchdog_task);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28b42b9..2d05adb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -173,7 +173,8 @@
 	  An NMI is generated every 60 seconds or so to check for hardlockups.
 
 config HARDLOCKUP_DETECTOR
-	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
+		 !ARCH_HAS_NMI_WATCHDOG
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1c7a2ec..b6ff4a1 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -97,11 +97,9 @@
 int ceph_msgr_init(void)
 {
 	ceph_msgr_wq = create_workqueue("ceph-msgr");
-	if (IS_ERR(ceph_msgr_wq)) {
-		int ret = PTR_ERR(ceph_msgr_wq);
-		pr_err("msgr_init failed to create workqueue: %d\n", ret);
-		ceph_msgr_wq = NULL;
-		return ret;
+	if (!ceph_msgr_wq) {
+		pr_err("msgr_init failed to create workqueue\n");
+		return -ENOMEM;
 	}
 	return 0;
 }
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index ac34fee..1a040e6 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -13,7 +13,7 @@
  * build a vector of user pages
  */
 struct page **ceph_get_direct_page_vector(const char __user *data,
-					  int num_pages)
+					  int num_pages, bool write_page)
 {
 	struct page **pages;
 	int rc;
@@ -24,24 +24,27 @@
 
 	down_read(&current->mm->mmap_sem);
 	rc = get_user_pages(current, current->mm, (unsigned long)data,
-			    num_pages, 0, 0, pages, NULL);
+			    num_pages, write_page, 0, pages, NULL);
 	up_read(&current->mm->mmap_sem);
-	if (rc < 0)
+	if (rc < num_pages)
 		goto fail;
 	return pages;
 
 fail:
-	kfree(pages);
+	ceph_put_page_vector(pages, rc > 0 ? rc : 0, false);
 	return ERR_PTR(rc);
 }
 EXPORT_SYMBOL(ceph_get_direct_page_vector);
 
-void ceph_put_page_vector(struct page **pages, int num_pages)
+void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
 {
 	int i;
 
-	for (i = 0; i < num_pages; i++)
+	for (i = 0; i < num_pages; i++) {
+		if (dirty)
+			set_page_dirty_lock(pages[i]);
 		put_page(pages[i]);
+	}
 	kfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 5ad25e1..4eb99ab 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -214,17 +214,22 @@
 # The empty.o file is created in the make process in order to determine
 #  the target endianness and word size. It is made before all other C
 #  files, including recordmcount.
-cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then			\
-			$(objtree)/scripts/recordmcount "$(@)";			\
-		    fi;
+sub_cmd_record_mcount =					\
+	if [ $(@) != "scripts/mod/empty.o" ]; then	\
+		$(objtree)/scripts/recordmcount "$(@)";	\
+	fi;
 else
-cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
 	"$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
 	"$(if $(CONFIG_64BIT),64,32)" \
 	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \
 	"$(LD)" "$(NM)" "$(RM)" "$(MV)" \
 	"$(if $(part-of-module),1,0)" "$(@)";
 endif
+cmd_record_mcount = 						\
+	if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then	\
+		$(sub_cmd_record_mcount)			\
+	fi;
 endif
 
 define rule_cc_o_c
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index 58e933a..3966717 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -119,7 +119,7 @@
 
 static void fn_ELF_R_INFO(Elf_Rel *const rp, unsigned sym, unsigned type)
 {
-	rp->r_info = ELF_R_INFO(sym, type);
+	rp->r_info = _w(ELF_R_INFO(sym, type));
 }
 static void (*Elf_r_info)(Elf_Rel *const rp, unsigned sym, unsigned type) = fn_ELF_R_INFO;
 
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 8509bb5..bbbe584 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -125,7 +125,9 @@
 	-I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
 	--extra=+f --c-kinds=-px                                \
 	--regex-asm='/^ENTRY\(([^)]*)\).*/\1/'                  \
-	--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/'
+	--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \
+	--regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/'		\
+	--regex-c++='/^DEFINE_EVENT\(([^,)]*).*/trace_\1/'
 
 	all_kconfigs | xargs $1 -a                              \
 	--langdef=kconfig --language-force=kconfig              \
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 2d7d7de..427da45 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10830,7 +10830,8 @@
 {
 	struct alc_spec *spec = codec->spec;
 	struct auto_pin_cfg *cfg = &spec->autocfg;
-	int i, err;
+	int i, err, type;
+	int type_idx = 0;
 	hda_nid_t nid;
 
 	for (i = 0; i < cfg->num_inputs; i++) {
@@ -10839,9 +10840,15 @@
 		nid = cfg->inputs[i].pin;
 		if (get_wcaps(codec, nid) & AC_WCAP_IN_AMP) {
 			char label[32];
+			type = cfg->inputs[i].type;
+			if (i > 0 && type == cfg->inputs[i - 1].type)
+				type_idx++;
+			else
+				type_idx = 0;
 			snprintf(label, sizeof(label), "%s Boost",
 				 hda_get_autocfg_input_label(codec, cfg, i));
-			err = add_control(spec, ALC_CTL_WIDGET_VOL, label, 0,
+			err = add_control(spec, ALC_CTL_WIDGET_VOL, label,
+					  type_idx,
 				  HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_INPUT));
 			if (err < 0)
 				return err;
@@ -14800,6 +14807,8 @@
 enum {
 	ALC269_FIXUP_SONY_VAIO,
 	ALC269_FIXUP_DELL_M101Z,
+	ALC269_FIXUP_LENOVO_EDGE14,
+	ALC269_FIXUP_ASUS_G73JW,
 };
 
 static const struct alc_fixup alc269_fixups[] = {
@@ -14817,11 +14826,22 @@
 			{}
 		}
 	},
+	[ALC269_FIXUP_LENOVO_EDGE14] = {
+		.sku = ALC_FIXUP_SKU_IGNORE,
+	},
+	[ALC269_FIXUP_ASUS_G73JW] = {
+		.pins = (const struct alc_pincfg[]) {
+			{ 0x17, 0x99130111 }, /* subwoofer */
+			{ }
+		}
+	},
 };
 
 static struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
+	SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_LENOVO_EDGE14),
+	SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
 	{}
 };
 
diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c
index 879dff2..8725d4e 100644
--- a/sound/soc/codecs/wm8580.c
+++ b/sound/soc/codecs/wm8580.c
@@ -161,7 +161,7 @@
 static const u16 wm8580_reg[] = {
 	0x0121, 0x017e, 0x007d, 0x0014, /*R3*/
 	0x0121, 0x017e, 0x007d, 0x0194, /*R7*/
-	0x001c, 0x0002, 0x0002, 0x00c2, /*R11*/
+	0x0010, 0x0002, 0x0002, 0x00c2, /*R11*/
 	0x0182, 0x0082, 0x000a, 0x0024, /*R15*/
 	0x0009, 0x0000, 0x00ff, 0x0000, /*R19*/
 	0x00ff, 0x00ff, 0x00ff, 0x00ff, /*R23*/
diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index fca60a0..9001cc4 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c
@@ -818,7 +818,8 @@
 	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
 
-	return wm8904->deemph;
+	ucontrol->value.enumerated.item[0] = wm8904->deemph;
+	return 0;
 }
 
 static int wm8904_put_deemph(struct snd_kcontrol *kcontrol,
diff --git a/sound/soc/codecs/wm8955.c b/sound/soc/codecs/wm8955.c
index f89ad6c..9cbab8e 100644
--- a/sound/soc/codecs/wm8955.c
+++ b/sound/soc/codecs/wm8955.c
@@ -380,7 +380,8 @@
 	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
 
-	return wm8955->deemph;
+	ucontrol->value.enumerated.item[0] = wm8955->deemph;
+	return 0;
 }
 
 static int wm8955_put_deemph(struct snd_kcontrol *kcontrol,
diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c
index 8d5efb3..21986c4 100644
--- a/sound/soc/codecs/wm8960.c
+++ b/sound/soc/codecs/wm8960.c
@@ -138,7 +138,8 @@
 	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec);
 
-	return wm8960->deemph;
+	ucontrol->value.enumerated.item[0] = wm8960->deemph;
+	return 0;
 }
 
 static int wm8960_put_deemph(struct snd_kcontrol *kcontrol,
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 75ed649..c721502 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -944,6 +944,9 @@
 		case SND_SOC_DAPM_STREAM_RESUME:
 			sys_power = 1;
 			break;
+		case SND_SOC_DAPM_STREAM_STOP:
+			sys_power = !!codec->active;
+			break;
 		case SND_SOC_DAPM_STREAM_SUSPEND:
 			sys_power = 0;
 			break;
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index b2c6330..6f5a498 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -24,12 +24,47 @@
 --input=::
         Input file name. (default: perf.data)
 
+-d::
+--dsos=<dso[,dso...]>::
+        Only consider symbols in these dsos.
+-s::
+--symbol=<symbol>::
+        Symbol to annotate.
+
+-f::
+--force::
+        Don't complain, do it.
+
+-v::
+--verbose::
+        Be more verbose. (Show symbol address, etc)
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-k::
+--vmlinux=<file>::
+        vmlinux pathname.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
+-l::
+--print-line::
+        Print matching source lines (may be slow).
+
+-P::
+--full-paths::
+        Don't shorten the displayed pathnames.
+
 --stdio:: Use the stdio interface.
 
 --tui:: Use the TUI interface Use of --tui requires a tty, if one is not
 	present, as when piping to other commands, the stdio interface is
 	used. This interfaces starts by centering on the line with more
-	samples, TAB/UNTAB cycles thru the lines with more samples.
+	samples, TAB/UNTAB cycles through the lines with more samples.
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
index 01b642c..5eaac6f 100644
--- a/tools/perf/Documentation/perf-buildid-list.txt
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -18,6 +18,9 @@
 
 OPTIONS
 -------
+-H::
+--with-hits::
+        Show only DSOs with hits.
 -i::
 --input=::
         Input file name. (default: perf.data)
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 20d97d8..74d7481 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -19,6 +19,18 @@
 
 OPTIONS
 -------
+-M::
+--displacement::
+        Show position displacement relative to baseline.
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel
+
 -d::
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
@@ -42,7 +54,7 @@
 --field-separator=::
 
 	Use a special separator character and don't pad with spaces, replacing
-	all occurances of this separator in symbol names (and other output)
+	all occurrences of this separator in symbol names (and other output)
 	with a '.' character, that thus it's the only non valid separator.
 
 -v::
@@ -50,6 +62,13 @@
 	Be verbose, for instance, show the raw counts in addition to the
 	diff.
 
+-f::
+--force::
+       Don't complain, do it.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index d004e19..dd84cb2 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -22,7 +22,7 @@
   a performance counter profile of guest os in realtime
   of an arbitrary workload.
 
-  'perf kvm record <command>' to record the performance couinter profile
+  'perf kvm record <command>' to record the performance counter profile
   of an arbitrary workload and save it into a perf data file. If both
   --host and --guest are input, the perf data file name is perf.data.kvm.
   If there is  no --host but --guest, the file name is perf.data.guest.
@@ -40,6 +40,12 @@
 
 OPTIONS
 -------
+-i::
+--input=::
+        Input file name.
+-o::
+--output::
+        Output file name.
 --host=::
         Collect host side performance profile.
 --guest=::
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index b317102..921de25 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -24,6 +24,21 @@
 
   'perf lock report' reports statistical data.
 
+OPTIONS
+-------
+
+-i::
+--input=<file>::
+        Input file name.
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 SEE ALSO
 --------
 linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 62de1b7..86b797a 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -115,9 +115,9 @@
 
 LINE SYNTAX
 -----------
-Line range is descripted by following syntax.
+Line range is described by following syntax.
 
- "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
+ "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
 
 FUNC specifies the function name of showing lines. 'RLN' is the start line
 number from function entry line, and 'RLN2' is the end line number. As same as
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index a91f9f9..52462ae 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -39,15 +39,24 @@
           be passed as follows: '\mem:addr[:[r][w][x]]'.
           If you want to profile read-write accesses in 0x1000, just set
           'mem:0x1000:rw'.
+
+--filter=<filter>::
+        Event filter.
+
 -a::
-        System-wide collection.
+--all-cpus::
+        System-wide collection from all CPUs.
 
 -l::
         Scale counter values.
 
 -p::
 --pid=::
-	Record events on existing pid.
+	Record events on existing process ID.
+
+-t::
+--tid=::
+        Record events on existing thread ID.
 
 -r::
 --realtime=::
@@ -99,6 +108,11 @@
 --data::
 	Sample addresses.
 
+-T::
+--timestamp::
+	Sample timestamps. Use it with 'perf report -D' to see the timestamps,
+	for instance.
+
 -n::
 --no-samples::
 	Don't sample.
@@ -109,8 +123,8 @@
 
 -C::
 --cpu::
-Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode with inheritance mode on (default), samples are captured only when
 the thread executes on the designated CPUs. Default is to monitor all CPUs.
 
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 12052c9..8ba03d6 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -20,6 +20,11 @@
 -i::
 --input=::
         Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 -d::
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
@@ -27,6 +32,10 @@
 -n::
 --show-nr-samples::
 	Show the number of samples for each symbol
+
+--showcpuutilization::
+        Show sample percentage for different cpu modes.
+
 -T::
 --threads::
 	Show per-thread event counters
@@ -39,12 +48,24 @@
 	Only consider these symbols. CSV that understands
 	file://filename entries.
 
+-U::
+--hide-unresolved::
+        Only display entries resolved to a symbol.
+
 -s::
 --sort=::
 	Sort by key(s): pid, comm, dso, symbol, parent.
 
+-p::
+--parent=<regex>::
+        regex filter to identify parent, see: '--sort parent'
+
+-x::
+--exclude-other::
+        Only display entries with parent-match.
+
 -w::
---field-width=::
+--column-widths=<width[,width...]>::
 	Force each column width to the provided list, for large terminal
 	readability.
 
@@ -52,19 +73,26 @@
 --field-separator=::
 
 	Use a special separator character and don't pad with spaces, replacing
-	all occurances of this separator in symbol names (and other output)
+	all occurrences of this separator in symbol names (and other output)
 	with a '.' character, that thus it's the only non valid separator.
 
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 -g [type,min]::
 --call-graph::
-        Display callchains using type and min percent threshold.
+        Display call chains using type and min percent threshold.
 	type can be either:
-	- flat: single column, linear exposure of callchains.
+	- flat: single column, linear exposure of call chains.
 	- graph: use a graph tree, displaying absolute overhead rates.
 	- fractal: like graph, but displays relative rates. Each branch of
 		 the tree is considered as a new profiled object. +
 	Default: fractal,0.5.
 
+--pretty=<key>::
+        Pretty printing style.  key: normal, raw
+
 --stdio:: Use the stdio interface.
 
 --tui:: Use the TUI interface, that is integrated with annotate and allows
@@ -72,6 +100,25 @@
 	requires a tty, if one is not present, as when piping to other
 	commands, the stdio interface is used.
 
+-k::
+--vmlinux=<file>::
+        vmlinux pathname
+
+--kallsyms=<file>::
+        kallsyms pathname
+
+-m::
+--modules::
+        Load module symbols. WARNING: This should only be used with -k and
+        a LIVE kernel.
+
+-f::
+--force::
+        Don't complain, do it.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 8417644..46822d5 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -8,11 +8,11 @@
 SYNOPSIS
 --------
 [verse]
-'perf sched' {record|latency|replay|trace}
+'perf sched' {record|latency|map|replay|trace}
 
 DESCRIPTION
 -----------
-There are four variants of perf sched:
+There are five variants of perf sched:
 
   'perf sched record <command>' to record the scheduling events
   of an arbitrary workload.
@@ -30,8 +30,22 @@
   of the workload as it occurred when it was recorded - and can repeat
   it a number of times, measuring its performance.)
 
+  'perf sched map' to print a textual context-switching outline of
+  workload captured via perf sched record.  Columns stand for
+  individual CPUs, and the two-letter shortcuts stand for tasks that
+  are running on a CPU. A '*' denotes the CPU that had the event, and
+  a dot signals an idle CPU.
+
 OPTIONS
 -------
+-i::
+--input=<file>::
+        Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 -D::
 --dump-raw-trace=::
         Display verbose dump of the sched data.
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
similarity index 90%
rename from tools/perf/Documentation/perf-trace-perl.txt
rename to tools/perf/Documentation/perf-script-perl.txt
index ee6525e..5bb41e5 100644
--- a/tools/perf/Documentation/perf-trace-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -1,19 +1,19 @@
-perf-trace-perl(1)
+perf-script-perl(1)
 ==================
 
 NAME
 ----
-perf-trace-perl - Process trace data with a Perl script
+perf-script-perl - Process trace data with a Perl script
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [-s [Perl]:script[.pl] ]
+'perf script' [-s [Perl]:script[.pl] ]
 
 DESCRIPTION
 -----------
 
-This perf trace option is used to process perf trace data using perf's
+This perf script option is used to process perf script data using perf's
 built-in Perl interpreter.  It reads and processes the input file and
 displays the results of the trace analysis implemented in the given
 Perl script, if any.
@@ -21,7 +21,7 @@
 STARTER SCRIPTS
 ---------------
 
-You can avoid reading the rest of this document by running 'perf trace
+You can avoid reading the rest of this document by running 'perf script
 -g perl' in the same directory as an existing perf.data trace file.
 That will generate a starter script containing a handler for each of
 the event types in the trace file; it simply prints every available
@@ -30,13 +30,13 @@
 You can also look at the existing scripts in
 ~/libexec/perf-core/scripts/perl for typical examples showing how to
 do basic things like aggregate event data, print results, etc.  Also,
-the check-perf-trace.pl script, while not interesting for its results,
+the check-perf-script.pl script, while not interesting for its results,
 attempts to exercise all of the main scripting features.
 
 EVENT HANDLERS
 --------------
 
-When perf trace is invoked using a trace script, a user-defined
+When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
 ignored (or passed to a 'trace_handled' function, see below) and the
@@ -112,13 +112,13 @@
 SCRIPT LAYOUT
 -------------
 
-Every perf trace Perl script should start by setting up a Perl module
+Every perf script Perl script should start by setting up a Perl module
 search path and 'use'ing a few support modules (see module
 descriptions below):
 
 ----
- use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
- use lib "./Perf-Trace-Util/lib";
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib";
+ use lib "./perf-script-Util/lib";
  use Perf::Trace::Core;
  use Perf::Trace::Context;
  use Perf::Trace::Util;
@@ -162,7 +162,7 @@
 ----
 
 The remaining sections provide descriptions of each of the available
-built-in perf trace Perl modules and their associated functions.
+built-in perf script Perl modules and their associated functions.
 
 AVAILABLE MODULES AND FUNCTIONS
 -------------------------------
@@ -170,7 +170,7 @@
 The following sections describe the functions and variables available
 via the various Perf::Trace::* Perl modules.  To use the functions and
 variables from the given module, add the corresponding 'use
-Perf::Trace::XXX' line to your perf trace script.
+Perf::Trace::XXX' line to your perf script script.
 
 Perf::Trace::Core Module
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -204,7 +204,7 @@
 Perf::Trace::Util Module
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-Various utility functions for use with perf trace:
+Various utility functions for use with perf script:
 
   nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
   nsecs_secs($nsecs) - returns whole secs portion given nsecs
@@ -214,4 +214,4 @@
 
 SEE ALSO
 --------
-linkperf:perf-trace[1]
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-script-python.txt
similarity index 89%
rename from tools/perf/Documentation/perf-trace-python.txt
rename to tools/perf/Documentation/perf-script-python.txt
index 693be80..36b3827 100644
--- a/tools/perf/Documentation/perf-trace-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -1,19 +1,19 @@
-perf-trace-python(1)
+perf-script-python(1)
 ====================
 
 NAME
 ----
-perf-trace-python - Process trace data with a Python script
+perf-script-python - Process trace data with a Python script
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [-s [Python]:script[.py] ]
+'perf script' [-s [Python]:script[.py] ]
 
 DESCRIPTION
 -----------
 
-This perf trace option is used to process perf trace data using perf's
+This perf script option is used to process perf script data using perf's
 built-in Python interpreter.  It reads and processes the input file and
 displays the results of the trace analysis implemented in the given
 Python script, if any.
@@ -23,15 +23,15 @@
 
 This section shows the process, start to finish, of creating a working
 Python script that aggregates and extracts useful information from a
-raw perf trace stream.  You can avoid reading the rest of this
+raw perf script stream.  You can avoid reading the rest of this
 document if an example is enough for you; the rest of the document
 provides more details on each step and lists the library functions
 available to script writers.
 
 This example actually details the steps that were used to create the
-'syscall-counts' script you see when you list the available perf trace
-scripts via 'perf trace -l'.  As such, this script also shows how to
-integrate your script into the list of general-purpose 'perf trace'
+'syscall-counts' script you see when you list the available perf script
+scripts via 'perf script -l'.  As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf script'
 scripts listed by that command.
 
 The syscall-counts script is a simple script, but demonstrates all the
@@ -105,31 +105,31 @@
 called perf.data.
 
 Once we have a perf.data file containing our data, we can use the -g
-'perf trace' option to generate a Python script that will contain a
+'perf script' option to generate a Python script that will contain a
 callback handler for each event type found in the perf.data trace
 stream (for more details, see the STARTER SCRIPTS section).
 
 ----
-# perf trace -g python
-generated Python script: perf-trace.py
+# perf script -g python
+generated Python script: perf-script.py
 
 The output file created also in the current directory is named
-perf-trace.py.  Here's the file in its entirety:
+perf-script.py.  Here's the file in its entirety:
 
-# perf trace event handlers, generated by perf trace -g python
+# perf script event handlers, generated by perf script -g python
 # Licensed under the terms of the GNU GPL License version 2
 
 # The common_* event handler fields are the most useful fields common to
 # all events.  They don't necessarily correspond to the 'common_*' fields
 # in the format files.  Those fields not available as handler params can
 # be retrieved using Python functions of the form common_*(context).
-# See the perf-trace-python Documentation for the list of available functions.
+# See the perf-script-python Documentation for the list of available functions.
 
 import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+	'/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -160,7 +160,7 @@
 ----
 
 At the top is a comment block followed by some import statements and a
-path append which every perf trace script should include.
+path append which every perf script script should include.
 
 Following that are a couple generated functions, trace_begin() and
 trace_end(), which are called at the beginning and the end of the
@@ -189,8 +189,8 @@
 script and run it to see the default output:
 
 ----
-# mv perf-trace.py syscall-counts.py
-# perf trace -s syscall-counts.py
+# mv perf-script.py syscall-counts.py
+# perf script -s syscall-counts.py
 
 raw_syscalls__sys_enter     1 00840.847582083     7506 perf                  id=1, args=
 raw_syscalls__sys_enter     1 00840.847595764     7506 perf                  id=1, args=
@@ -216,7 +216,7 @@
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+	'/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -279,7 +279,7 @@
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+	'/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -315,7 +315,7 @@
 
 The script can be run just as before:
 
-  # perf trace -s syscall-counts.py
+  # perf script -s syscall-counts.py
 
 So those are the essential steps in writing and running a script.  The
 process can be generalized to any tracepoint or set of tracepoints
@@ -324,17 +324,17 @@
 'perf list' and/or look in /sys/kernel/debug/tracing events for
 detailed event and field info, record the corresponding trace data
 using 'perf record', passing it the list of interesting events,
-generate a skeleton script using 'perf trace -g python' and modify the
+generate a skeleton script using 'perf script -g python' and modify the
 code to aggregate and display it for your particular needs.
 
 After you've done that you may end up with a general-purpose script
 that you want to keep around and have available for future use.  By
 writing a couple of very simple shell scripts and putting them in the
 right place, you can have your script listed alongside the other
-scripts listed by the 'perf trace -l' command e.g.:
+scripts listed by the 'perf script -l' command e.g.:
 
 ----
-root@tropicana:~# perf trace -l
+root@tropicana:~# perf script -l
 List of available trace scripts:
   workqueue-stats                      workqueue stats (ins/exe/create/destroy)
   wakeup-latency                       system-wide min/max/avg wakeup latency
@@ -365,14 +365,14 @@
 The 'report' script is also a shell script with the same base name as
 your script, but with -report appended.  It should also be located in
 the perf/scripts/python/bin directory.  In that script, you write the
-'perf trace -s' command-line needed for running your script:
+'perf script -s' command-line needed for running your script:
 
 ----
 # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
 
 #!/bin/bash
 # description: system-wide syscall counts
-perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
 ----
 
 Note that the location of the Python script given in the shell script
@@ -390,17 +390,17 @@
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
 drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
--rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py
-drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util
 -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
 ----
 
 Once you've done that (don't forget to do a new 'make install',
-otherwise your script won't show up at run-time), 'perf trace -l'
+otherwise your script won't show up at run-time), 'perf script -l'
 should show a new entry for your script:
 
 ----
-root@tropicana:~# perf trace -l
+root@tropicana:~# perf script -l
 List of available trace scripts:
   workqueue-stats                      workqueue stats (ins/exe/create/destroy)
   wakeup-latency                       system-wide min/max/avg wakeup latency
@@ -409,19 +409,19 @@
   syscall-counts                       system-wide syscall counts
 ----
 
-You can now perform the record step via 'perf trace record':
+You can now perform the record step via 'perf script record':
 
-  # perf trace record syscall-counts
+  # perf script record syscall-counts
 
-and display the output using 'perf trace report':
+and display the output using 'perf script report':
 
-  # perf trace report syscall-counts
+  # perf script report syscall-counts
 
 STARTER SCRIPTS
 ---------------
 
 You can quickly get started writing a script for a particular set of
-trace data by generating a skeleton script using 'perf trace -g
+trace data by generating a skeleton script using 'perf script -g
 python' in the same directory as an existing perf.data trace file.
 That will generate a starter script containing a handler for each of
 the event types in the trace file; it simply prints every available
@@ -430,13 +430,13 @@
 You can also look at the existing scripts in
 ~/libexec/perf-core/scripts/python for typical examples showing how to
 do basic things like aggregate event data, print results, etc.  Also,
-the check-perf-trace.py script, while not interesting for its results,
+the check-perf-script.py script, while not interesting for its results,
 attempts to exercise all of the main scripting features.
 
 EVENT HANDLERS
 --------------
 
-When perf trace is invoked using a trace script, a user-defined
+When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
 ignored (or passed to a 'trace_handled' function, see below) and the
@@ -510,7 +510,7 @@
 SCRIPT LAYOUT
 -------------
 
-Every perf trace Python script should start by setting up a Python
+Every perf script Python script should start by setting up a Python
 module search path and 'import'ing a few support modules (see module
 descriptions below):
 
@@ -519,7 +519,7 @@
  import sys
 
  sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	      '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+	      '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
  from perf_trace_context import *
  from Core import *
@@ -559,15 +559,15 @@
 ----
 
 The remaining sections provide descriptions of each of the available
-built-in perf trace Python modules and their associated functions.
+built-in perf script Python modules and their associated functions.
 
 AVAILABLE MODULES AND FUNCTIONS
 -------------------------------
 
 The following sections describe the functions and variables available
-via the various perf trace Python modules.  To use the functions and
+via the various perf script Python modules.  To use the functions and
 variables from the given module, add the corresponding 'from XXXX
-import' line to your perf trace script.
+import' line to your perf script script.
 
 Core.py Module
 ~~~~~~~~~~~~~~
@@ -610,7 +610,7 @@
 Util.py Module
 ~~~~~~~~~~~~~~
 
-Various utility functions for use with perf trace:
+Various utility functions for use with perf script:
 
   nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
   nsecs_secs(nsecs) - returns whole secs portion given nsecs
@@ -620,4 +620,4 @@
 
 SEE ALSO
 --------
-linkperf:perf-trace[1]
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-script.txt
similarity index 62%
rename from tools/perf/Documentation/perf-trace.txt
rename to tools/perf/Documentation/perf-script.txt
index 26aff6b..29ad942 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -1,71 +1,71 @@
-perf-trace(1)
+perf-script(1)
 =============
 
 NAME
 ----
-perf-trace - Read perf.data (created by perf record) and display trace output
+perf-script - Read perf.data (created by perf record) and display trace output
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [<options>]
-'perf trace' [<options>] record <script> [<record-options>] <command>
-'perf trace' [<options>] report <script> [script-args]
-'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command>
-'perf trace' [<options>] <top-script> [script-args]
+'perf script' [<options>]
+'perf script' [<options>] record <script> [<record-options>] <command>
+'perf script' [<options>] report <script> [script-args]
+'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
+'perf script' [<options>] <top-script> [script-args]
 
 DESCRIPTION
 -----------
 This command reads the input file and displays the trace recorded.
 
-There are several variants of perf trace:
+There are several variants of perf script:
 
-  'perf trace' to see a detailed trace of the workload that was
+  'perf script' to see a detailed trace of the workload that was
   recorded.
 
   You can also run a set of pre-canned scripts that aggregate and
   summarize the raw trace data in various ways (the list of scripts is
-  available via 'perf trace -l').  The following variants allow you to
+  available via 'perf script -l').  The following variants allow you to
   record and run those scripts:
 
-  'perf trace record <script> <command>' to record the events required
-  for 'perf trace report'.  <script> is the name displayed in the
-  output of 'perf trace --list' i.e. the actual script name minus any
+  'perf script record <script> <command>' to record the events required
+  for 'perf script report'.  <script> is the name displayed in the
+  output of 'perf script --list' i.e. the actual script name minus any
   language extension.  If <command> is not specified, the events are
   recorded using the -a (system-wide) 'perf record' option.
 
-  'perf trace report <script> [args]' to run and display the results
+  'perf script report <script> [args]' to run and display the results
   of <script>.  <script> is the name displayed in the output of 'perf
   trace --list' i.e. the actual script name minus any language
-  extension.  The perf.data output from a previous run of 'perf trace
+  extension.  The perf.data output from a previous run of 'perf script
   record <script>' is used and should be present for this command to
   succeed.  [args] refers to the (mainly optional) args expected by
   the script.
 
-  'perf trace <script> <required-script-args> <command>' to both
+  'perf script <script> <required-script-args> <command>' to both
   record the events required for <script> and to run the <script>
   using 'live-mode' i.e. without writing anything to disk.  <script>
-  is the name displayed in the output of 'perf trace --list' i.e. the
+  is the name displayed in the output of 'perf script --list' i.e. the
   actual script name minus any language extension.  If <command> is
   not specified, the events are recorded using the -a (system-wide)
   'perf record' option.  If <script> has any required args, they
   should be specified before <command>.  This mode doesn't allow for
   optional script args to be specified; if optional script args are
-  desired, they can be specified using separate 'perf trace record'
-  and 'perf trace report' commands, with the stdout of the record step
+  desired, they can be specified using separate 'perf script record'
+  and 'perf script report' commands, with the stdout of the record step
   piped to the stdin of the report script, using the '-o -' and '-i -'
   options of the corresponding commands.
 
-  'perf trace <top-script>' to both record the events required for
+  'perf script <top-script>' to both record the events required for
   <top-script> and to run the <top-script> using 'live-mode'
   i.e. without writing anything to disk.  <top-script> is the name
-  displayed in the output of 'perf trace --list' i.e. the actual
+  displayed in the output of 'perf script --list' i.e. the actual
   script name minus any language extension; a <top-script> is defined
   as any script name ending with the string 'top'.
 
-  [<record-options>] can be passed to the record steps of 'perf trace
+  [<record-options>] can be passed to the record steps of 'perf script
   record' and 'live-mode' variants; this isn't possible however for
-  <top-script> 'live-mode' or 'perf trace report' variants.
+  <top-script> 'live-mode' or 'perf script report' variants.
 
   See the 'SEE ALSO' section for links to language-specific
   information on how to write and run your own trace scripts.
@@ -76,7 +76,7 @@
 	Any command you can specify in a shell.
 
 -D::
---dump-raw-trace=::
+--dump-raw-script=::
         Display verbose dump of the trace data.
 
 -L::
@@ -95,7 +95,7 @@
 
 -g::
 --gen-script=::
-        Generate perf-trace.[ext] starter script for given language,
+        Generate perf-script.[ext] starter script for given language,
         using current perf.data.
 
 -a::
@@ -104,8 +104,15 @@
         normally don't - this option allows the latter to be run in
         system-wide mode.
 
+-i::
+--input=::
+        Input file name.
+
+-d::
+--debug-mode::
+        Do various checks like samples ordering and lost events.
 
 SEE ALSO
 --------
-linkperf:perf-record[1], linkperf:perf-trace-perl[1],
-linkperf:perf-trace-python[1]
+linkperf:perf-record[1], linkperf:perf-script-perl[1],
+linkperf:perf-script-python[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4b3a2d4..b6da7af 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -8,8 +8,8 @@
 SYNOPSIS
 --------
 [verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
 
 DESCRIPTION
 -----------
@@ -35,24 +35,54 @@
         child tasks do not inherit counters
 -p::
 --pid=<pid>::
-        stat events on existing pid
+        stat events on existing process id
+
+-t::
+--tid=<tid>::
+        stat events on existing thread id
+
 
 -a::
-        system-wide collection
+--all-cpus::
+        system-wide collection from all CPUs
 
 -c::
-        scale counter values
+--scale::
+	scale/normalize counter values
+
+-r::
+--repeat=<n>::
+	repeat command and print average + stddev (max: 100)
 
 -B::
+--big-num::
         print large numbers with thousands' separators according to locale
 
 -C::
 --cpu=::
-Count only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode, this option is ignored. The -a option is still necessary
 to activate system-wide monitoring. Default is to count on all CPUs.
 
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
+This option is only valid in system-wide mode.
+
+-n::
+--null::
+        null run - don't start any counters
+
+-v::
+--verbose::
+        be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index 1c4b5f5..2c3b462 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -12,7 +12,7 @@
 
 DESCRIPTION
 -----------
-This command does assorted sanity tests, initially thru linked routines but
+This command does assorted sanity tests, initially through linked routines but
 also will look for a directory with more tests in the form of scripts.
 
 OPTIONS
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index 4b17883..d7b79e2 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -38,6 +38,8 @@
 --process::
         Select the processes to display, by name or PID
 
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 1f96876..f6eb1cd 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -12,7 +12,7 @@
 
 DESCRIPTION
 -----------
-This command generates and displays a performance counter profile in realtime.
+This command generates and displays a performance counter profile in real time.
 
 
 OPTIONS
@@ -27,8 +27,8 @@
 
 -C <cpu-list>::
 --cpu=<cpu>::
-Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 Default is to monitor all CPUS.
 
 -d <seconds>::
@@ -50,6 +50,10 @@
 --count-filter=<count>::
 	Only display functions with more events than this.
 
+-g::
+--group::
+        Put the counters into a counter group.
+
 -F <freq>::
 --freq=<freq>::
 	Profile at this frequency.
@@ -68,7 +72,11 @@
 
 -p <pid>::
 --pid=<pid>::
-	Profile events on existing pid.
+	Profile events on existing Process ID.
+
+-t <tid>::
+--tid=<tid>::
+        Profile events on existing thread ID.
 
 -r <priority>::
 --realtime=<priority>::
@@ -78,6 +86,18 @@
 --sym-annotate=<symbol>::
         Annotate this symbol.
 
+-K::
+--hide_kernel_symbols::
+        Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+        Hide user symbols.
+
+-D::
+--dump-symtab::
+        Dump the symbol table used for profiling.
+
 -v::
 --verbose::
 	Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 8c7fc0c..c12659d 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -7,6 +7,7 @@
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
+arch/*/lib/memcpy*.S
 include/linux/poison.h
 include/linux/magic.h
 include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d1db0f6..ac6692c 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -185,7 +185,10 @@
         ARCH := x86
 endif
 ifeq ($(ARCH),x86_64)
+	RAW_ARCH := x86_64
         ARCH := x86
+	ARCH_CFLAGS := -DARCH_X86_64
+	ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
 endif
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -375,6 +378,7 @@
 LIB_H += util/include/linux/rbtree.h
 LIB_H += util/include/linux/string.h
 LIB_H += util/include/linux/types.h
+LIB_H += util/include/linux/linkage.h
 LIB_H += util/include/asm/asm-offsets.h
 LIB_H += util/include/asm/bug.h
 LIB_H += util/include/asm/byteorder.h
@@ -383,6 +387,8 @@
 LIB_H += util/include/asm/system.h
 LIB_H += util/include/asm/uaccess.h
 LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
 LIB_H += perf.h
 LIB_H += util/cache.h
 LIB_H += util/callchain.h
@@ -417,6 +423,7 @@
 LIB_H += util/probe-event.h
 LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
+LIB_H += $(ARCH_INCLUDE)
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -472,6 +479,9 @@
 # Benchmark modules
 BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
 BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -485,7 +495,7 @@
 BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
 BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
 BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
 BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
@@ -507,7 +517,7 @@
 -include config.mak
 
 ifndef NO_DWARF
-FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
+FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
 ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
 	msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
 	NO_DWARF := 1
@@ -554,7 +564,7 @@
 ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
 	msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
 else
-	BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT
+	BASIC_CFLAGS += -DDWARF_SUPPORT
 	EXTLIBS += -lelf -ldw
 	LIB_OBJS += $(OUTPUT)util/probe-finder.o
 endif # PERF_HAVE_DWARF_REGS
@@ -891,13 +901,14 @@
 SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
 
-LIBS = $(PERFLIBS) $(EXTLIBS)
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
 
 BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
 	$(COMPAT_CFLAGS)
 LIB_OBJS += $(COMPAT_OBJS)
 
 ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_CFLAGS += $(ARCH_CFLAGS)
 ALL_LDFLAGS += $(BASIC_LDFLAGS)
 
 export TAR INSTALL DESTDIR SHELL_PATH
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644
index 0000000..a72e36c
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc)		\
+	extern void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644
index 0000000..d588b87
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -0,0 +1,4 @@
+
+MEMCPY_FN(__memcpy,
+	"x86-64-unrolled",
+	"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 0000000..a57b66e
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,2 @@
+
+#include "../../../arch/x86/lib/memcpy_64.S"
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 38dae74..db82021 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -12,6 +12,7 @@
 #include "../util/parse-options.h"
 #include "../util/header.h"
 #include "bench.h"
+#include "mem-memcpy-arch.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -23,8 +24,10 @@
 
 static const char	*length_str	= "1MB";
 static const char	*routine	= "default";
-static bool		use_clock	= false;
+static bool		use_clock;
 static int		clock_fd;
+static bool		only_prefault;
+static bool		no_prefault;
 
 static const struct option options[] = {
 	OPT_STRING('l', "length", &length_str, "1MB",
@@ -34,19 +37,33 @@
 		    "Specify routine to copy"),
 	OPT_BOOLEAN('c', "clock", &use_clock,
 		    "Use CPU clock for measuring"),
+	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+		    "Show only the result with page faults before memcpy()"),
+	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+		    "Show only the result without page faults before memcpy()"),
 	OPT_END()
 };
 
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+
 struct routine {
 	const char *name;
 	const char *desc;
-	void * (*fn)(void *dst, const void *src, size_t len);
+	memcpy_t fn;
 };
 
 struct routine routines[] = {
 	{ "default",
 	  "Default memcpy() provided by glibc",
 	  memcpy },
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memcpy-x86-64-asm-def.h"
+#undef MEMCPY_FN
+
+#endif
+
 	{ NULL,
 	  NULL,
 	  NULL   }
@@ -89,29 +106,98 @@
 		(double)ts->tv_usec / (double)1000000;
 }
 
+static void alloc_mem(void **dst, void **src, size_t length)
+{
+	*dst = zalloc(length);
+	if (!dst)
+		die("memory allocation failed - maybe length is too large?\n");
+
+	*src = zalloc(length);
+	if (!src)
+		die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
+{
+	u64 clock_start = 0ULL, clock_end = 0ULL;
+	void *src = NULL, *dst = NULL;
+
+	alloc_mem(&src, &dst, len);
+
+	if (prefault)
+		fn(dst, src, len);
+
+	clock_start = get_clock();
+	fn(dst, src, len);
+	clock_end = get_clock();
+
+	free(src);
+	free(dst);
+	return clock_end - clock_start;
+}
+
+static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	void *src = NULL, *dst = NULL;
+
+	alloc_mem(&src, &dst, len);
+
+	if (prefault)
+		fn(dst, src, len);
+
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	fn(dst, src, len);
+	BUG_ON(gettimeofday(&tv_end, NULL));
+
+	timersub(&tv_end, &tv_start, &tv_diff);
+
+	free(src);
+	free(dst);
+	return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {					\
+		if (x < K)					\
+			printf(" %14lf B/Sec", x);		\
+		else if (x < K * K)				\
+			printf(" %14lfd KB/Sec", x / K);	\
+		else if (x < K * K * K)				\
+			printf(" %14lf MB/Sec", x / K / K);	\
+		else						\
+			printf(" %14lf GB/Sec", x / K / K / K); \
+	} while (0)
+
 int bench_mem_memcpy(int argc, const char **argv,
 		     const char *prefix __used)
 {
 	int i;
-	void *dst, *src;
-	size_t length;
-	double bps = 0.0;
-	struct timeval tv_start, tv_end, tv_diff;
-	u64 clock_start, clock_end, clock_diff;
+	size_t len;
+	double result_bps[2];
+	u64 result_clock[2];
 
-	clock_start = clock_end = clock_diff = 0ULL;
 	argc = parse_options(argc, argv, options,
 			     bench_mem_memcpy_usage, 0);
 
-	tv_diff.tv_sec = 0;
-	tv_diff.tv_usec = 0;
-	length = (size_t)perf_atoll((char *)length_str);
+	if (use_clock)
+		init_clock();
 
-	if ((s64)length <= 0) {
+	len = (size_t)perf_atoll((char *)length_str);
+
+	result_clock[0] = result_clock[1] = 0ULL;
+	result_bps[0] = result_bps[1] = 0.0;
+
+	if ((s64)len <= 0) {
 		fprintf(stderr, "Invalid length:%s\n", length_str);
 		return 1;
 	}
 
+	/* same to without specifying either of prefault and no-prefault */
+	if (only_prefault && no_prefault)
+		only_prefault = no_prefault = false;
+
 	for (i = 0; routines[i].name; i++) {
 		if (!strcmp(routines[i].name, routine))
 			break;
@@ -126,61 +212,80 @@
 		return 1;
 	}
 
-	dst = zalloc(length);
-	if (!dst)
-		die("memory allocation failed - maybe length is too large?\n");
+	if (bench_format == BENCH_FORMAT_DEFAULT)
+		printf("# Copying %s Bytes ...\n\n", length_str);
 
-	src = zalloc(length);
-	if (!src)
-		die("memory allocation failed - maybe length is too large?\n");
-
-	if (bench_format == BENCH_FORMAT_DEFAULT) {
-		printf("# Copying %s Bytes from %p to %p ...\n\n",
-		       length_str, src, dst);
-	}
-
-	if (use_clock) {
-		init_clock();
-		clock_start = get_clock();
+	if (!only_prefault && !no_prefault) {
+		/* show both of results */
+		if (use_clock) {
+			result_clock[0] =
+				do_memcpy_clock(routines[i].fn, len, false);
+			result_clock[1] =
+				do_memcpy_clock(routines[i].fn, len, true);
+		} else {
+			result_bps[0] =
+				do_memcpy_gettimeofday(routines[i].fn,
+						len, false);
+			result_bps[1] =
+				do_memcpy_gettimeofday(routines[i].fn,
+						len, true);
+		}
 	} else {
-		BUG_ON(gettimeofday(&tv_start, NULL));
-	}
-
-	routines[i].fn(dst, src, length);
-
-	if (use_clock) {
-		clock_end = get_clock();
-		clock_diff = clock_end - clock_start;
-	} else {
-		BUG_ON(gettimeofday(&tv_end, NULL));
-		timersub(&tv_end, &tv_start, &tv_diff);
-		bps = (double)((double)length / timeval2double(&tv_diff));
+		if (use_clock) {
+			result_clock[pf] =
+				do_memcpy_clock(routines[i].fn,
+						len, only_prefault);
+		} else {
+			result_bps[pf] =
+				do_memcpy_gettimeofday(routines[i].fn,
+						len, only_prefault);
+		}
 	}
 
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
-		if (use_clock) {
-			printf(" %14lf Clock/Byte\n",
-			       (double)clock_diff / (double)length);
-		} else {
-			if (bps < K)
-				printf(" %14lf B/Sec\n", bps);
-			else if (bps < K * K)
-				printf(" %14lfd KB/Sec\n", bps / 1024);
-			else if (bps < K * K * K)
-				printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
-			else {
-				printf(" %14lf GB/Sec\n",
-				       bps / 1024 / 1024 / 1024);
+		if (!only_prefault && !no_prefault) {
+			if (use_clock) {
+				printf(" %14lf Clock/Byte\n",
+					(double)result_clock[0]
+					/ (double)len);
+				printf(" %14lf Clock/Byte (with prefault)\n",
+					(double)result_clock[1]
+					/ (double)len);
+			} else {
+				print_bps(result_bps[0]);
+				printf("\n");
+				print_bps(result_bps[1]);
+				printf(" (with prefault)\n");
 			}
+		} else {
+			if (use_clock) {
+				printf(" %14lf Clock/Byte",
+					(double)result_clock[pf]
+					/ (double)len);
+			} else
+				print_bps(result_bps[pf]);
+
+			printf("%s\n", only_prefault ? " (with prefault)" : "");
 		}
 		break;
 	case BENCH_FORMAT_SIMPLE:
-		if (use_clock) {
-			printf("%14lf\n",
-			       (double)clock_diff / (double)length);
-		} else
-			printf("%lf\n", bps);
+		if (!only_prefault && !no_prefault) {
+			if (use_clock) {
+				printf("%lf %lf\n",
+					(double)result_clock[0] / (double)len,
+					(double)result_clock[1] / (double)len);
+			} else {
+				printf("%lf %lf\n",
+					result_bps[0], result_bps[1]);
+			}
+		} else {
+			if (use_clock) {
+				printf("%lf\n", (double)result_clock[pf]
+					/ (double)len);
+			} else
+				printf("%lf\n", result_bps[pf]);
+		}
 		break;
 	default:
 		/* reaching this means there's some disaster: */
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6d5604d8d..c056cdc 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -58,12 +58,12 @@
 	return hist_entry__inc_addr_samples(he, al->addr);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+				struct perf_session *session)
 {
 	struct addr_location al;
-	struct sample_data data;
 
-	if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+	if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
@@ -375,6 +375,8 @@
 	.mmap	= event__process_mmap,
 	.comm	= event__process_comm,
 	.fork	= event__process_task,
+	.ordered_samples = true,
+	.ordering_requires_timestamps = true,
 };
 
 static int __cmd_annotate(void)
@@ -382,7 +384,7 @@
 	int ret;
 	struct perf_session *session;
 
-	session = perf_session__new(input_name, O_RDONLY, force, false);
+	session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
 	if (session == NULL)
 		return -ENOMEM;
 
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 44a47e1..3b06f9c 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -39,7 +39,8 @@
 	int err = -1;
 	struct perf_session *session;
 
-	session = perf_session__new(input_name, O_RDONLY, force, false);
+	session = perf_session__new(input_name, O_RDONLY, force, false,
+				    &build_id__mark_dso_hit_ops);
 	if (session == NULL)
 		return -1;
 
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index fca1d44..3153e49 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,12 +30,13 @@
 	return -ENOMEM;
 }
 
-static int diff__process_sample_event(event_t *event, struct perf_session *session)
+static int diff__process_sample_event(event_t *event,
+				      struct sample_data *sample,
+				      struct perf_session *session)
 {
 	struct addr_location al;
-	struct sample_data data = { .period = 1, };
 
-	if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+	if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
@@ -44,12 +45,12 @@
 	if (al.filtered || al.sym == NULL)
 		return 0;
 
-	if (hists__add_entry(&session->hists, &al, data.period)) {
+	if (hists__add_entry(&session->hists, &al, sample->period)) {
 		pr_warning("problem incrementing symbol period, skipping event\n");
 		return -1;
 	}
 
-	session->hists.stats.total_period += data.period;
+	session->hists.stats.total_period += sample->period;
 	return 0;
 }
 
@@ -60,6 +61,8 @@
 	.exit	= event__process_task,
 	.fork	= event__process_task,
 	.lost	= event__process_lost,
+	.ordered_samples = true,
+	.ordering_requires_timestamps = true,
 };
 
 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -141,8 +144,8 @@
 	int ret, i;
 	struct perf_session *session[2];
 
-	session[0] = perf_session__new(input_old, O_RDONLY, force, false);
-	session[1] = perf_session__new(input_new, O_RDONLY, force, false);
+	session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
+	session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
 	if (session[0] == NULL || session[1] == NULL)
 		return -ENOMEM;
 
@@ -173,7 +176,7 @@
 static const struct option options[] = {
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('m', "displacement", &show_displacement,
+	OPT_BOOLEAN('M', "displacement", &show_displacement,
 		    "Show position displacement relative to baseline"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
@@ -191,6 +194,8 @@
 	OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
 		   "separator for columns, no spaces will be added between "
 		   "columns '.' is reserved."),
+	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+		    "Look for files with symbols relative to this directory"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 8e3e47b..0c78ffa 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
 static char		const *input_name = "-";
 static bool		inject_build_ids;
 
-static int event__repipe(event_t *event __used,
-			 struct perf_session *session __used)
+static int event__repipe_synth(event_t *event,
+			       struct perf_session *session __used)
 {
 	uint32_t size;
 	void *buf = event;
@@ -36,22 +36,30 @@
 	return 0;
 }
 
-static int event__repipe_mmap(event_t *self, struct perf_session *session)
+static int event__repipe(event_t *event, struct sample_data *sample __used,
+			 struct perf_session *session)
+{
+	return event__repipe_synth(event, session);
+}
+
+static int event__repipe_mmap(event_t *self, struct sample_data *sample,
+			      struct perf_session *session)
 {
 	int err;
 
-	err = event__process_mmap(self, session);
-	event__repipe(self, session);
+	err = event__process_mmap(self, sample, session);
+	event__repipe(self, sample, session);
 
 	return err;
 }
 
-static int event__repipe_task(event_t *self, struct perf_session *session)
+static int event__repipe_task(event_t *self, struct sample_data *sample,
+			      struct perf_session *session)
 {
 	int err;
 
-	err = event__process_task(self, session);
-	event__repipe(self, session);
+	err = event__process_task(self, sample, session);
+	event__repipe(self, sample, session);
 
 	return err;
 }
@@ -61,7 +69,7 @@
 {
 	int err;
 
-	event__repipe(self, session);
+	event__repipe_synth(self, session);
 	err = event__process_tracing_data(self, session);
 
 	return err;
@@ -111,7 +119,8 @@
 	return 0;
 }
 
-static int event__inject_buildid(event_t *event, struct perf_session *session)
+static int event__inject_buildid(event_t *event, struct sample_data *sample,
+				 struct perf_session *session)
 {
 	struct addr_location al;
 	struct thread *thread;
@@ -146,7 +155,7 @@
 	}
 
 repipe:
-	event__repipe(event, session);
+	event__repipe(event, sample, session);
 	return 0;
 }
 
@@ -160,10 +169,10 @@
 	.read		= event__repipe,
 	.throttle	= event__repipe,
 	.unthrottle	= event__repipe,
-	.attr		= event__repipe,
-	.event_type 	= event__repipe,
-	.tracing_data 	= event__repipe,
-	.build_id 	= event__repipe,
+	.attr		= event__repipe_synth,
+	.event_type 	= event__repipe_synth,
+	.tracing_data 	= event__repipe_synth,
+	.build_id 	= event__repipe_synth,
 };
 
 extern volatile int session_done;
@@ -187,7 +196,7 @@
 		inject_ops.tracing_data	= event__repipe_tracing_data;
 	}
 
-	session = perf_session__new(input_name, O_RDONLY, false, true);
+	session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
 	if (session == NULL)
 		return -ENOMEM;
 
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 31f60a2..def7ddc 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -304,22 +304,11 @@
 	}
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+				struct perf_session *session)
 {
-	struct sample_data data;
-	struct thread *thread;
+	struct thread *thread = perf_session__findnew(session, event->ip.pid);
 
-	memset(&data, 0, sizeof(data));
-	data.time = -1;
-	data.cpu = -1;
-	data.period = 1;
-
-	event__parse_sample(event, session->sample_type, &data);
-
-	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-		    data.pid, data.tid, data.ip, data.period);
-
-	thread = perf_session__findnew(session, event->ip.pid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
@@ -328,8 +317,8 @@
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	process_raw_event(event, data.raw_data, data.cpu,
-			  data.time, thread);
+	process_raw_event(event, sample->raw_data, sample->cpu,
+			  sample->time, thread);
 
 	return 0;
 }
@@ -492,7 +481,8 @@
 static int __cmd_kmem(void)
 {
 	int err = -EINVAL;
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+							 0, false, &event_ops);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -747,6 +737,9 @@
 	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = strdup(record_args[i]);
 
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 821c158..b9c6e54 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,22 +834,18 @@
 		die("Unknown type of information\n");
 }
 
-static int process_sample_event(event_t *self, struct perf_session *s)
+static int process_sample_event(event_t *self, struct sample_data *sample,
+				struct perf_session *s)
 {
-	struct sample_data data;
-	struct thread *thread;
+	struct thread *thread = perf_session__findnew(s, sample->tid);
 
-	bzero(&data, sizeof(data));
-	event__parse_sample(self, s->sample_type, &data);
-
-	thread = perf_session__findnew(s, data.tid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			self->header.type);
 		return -1;
 	}
 
-	process_raw_event(data.raw_data, data.cpu, data.time, thread);
+	process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
 
 	return 0;
 }
@@ -862,7 +858,7 @@
 
 static int read_events(void)
 {
-	session = perf_session__new(input_name, O_RDONLY, 0, false);
+	session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
 	if (!session)
 		die("Initializing perf session failed\n");
 
@@ -947,6 +943,9 @@
 	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = strdup(record_args[i]);
 
@@ -982,9 +981,9 @@
 				usage_with_options(report_usage, report_options);
 		}
 		__cmd_report();
-	} else if (!strcmp(argv[0], "trace")) {
-		/* Aliased to 'perf trace' */
-		return cmd_trace(argc, argv, prefix);
+	} else if (!strcmp(argv[0], "script")) {
+		/* Aliased to 'perf script' */
+		return cmd_script(argc, argv, prefix);
 	} else if (!strcmp(argv[0], "info")) {
 		if (argc) {
 			argc = parse_options(argc, argv,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 564491f..50efbd5 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -36,6 +36,7 @@
 
 static u64			user_interval			= ULLONG_MAX;
 static u64			default_interval		=      0;
+static u64			sample_type;
 
 static int			nr_cpus				=      0;
 static unsigned int		page_size;
@@ -48,6 +49,7 @@
 static int			group				=      0;
 static int			realtime_prio			=      0;
 static bool			raw_samples			=  false;
+static bool			sample_id_all_avail		=   true;
 static bool			system_wide			=  false;
 static pid_t			target_pid			=     -1;
 static pid_t			target_tid			=     -1;
@@ -60,7 +62,9 @@
 static bool			inherit_stat			=  false;
 static bool			no_samples			=  false;
 static bool			sample_address			=  false;
+static bool			sample_time			=  false;
 static bool			no_buildid			=  false;
+static bool			no_buildid_cache		=  false;
 
 static long			samples				=      0;
 static u64			bytes_written			=      0;
@@ -128,6 +132,7 @@
 }
 
 static int process_synthesized_event(event_t *event,
+				     struct sample_data *sample __used,
 				     struct perf_session *self __used)
 {
 	write_output(event, event->header.size);
@@ -238,6 +243,19 @@
 		u64 time_running;
 		u64 id;
 	} read_data;
+	/*
+ 	 * Check if parse_single_tracepoint_event has already asked for
+ 	 * PERF_SAMPLE_TIME.
+ 	 *
+	 * XXX this is kludgy but short term fix for problems introduced by
+	 * eac23d1c that broke 'perf script' by having different sample_types
+	 * when using multiple tracepoint events when we use a perf binary
+	 * that tries to use sample_id_all on an older kernel.
+ 	 *
+ 	 * We need to move counter creation to perf_session, support
+ 	 * different sample_types, etc.
+ 	 */
+	bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
 
 	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
 				  PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -280,6 +298,10 @@
 	if (system_wide)
 		attr->sample_type	|= PERF_SAMPLE_CPU;
 
+	if (sample_id_all_avail &&
+	    (sample_time || system_wide || !no_inherit || cpu_list))
+		attr->sample_type	|= PERF_SAMPLE_TIME;
+
 	if (raw_samples) {
 		attr->sample_type	|= PERF_SAMPLE_TIME;
 		attr->sample_type	|= PERF_SAMPLE_RAW;
@@ -293,6 +315,8 @@
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
 	}
+retry_sample_id:
+	attr->sample_id_all = sample_id_all_avail ? 1 : 0;
 
 	for (thread_index = 0; thread_index < thread_num; thread_index++) {
 try_again:
@@ -309,6 +333,15 @@
 			else if (err ==  ENODEV && cpu_list) {
 				die("No such device - did you specify"
 					" an out-of-range profile CPU?\n");
+			} else if (err == EINVAL && sample_id_all_avail) {
+				/*
+				 * Old kernel, no attr->sample_id_type_all field
+				 */
+				sample_id_all_avail = false;
+				if (!sample_time && !raw_samples && !time_needed)
+					attr->sample_type &= ~PERF_SAMPLE_TIME;
+
+				goto retry_sample_id;
 			}
 
 			/*
@@ -326,7 +359,7 @@
 				goto try_again;
 			}
 			printf("\n");
-			error("perfcounter syscall returned with %d (%s)\n",
+			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
 					fd[nr_cpu][counter][thread_index], strerror(err));
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -406,6 +439,9 @@
 			}
 		}
 	}
+
+	if (!sample_type)
+		sample_type = attr->sample_type;
 }
 
 static void open_counters(int cpu)
@@ -437,7 +473,8 @@
 	if (!pipe_output) {
 		session->header.data_size += bytes_written;
 
-		process_buildids();
+		if (!no_buildid)
+			process_buildids();
 		perf_header__write(&session->header, output, true);
 		perf_session__delete(session);
 		symbol__exit();
@@ -552,12 +589,15 @@
 	}
 
 	session = perf_session__new(output_name, O_WRONLY,
-				    write_mode == WRITE_FORCE, false);
+				    write_mode == WRITE_FORCE, false, NULL);
 	if (session == NULL) {
 		pr_err("Not enough memory for reading perf file header\n");
 		return -1;
 	}
 
+	if (!no_buildid)
+		perf_header__set_feat(&session->header, HEADER_BUILD_ID);
+
 	if (!file_new) {
 		err = perf_header__read(session, output);
 		if (err < 0)
@@ -639,6 +679,8 @@
 			open_counters(cpumap[i]);
 	}
 
+	perf_session__set_sample_type(session, sample_type);
+
 	if (pipe_output) {
 		err = perf_header__write_pipe(output);
 		if (err < 0)
@@ -651,6 +693,8 @@
 
 	post_processing_offset = lseek(output, 0, SEEK_CUR);
 
+	perf_session__set_sample_id_all(session, sample_id_all_avail);
+
 	if (pipe_output) {
 		err = event__synthesize_attrs(&session->header,
 					      process_synthesized_event,
@@ -831,10 +875,13 @@
 		    "per thread counts"),
 	OPT_BOOLEAN('d', "data", &sample_address,
 		    "Sample addresses"),
+	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
 	OPT_BOOLEAN('n', "no-samples", &no_samples,
 		    "don't sample"),
-	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
+	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
 		    "do not update the buildid cache"),
+	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
+		    "do not collect buildids in perf.data"),
 	OPT_END()
 };
 
@@ -859,7 +906,8 @@
 	}
 
 	symbol__init();
-	if (no_buildid)
+
+	if (no_buildid_cache || no_buildid)
 		disable_buildid_cache();
 
 	if (!nr_counters) {
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5de405d..75183a4 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -150,13 +150,13 @@
 	return 0;
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+				struct perf_session *session)
 {
-	struct sample_data data = { .period = 1, };
 	struct addr_location al;
 	struct perf_event_attr *attr;
 
-	if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+	if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 			event->header.type);
 		return -1;
@@ -165,14 +165,14 @@
 	if (al.filtered || (hide_unresolved && al.sym == NULL))
 		return 0;
 
-	if (perf_session__add_hist_entry(session, &al, &data)) {
+	if (perf_session__add_hist_entry(session, &al, sample)) {
 		pr_debug("problem incrementing symbol period, skipping event\n");
 		return -1;
 	}
 
-	attr = perf_header__find_attr(data.id, &session->header);
+	attr = perf_header__find_attr(sample->id, &session->header);
 
-	if (add_event_total(session, &data, attr)) {
+	if (add_event_total(session, sample, attr)) {
 		pr_debug("problem adding event period\n");
 		return -1;
 	}
@@ -180,7 +180,8 @@
 	return 0;
 }
 
-static int process_read_event(event_t *event, struct perf_session *session __used)
+static int process_read_event(event_t *event, struct sample_data *sample __used,
+			      struct perf_session *session __used)
 {
 	struct perf_event_attr *attr;
 
@@ -243,6 +244,8 @@
 	.event_type = event__process_event_type,
 	.tracing_data = event__process_tracing_data,
 	.build_id = event__process_build_id,
+	.ordered_samples = true,
+	.ordering_requires_timestamps = true,
 };
 
 extern volatile int session_done;
@@ -307,7 +310,7 @@
 
 	signal(SIGINT, sig_handler);
 
-	session = perf_session__new(input_name, O_RDONLY, force, false);
+	session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -442,6 +445,8 @@
 		    "dump raw trace in ASCII"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
+	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+		   "file", "kallsyms pathname"),
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
 	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
@@ -478,6 +483,8 @@
 		   "columns '.' is reserved."),
 	OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
 		    "Only display entries resolved to a symbol"),
+	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+		    "Look for files with symbols relative to this directory"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 55f3b5d..7a4ebeb 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1606,25 +1606,15 @@
 		process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+				struct perf_session *session)
 {
-	struct sample_data data;
 	struct thread *thread;
 
 	if (!(session->sample_type & PERF_SAMPLE_RAW))
 		return 0;
 
-	memset(&data, 0, sizeof(data));
-	data.time = -1;
-	data.cpu = -1;
-	data.period = -1;
-
-	event__parse_sample(event, session->sample_type, &data);
-
-	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-		    data.pid, data.tid, data.ip, data.period);
-
-	thread = perf_session__findnew(session, data.pid);
+	thread = perf_session__findnew(session, sample->pid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
@@ -1633,10 +1623,11 @@
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	if (profile_cpu != -1 && profile_cpu != (int)data.cpu)
+	if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
 		return 0;
 
-	process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread);
+	process_raw_event(event, session, sample->raw_data, sample->cpu,
+			  sample->time, thread);
 
 	return 0;
 }
@@ -1652,7 +1643,8 @@
 static int read_events(void)
 {
 	int err = -EINVAL;
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+							 0, false, &event_ops);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -1869,6 +1861,9 @@
 	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+	if (rec_argv)
+		return -ENOMEM;
+
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = strdup(record_args[i]);
 
@@ -1888,10 +1883,10 @@
 		usage_with_options(sched_usage, sched_options);
 
 	/*
-	 * Aliased to 'perf trace' for now:
+	 * Aliased to 'perf script' for now:
 	 */
-	if (!strcmp(argv[0], "trace"))
-		return cmd_trace(argc, argv, prefix);
+	if (!strcmp(argv[0], "script"))
+		return cmd_script(argc, argv, prefix);
 
 	symbol__init();
 	if (!strncmp(argv[0], "rec", 3)) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-script.c
similarity index 83%
rename from tools/perf/builtin-trace.c
rename to tools/perf/builtin-script.c
index 86cfe38..43480fd 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-script.c
@@ -56,29 +56,18 @@
 
 static int cleanup_scripting(void)
 {
-	pr_debug("\nperf trace script stopped\n");
+	pr_debug("\nperf script stopped\n");
 
 	return scripting_ops->stop_script();
 }
 
 static char const		*input_name = "perf.data";
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+				struct perf_session *session)
 {
-	struct sample_data data;
-	struct thread *thread;
+	struct thread *thread = perf_session__findnew(session, event->ip.pid);
 
-	memset(&data, 0, sizeof(data));
-	data.time = -1;
-	data.cpu = -1;
-	data.period = 1;
-
-	event__parse_sample(event, session->sample_type, &data);
-
-	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-		    data.pid, data.tid, data.ip, data.period);
-
-	thread = perf_session__findnew(session, event->ip.pid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
@@ -87,13 +76,13 @@
 
 	if (session->sample_type & PERF_SAMPLE_RAW) {
 		if (debug_mode) {
-			if (data.time < last_timestamp) {
+			if (sample->time < last_timestamp) {
 				pr_err("Samples misordered, previous: %llu "
 					"this: %llu\n", last_timestamp,
-					data.time);
+					sample->time);
 				nr_unordered++;
 			}
-			last_timestamp = data.time;
+			last_timestamp = sample->time;
 			return 0;
 		}
 		/*
@@ -101,18 +90,19 @@
 		 * field, although it should be the same than this perf
 		 * event pid
 		 */
-		scripting_ops->process_event(data.cpu, data.raw_data,
-					     data.raw_size,
-					     data.time, thread->comm);
+		scripting_ops->process_event(sample->cpu, sample->raw_data,
+					     sample->raw_size,
+					     sample->time, thread->comm);
 	}
 
-	session->hists.stats.total_period += data.period;
+	session->hists.stats.total_period += sample->period;
 	return 0;
 }
 
 static u64 nr_lost;
 
-static int process_lost_event(event_t *event, struct perf_session *session __used)
+static int process_lost_event(event_t *event, struct sample_data *sample __used,
+			      struct perf_session *session __used)
 {
 	nr_lost += event->lost.lost;
 
@@ -127,6 +117,7 @@
 	.tracing_data = event__process_tracing_data,
 	.build_id = event__process_build_id,
 	.lost = process_lost_event,
+	.ordering_requires_timestamps = true,
 	.ordered_samples = true,
 };
 
@@ -137,7 +128,7 @@
 	session_done = 1;
 }
 
-static int __cmd_trace(struct perf_session *session)
+static int __cmd_script(struct perf_session *session)
 {
 	int ret;
 
@@ -247,7 +238,7 @@
 
 	fprintf(stderr, "\n");
 	fprintf(stderr, "Scripting language extensions (used in "
-		"perf trace -s [spec:]script.[spec]):\n\n");
+		"perf script -s [spec:]script.[spec]):\n\n");
 
 	list_for_each_entry(s, &script_specs, node)
 		fprintf(stderr, "  %-42s [%s]\n", s->spec, s->ops->name);
@@ -301,17 +292,34 @@
 	return 0;
 }
 
-#define for_each_lang(scripts_dir, lang_dirent, lang_next)		\
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+static int is_directory(const char *base_path, const struct dirent *dent)
+{
+	char path[PATH_MAX];
+	struct stat st;
+
+	sprintf(path, "%s/%s", base_path, dent->d_name);
+	if (stat(path, &st))
+		return 0;
+
+	return S_ISDIR(st.st_mode);
+}
+
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
 	while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) &&	\
 	       lang_next)						\
-		if (lang_dirent.d_type == DT_DIR &&			\
+		if ((lang_dirent.d_type == DT_DIR ||			\
+		     (lang_dirent.d_type == DT_UNKNOWN &&		\
+		      is_directory(scripts_path, &lang_dirent))) &&	\
 		    (strcmp(lang_dirent.d_name, ".")) &&		\
 		    (strcmp(lang_dirent.d_name, "..")))
 
-#define for_each_script(lang_dir, script_dirent, script_next)		\
+#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
 	while (!readdir_r(lang_dir, &script_dirent, &script_next) &&	\
 	       script_next)						\
-		if (script_dirent.d_type != DT_DIR)
+		if (script_dirent.d_type != DT_DIR &&			\
+		    (script_dirent.d_type != DT_UNKNOWN ||		\
+		     !is_directory(lang_path, &script_dirent)))
 
 
 #define RECORD_SUFFIX			"-record"
@@ -380,10 +388,10 @@
 	return NULL;
 }
 
-static char *ends_with(char *str, const char *suffix)
+static const char *ends_with(const char *str, const char *suffix)
 {
 	size_t suffix_len = strlen(suffix);
-	char *p = str;
+	const char *p = str;
 
 	if (strlen(str) > suffix_len) {
 		p = str + strlen(str) - suffix_len;
@@ -466,16 +474,16 @@
 	if (!scripts_dir)
 		return -1;
 
-	for_each_lang(scripts_dir, lang_dirent, lang_next) {
+	for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
 		snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
 			 lang_dirent.d_name);
 		lang_dir = opendir(lang_path);
 		if (!lang_dir)
 			continue;
 
-		for_each_script(lang_dir, script_dirent, script_next) {
+		for_each_script(lang_path, lang_dir, script_dirent, script_next) {
 			script_root = strdup(script_dirent.d_name);
-			str = ends_with(script_root, REPORT_SUFFIX);
+			str = (char *)ends_with(script_root, REPORT_SUFFIX);
 			if (str) {
 				*str = '\0';
 				desc = script_desc__findnew(script_root);
@@ -514,16 +522,16 @@
 	if (!scripts_dir)
 		return NULL;
 
-	for_each_lang(scripts_dir, lang_dirent, lang_next) {
+	for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
 		snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
 			 lang_dirent.d_name);
 		lang_dir = opendir(lang_path);
 		if (!lang_dir)
 			continue;
 
-		for_each_script(lang_dir, script_dirent, script_next) {
+		for_each_script(lang_path, lang_dir, script_dirent, script_next) {
 			__script_root = strdup(script_dirent.d_name);
-			str = ends_with(__script_root, suffix);
+			str = (char *)ends_with(__script_root, suffix);
 			if (str) {
 				*str = '\0';
 				if (strcmp(__script_root, script_root))
@@ -543,7 +551,7 @@
 
 static bool is_top_script(const char *script_path)
 {
-	return ends_with((char *)script_path, "top") == NULL ? false : true;
+	return ends_with(script_path, "top") == NULL ? false : true;
 }
 
 static int has_required_arg(char *script_path)
@@ -569,12 +577,12 @@
 	return n_args;
 }
 
-static const char * const trace_usage[] = {
-	"perf trace [<options>]",
-	"perf trace [<options>] record <script> [<record-options>] <command>",
-	"perf trace [<options>] report <script> [script-args]",
-	"perf trace [<options>] <script> [<record-options>] <command>",
-	"perf trace [<options>] <top-script> [script-args]",
+static const char * const script_usage[] = {
+	"perf script [<options>]",
+	"perf script [<options>] record <script> [<record-options>] <command>",
+	"perf script [<options>] report <script> [script-args]",
+	"perf script [<options>] <script> [<record-options>] <command>",
+	"perf script [<options>] <top-script> [script-args]",
 	NULL
 };
 
@@ -591,7 +599,7 @@
 		     "script file name (lang:script name, script name, or *)",
 		     parse_scriptname),
 	OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
-		   "generate perf-trace.xx script in specified language"),
+		   "generate perf-script.xx script in specified language"),
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
 	OPT_BOOLEAN('d', "debug-mode", &debug_mode,
@@ -614,7 +622,7 @@
 	return argc != 0;
 }
 
-int cmd_trace(int argc, const char **argv, const char *prefix __used)
+int cmd_script(int argc, const char **argv, const char *prefix __used)
 {
 	char *rec_script_path = NULL;
 	char *rep_script_path = NULL;
@@ -626,7 +634,7 @@
 
 	setup_scripting();
 
-	argc = parse_options(argc, argv, options, trace_usage,
+	argc = parse_options(argc, argv, options, script_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
@@ -640,7 +648,7 @@
 		if (!rep_script_path) {
 			fprintf(stderr,
 				"Please specify a valid report script"
-				"(see 'perf trace -l' for listing)\n");
+				"(see 'perf script -l' for listing)\n");
 			return -1;
 		}
 	}
@@ -658,8 +666,8 @@
 
 		if (!rec_script_path && !rep_script_path) {
 			fprintf(stderr, " Couldn't find script %s\n\n See perf"
-				" trace -l for available scripts.\n", argv[0]);
-			usage_with_options(trace_usage, options);
+				" script -l for available scripts.\n", argv[0]);
+			usage_with_options(script_usage, options);
 		}
 
 		if (is_top_script(argv[0])) {
@@ -671,9 +679,9 @@
 			rec_args = (argc - 1) - rep_args;
 			if (rec_args < 0) {
 				fprintf(stderr, " %s script requires options."
-					"\n\n See perf trace -l for available "
+					"\n\n See perf script -l for available "
 					"scripts and options.\n", argv[0]);
-				usage_with_options(trace_usage, options);
+				usage_with_options(script_usage, options);
 			}
 		}
 
@@ -772,7 +780,7 @@
 	if (!script_name)
 		setup_pager();
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false);
+	session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -806,7 +814,7 @@
 			return -1;
 		}
 
-		err = scripting_ops->generate_script("perf-trace");
+		err = scripting_ops->generate_script("perf-script");
 		goto out;
 	}
 
@@ -814,10 +822,10 @@
 		err = scripting_ops->start_script(script_name, argc, argv);
 		if (err)
 			goto out;
-		pr_debug("perf trace started with script %s\n\n", script_name);
+		pr_debug("perf script started with script %s\n\n", script_name);
 	}
 
-	err = __cmd_trace(session);
+	err = __cmd_script(session);
 
 	perf_session__delete(session);
 	cleanup_scripting();
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6b4d44..7ff746d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -52,6 +52,8 @@
 #include <math.h>
 #include <locale.h>
 
+#define DEFAULT_SEPARATOR	" "
+
 static struct perf_event_attr default_attrs[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
@@ -75,20 +77,30 @@
 static int			run_count			=  1;
 static bool			no_inherit			= false;
 static bool			scale				=  true;
+static bool			no_aggr				= false;
 static pid_t			target_pid			= -1;
 static pid_t			target_tid			= -1;
 static pid_t			*all_tids			=  NULL;
 static int			thread_num			=  0;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
-static bool			big_num				=  false;
+static bool			big_num				=  true;
+static int			big_num_opt			=  -1;
 static const char		*cpu_list;
+static const char		*csv_sep			= NULL;
+static bool			csv_output			= false;
 
 
 static int			*fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static int			event_scaled[MAX_COUNTERS];
 
+static struct {
+	u64 val;
+	u64 ena;
+	u64 run;
+} cpu_counts[MAX_NR_CPUS][MAX_COUNTERS];
+
 static volatile int done = 0;
 
 struct stats
@@ -136,19 +148,19 @@
 }
 
 struct stats			event_res_stats[MAX_COUNTERS][3];
-struct stats			runtime_nsecs_stats;
+struct stats			runtime_nsecs_stats[MAX_NR_CPUS];
+struct stats			runtime_cycles_stats[MAX_NR_CPUS];
+struct stats			runtime_branches_stats[MAX_NR_CPUS];
 struct stats			walltime_nsecs_stats;
-struct stats			runtime_cycles_stats;
-struct stats			runtime_branches_stats;
 
 #define MATCH_EVENT(t, c, counter)			\
 	(attrs[counter].type == PERF_TYPE_##t &&	\
 	 attrs[counter].config == PERF_COUNT_##c)
 
 #define ERR_PERF_OPEN \
-"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
+"counter %d, sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information."
 
-static int create_perf_stat_counter(int counter)
+static int create_perf_stat_counter(int counter, bool *perm_err)
 {
 	struct perf_event_attr *attr = attrs + counter;
 	int thread;
@@ -164,11 +176,14 @@
 		for (cpu = 0; cpu < nr_cpus; cpu++) {
 			fd[cpu][counter][0] = sys_perf_event_open(attr,
 					-1, cpumap[cpu], -1, 0);
-			if (fd[cpu][counter][0] < 0)
-				pr_debug(ERR_PERF_OPEN, counter,
+			if (fd[cpu][counter][0] < 0) {
+				if (errno == EPERM || errno == EACCES)
+					*perm_err = true;
+				error(ERR_PERF_OPEN, counter,
 					 fd[cpu][counter][0], strerror(errno));
-			else
+			} else {
 				++ncreated;
+			}
 		}
 	} else {
 		attr->inherit = !no_inherit;
@@ -179,12 +194,15 @@
 		for (thread = 0; thread < thread_num; thread++) {
 			fd[0][counter][thread] = sys_perf_event_open(attr,
 				all_tids[thread], -1, -1, 0);
-			if (fd[0][counter][thread] < 0)
-				pr_debug(ERR_PERF_OPEN, counter,
+			if (fd[0][counter][thread] < 0) {
+				if (errno == EPERM || errno == EACCES)
+					*perm_err = true;
+				error(ERR_PERF_OPEN, counter,
 					 fd[0][counter][thread],
 					 strerror(errno));
-			else
+			} else {
 				++ncreated;
+			}
 		}
 	}
 
@@ -205,8 +223,9 @@
 
 /*
  * Read out the results of a single counter:
+ * aggregate counts across CPUs in system-wide mode
  */
-static void read_counter(int counter)
+static void read_counter_aggr(int counter)
 {
 	u64 count[3], single_count[3];
 	int cpu;
@@ -264,11 +283,58 @@
 	 * Save the full runtime - to allow normalization during printout:
 	 */
 	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
-		update_stats(&runtime_nsecs_stats, count[0]);
+		update_stats(&runtime_nsecs_stats[0], count[0]);
 	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
-		update_stats(&runtime_cycles_stats, count[0]);
+		update_stats(&runtime_cycles_stats[0], count[0]);
 	if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
-		update_stats(&runtime_branches_stats, count[0]);
+		update_stats(&runtime_branches_stats[0], count[0]);
+}
+
+/*
+ * Read out the results of a single counter:
+ * do not aggregate counts across CPUs in system-wide mode
+ */
+static void read_counter(int counter)
+{
+	u64 count[3];
+	int cpu;
+	size_t res, nv;
+
+	count[0] = count[1] = count[2] = 0;
+
+	nv = scale ? 3 : 1;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+
+		if (fd[cpu][counter][0] < 0)
+			continue;
+
+		res = read(fd[cpu][counter][0], count, nv * sizeof(u64));
+
+		assert(res == nv * sizeof(u64));
+
+		close(fd[cpu][counter][0]);
+		fd[cpu][counter][0] = -1;
+
+		if (scale) {
+			if (count[2] == 0) {
+				count[0] = 0;
+			} else if (count[2] < count[1]) {
+				count[0] = (unsigned long long)
+				((double)count[0] * count[1] / count[2] + 0.5);
+			}
+		}
+		cpu_counts[cpu][counter].val = count[0]; /* scaled count */
+		cpu_counts[cpu][counter].ena = count[1];
+		cpu_counts[cpu][counter].run = count[2];
+
+		if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
+			update_stats(&runtime_nsecs_stats[cpu], count[0]);
+		if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
+			update_stats(&runtime_cycles_stats[cpu], count[0]);
+		if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
+			update_stats(&runtime_branches_stats[cpu], count[0]);
+	}
 }
 
 static int run_perf_stat(int argc __used, const char **argv)
@@ -277,6 +343,7 @@
 	int status = 0;
 	int counter, ncreated = 0;
 	int child_ready_pipe[2], go_pipe[2];
+	bool perm_err = false;
 	const bool forks = (argc > 0);
 	char buf;
 
@@ -335,12 +402,15 @@
 	}
 
 	for (counter = 0; counter < nr_counters; counter++)
-		ncreated += create_perf_stat_counter(counter);
+		ncreated += create_perf_stat_counter(counter, &perm_err);
 
-	if (ncreated == 0) {
-		pr_err("No permission to collect %sstats.\n"
-		       "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n",
-		       system_wide ? "system-wide " : "");
+	if (ncreated < nr_counters) {
+		if (perm_err)
+			error("You may not have permission to collect %sstats.\n"
+			      "\t Consider tweaking"
+			      " /proc/sys/kernel/perf_event_paranoid or running as root.",
+			      system_wide ? "system-wide " : "");
+		die("Not all events could be opened.\n");
 		if (child_pid != -1)
 			kill(child_pid, SIGTERM);
 		return -1;
@@ -362,9 +432,13 @@
 
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
-	for (counter = 0; counter < nr_counters; counter++)
-		read_counter(counter);
-
+	if (no_aggr) {
+		for (counter = 0; counter < nr_counters; counter++)
+			read_counter(counter);
+	} else {
+		for (counter = 0; counter < nr_counters; counter++)
+			read_counter_aggr(counter);
+	}
 	return WEXITSTATUS(status);
 }
 
@@ -377,11 +451,21 @@
 			100 * stddev_stats(&event_res_stats[counter][0]) / avg);
 }
 
-static void nsec_printout(int counter, double avg)
+static void nsec_printout(int cpu, int counter, double avg)
 {
 	double msecs = avg / 1e6;
+	char cpustr[16] = { '\0', };
+	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
 
-	fprintf(stderr, " %18.6f  %-24s", msecs, event_name(counter));
+	if (no_aggr)
+		sprintf(cpustr, "CPU%*d%s",
+			csv_output ? 0 : -4,
+			cpumap[cpu], csv_sep);
+
+	fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter));
+
+	if (csv_output)
+		return;
 
 	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
 		fprintf(stderr, " # %10.3f CPUs ",
@@ -389,33 +473,49 @@
 	}
 }
 
-static void abs_printout(int counter, double avg)
+static void abs_printout(int cpu, int counter, double avg)
 {
 	double total, ratio = 0.0;
+	char cpustr[16] = { '\0', };
+	const char *fmt;
 
-	if (big_num)
-		fprintf(stderr, " %'18.0f  %-24s", avg, event_name(counter));
+	if (csv_output)
+		fmt = "%s%.0f%s%s";
+	else if (big_num)
+		fmt = "%s%'18.0f%s%-24s";
 	else
-		fprintf(stderr, " %18.0f  %-24s", avg, event_name(counter));
+		fmt = "%s%18.0f%s%-24s";
+
+	if (no_aggr)
+		sprintf(cpustr, "CPU%*d%s",
+			csv_output ? 0 : -4,
+			cpumap[cpu], csv_sep);
+	else
+		cpu = 0;
+
+	fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter));
+
+	if (csv_output)
+		return;
 
 	if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
-		total = avg_stats(&runtime_cycles_stats);
+		total = avg_stats(&runtime_cycles_stats[cpu]);
 
 		if (total)
 			ratio = avg / total;
 
 		fprintf(stderr, " # %10.3f IPC  ", ratio);
 	} else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
-			runtime_branches_stats.n != 0) {
-		total = avg_stats(&runtime_branches_stats);
+			runtime_branches_stats[cpu].n != 0) {
+		total = avg_stats(&runtime_branches_stats[cpu]);
 
 		if (total)
 			ratio = avg * 100 / total;
 
 		fprintf(stderr, " # %10.3f %%    ", ratio);
 
-	} else if (runtime_nsecs_stats.n != 0) {
-		total = avg_stats(&runtime_nsecs_stats);
+	} else if (runtime_nsecs_stats[cpu].n != 0) {
+		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
 		if (total)
 			ratio = 1000.0 * avg / total;
@@ -426,22 +526,29 @@
 
 /*
  * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
  */
-static void print_counter(int counter)
+static void print_counter_aggr(int counter)
 {
 	double avg = avg_stats(&event_res_stats[counter][0]);
 	int scaled = event_scaled[counter];
 
 	if (scaled == -1) {
-		fprintf(stderr, " %18s  %-24s\n",
-			"<not counted>", event_name(counter));
+		fprintf(stderr, "%*s%s%-24s\n",
+			csv_output ? 0 : 18,
+			"<not counted>", csv_sep, event_name(counter));
 		return;
 	}
 
 	if (nsec_counter(counter))
-		nsec_printout(counter, avg);
+		nsec_printout(-1, counter, avg);
 	else
-		abs_printout(counter, avg);
+		abs_printout(-1, counter, avg);
+
+	if (csv_output) {
+		fputc('\n', stderr);
+		return;
+	}
 
 	print_noise(counter, avg);
 
@@ -458,40 +565,91 @@
 	fprintf(stderr, "\n");
 }
 
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated count in system-wide
+ */
+static void print_counter(int counter)
+{
+	u64 ena, run, val;
+	int cpu;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		val = cpu_counts[cpu][counter].val;
+		ena = cpu_counts[cpu][counter].ena;
+		run = cpu_counts[cpu][counter].run;
+		if (run == 0 || ena == 0) {
+			fprintf(stderr, "CPU%*d%s%*s%s%-24s",
+				csv_output ? 0 : -4,
+				cpumap[cpu], csv_sep,
+				csv_output ? 0 : 18,
+				"<not counted>", csv_sep,
+				event_name(counter));
+
+			fprintf(stderr, "\n");
+			continue;
+		}
+
+		if (nsec_counter(counter))
+			nsec_printout(cpu, counter, val);
+		else
+			abs_printout(cpu, counter, val);
+
+		if (!csv_output) {
+			print_noise(counter, 1.0);
+
+			if (run != ena) {
+				fprintf(stderr, "  (scaled from %.2f%%)",
+					100.0 * run / ena);
+			}
+		}
+		fprintf(stderr, "\n");
+	}
+}
+
 static void print_stat(int argc, const char **argv)
 {
 	int i, counter;
 
 	fflush(stdout);
 
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Performance counter stats for ");
-	if(target_pid == -1 && target_tid == -1) {
-		fprintf(stderr, "\'%s", argv[0]);
-		for (i = 1; i < argc; i++)
-			fprintf(stderr, " %s", argv[i]);
-	} else if (target_pid != -1)
-		fprintf(stderr, "process id \'%d", target_pid);
-	else
-		fprintf(stderr, "thread id \'%d", target_tid);
+	if (!csv_output) {
+		fprintf(stderr, "\n");
+		fprintf(stderr, " Performance counter stats for ");
+		if(target_pid == -1 && target_tid == -1) {
+			fprintf(stderr, "\'%s", argv[0]);
+			for (i = 1; i < argc; i++)
+				fprintf(stderr, " %s", argv[i]);
+		} else if (target_pid != -1)
+			fprintf(stderr, "process id \'%d", target_pid);
+		else
+			fprintf(stderr, "thread id \'%d", target_tid);
 
-	fprintf(stderr, "\'");
-	if (run_count > 1)
-		fprintf(stderr, " (%d runs)", run_count);
-	fprintf(stderr, ":\n\n");
+		fprintf(stderr, "\'");
+		if (run_count > 1)
+			fprintf(stderr, " (%d runs)", run_count);
+		fprintf(stderr, ":\n\n");
+	}
 
-	for (counter = 0; counter < nr_counters; counter++)
-		print_counter(counter);
+	if (no_aggr) {
+		for (counter = 0; counter < nr_counters; counter++)
+			print_counter(counter);
+	} else {
+		for (counter = 0; counter < nr_counters; counter++)
+			print_counter_aggr(counter);
+	}
 
-	fprintf(stderr, "\n");
-	fprintf(stderr, " %18.9f  seconds time elapsed",
-			avg_stats(&walltime_nsecs_stats)/1e9);
-	if (run_count > 1) {
-		fprintf(stderr, "   ( +- %7.3f%% )",
+	if (!csv_output) {
+		fprintf(stderr, "\n");
+		fprintf(stderr, " %18.9f  seconds time elapsed",
+				avg_stats(&walltime_nsecs_stats)/1e9);
+		if (run_count > 1) {
+			fprintf(stderr, "   ( +- %7.3f%% )",
 				100*stddev_stats(&walltime_nsecs_stats) /
 				avg_stats(&walltime_nsecs_stats));
+		}
+		fprintf(stderr, "\n\n");
 	}
-	fprintf(stderr, "\n\n");
 }
 
 static volatile int signr = -1;
@@ -521,6 +679,13 @@
 	NULL
 };
 
+static int stat__set_big_num(const struct option *opt __used,
+			     const char *s __used, int unset)
+{
+	big_num_opt = unset ? 0 : 1;
+	return 0;
+}
+
 static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
 		     "event selector. use 'perf list' to list available events",
@@ -541,10 +706,15 @@
 		    "repeat command and print average + stddev (max: 100)"),
 	OPT_BOOLEAN('n', "null", &null_run,
 		    "null run - dont start any counters"),
-	OPT_BOOLEAN('B', "big-num", &big_num,
-		    "print large numbers with thousands\' separators"),
+	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
+			   "print large numbers with thousands\' separators",
+			   stat__set_big_num),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu",
 		    "list of cpus to monitor in system-wide"),
+	OPT_BOOLEAN('A', "no-aggr", &no_aggr,
+		    "disable CPU count aggregation"),
+	OPT_STRING('x', "field-separator", &csv_sep, "separator",
+		   "print counts with custom separator"),
 	OPT_END()
 };
 
@@ -557,11 +727,34 @@
 
 	argc = parse_options(argc, argv, options, stat_usage,
 		PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (csv_sep)
+		csv_output = true;
+	else
+		csv_sep = DEFAULT_SEPARATOR;
+
+	/*
+	 * let the spreadsheet do the pretty-printing
+	 */
+	if (csv_output) {
+		/* User explicitely passed -B? */
+		if (big_num_opt == 1) {
+			fprintf(stderr, "-B option not supported with -x\n");
+			usage_with_options(stat_usage, options);
+		} else /* Nope, so disable big number formatting */
+			big_num = false;
+	} else if (big_num_opt == 0) /* User passed --no-big-num */
+		big_num = false;
+
 	if (!argc && target_pid == -1 && target_tid == -1)
 		usage_with_options(stat_usage, options);
 	if (run_count <= 0)
 		usage_with_options(stat_usage, options);
 
+	/* no_aggr is for system-wide only */
+	if (no_aggr && !system_wide)
+		usage_with_options(stat_usage, options);
+
 	/* Set attrs and nr_counters if no event is selected and !null_run */
 	if (!null_run && !nr_counters) {
 		memcpy(attrs, default_attrs, sizeof(default_attrs));
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 035b9fa..e0c3f47 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -119,10 +119,16 @@
 	 * end addresses too.
 	 */
 	for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
-		struct symbol *pair;
+		struct symbol *pair, *first_pair;
+		bool backwards = true;
 
 		sym  = rb_entry(nd, struct symbol, rb_node);
-		pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+
+		if (sym->start == sym->end)
+			continue;
+
+		first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+		pair = first_pair;
 
 		if (pair && pair->start == sym->start) {
 next_pair:
@@ -143,8 +149,10 @@
 				pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n",
 					 sym->start, sym->name, sym->end, pair->end);
 			} else {
-				struct rb_node *nnd = rb_prev(&pair->rb_node);
-
+				struct rb_node *nnd;
+detour:
+				nnd = backwards ? rb_prev(&pair->rb_node) :
+						  rb_next(&pair->rb_node);
 				if (nnd) {
 					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
 
@@ -153,6 +161,13 @@
 						goto next_pair;
 					}
 				}
+
+				if (backwards) {
+					backwards = false;
+					pair = first_pair;
+					goto detour;
+				}
+
 				pr_debug("%#Lx: diff name v: %s k: %s\n",
 					 sym->start, sym->name, pair->name);
 			}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 9bcc38f..d75084b 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -272,19 +272,22 @@
 static u64 cpus_pstate_start_times[MAX_CPUS];
 static u64 cpus_pstate_state[MAX_CPUS];
 
-static int process_comm_event(event_t *event, struct perf_session *session __used)
+static int process_comm_event(event_t *event, struct sample_data *sample __used,
+			      struct perf_session *session __used)
 {
 	pid_set_comm(event->comm.tid, event->comm.comm);
 	return 0;
 }
 
-static int process_fork_event(event_t *event, struct perf_session *session __used)
+static int process_fork_event(event_t *event, struct sample_data *sample __used,
+			      struct perf_session *session __used)
 {
 	pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
 	return 0;
 }
 
-static int process_exit_event(event_t *event, struct perf_session *session __used)
+static int process_exit_event(event_t *event, struct sample_data *sample __used,
+			      struct perf_session *session __used)
 {
 	pid_exit(event->fork.pid, event->fork.time);
 	return 0;
@@ -470,24 +473,21 @@
 }
 
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event __used,
+				struct sample_data *sample,
+				struct perf_session *session)
 {
-	struct sample_data data;
 	struct trace_entry *te;
 
-	memset(&data, 0, sizeof(data));
-
-	event__parse_sample(event, session->sample_type, &data);
-
 	if (session->sample_type & PERF_SAMPLE_TIME) {
-		if (!first_time || first_time > data.time)
-			first_time = data.time;
-		if (last_time < data.time)
-			last_time = data.time;
+		if (!first_time || first_time > sample->time)
+			first_time = sample->time;
+		if (last_time < sample->time)
+			last_time = sample->time;
 	}
 
-	te = (void *)data.raw_data;
-	if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) {
+	te = (void *)sample->raw_data;
+	if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
 		char *event_str;
 		struct power_entry *pe;
 
@@ -499,19 +499,19 @@
 			return 0;
 
 		if (strcmp(event_str, "power:power_start") == 0)
-			c_state_start(pe->cpu_id, data.time, pe->value);
+			c_state_start(pe->cpu_id, sample->time, pe->value);
 
 		if (strcmp(event_str, "power:power_end") == 0)
-			c_state_end(pe->cpu_id, data.time);
+			c_state_end(pe->cpu_id, sample->time);
 
 		if (strcmp(event_str, "power:power_frequency") == 0)
-			p_state_change(pe->cpu_id, data.time, pe->value);
+			p_state_change(pe->cpu_id, sample->time, pe->value);
 
 		if (strcmp(event_str, "sched:sched_wakeup") == 0)
-			sched_wakeup(data.cpu, data.time, data.pid, te);
+			sched_wakeup(sample->cpu, sample->time, sample->pid, te);
 
 		if (strcmp(event_str, "sched:sched_switch") == 0)
-			sched_switch(data.cpu, data.time, te);
+			sched_switch(sample->cpu, sample->time, te);
 	}
 	return 0;
 }
@@ -937,7 +937,8 @@
 
 static int __cmd_timechart(void)
 {
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+							 0, false, &event_ops);
 	int ret = -EINVAL;
 
 	if (session == NULL)
@@ -989,6 +990,9 @@
 	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = strdup(record_args[i]);
 
@@ -1018,6 +1022,8 @@
 	OPT_CALLBACK('p', "process", NULL, "process",
 		      "process selector. Pass a pid or process name.",
 		       parse_process),
+	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+		    "Look for files with symbols relative to this directory"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dd62580..ae15f04 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -977,12 +977,12 @@
 }
 
 static void event__process_sample(const event_t *self,
-				 struct perf_session *session, int counter)
+				  struct sample_data *sample,
+				  struct perf_session *session, int counter)
 {
 	u64 ip = self->ip.ip;
 	struct sym_entry *syme;
 	struct addr_location al;
-	struct sample_data data;
 	struct machine *machine;
 	u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
@@ -1025,7 +1025,7 @@
 	if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
 		exact_samples++;
 
-	if (event__preprocess_sample(self, session, &al, &data,
+	if (event__preprocess_sample(self, session, &al, sample,
 				     symbol_filter) < 0 ||
 	    al.filtered)
 		return;
@@ -1105,6 +1105,7 @@
 	unsigned int head = mmap_read_head(md);
 	unsigned int old = md->prev;
 	unsigned char *data = md->base + page_size;
+	struct sample_data sample;
 	int diff;
 
 	/*
@@ -1152,10 +1153,11 @@
 			event = &event_copy;
 		}
 
+		event__parse_sample(event, self, &sample);
 		if (event->header.type == PERF_RECORD_SAMPLE)
-			event__process_sample(event, self, md->counter);
+			event__process_sample(event, &sample, self, md->counter);
 		else
-			event__process(event, self);
+			event__process(event, &sample, self);
 		old += size;
 	}
 
@@ -1214,7 +1216,9 @@
 			int err = errno;
 
 			if (err == EPERM || err == EACCES)
-				die("No permission - are you root?\n");
+				die("Permission error - are you root?\n"
+					"\t Consider tweaking"
+					" /proc/sys/kernel/perf_event_paranoid.\n");
 			/*
 			 * If it's cycles then fall back to hrtimer
 			 * based cpu-clock-tick sw counter, which
@@ -1231,7 +1235,7 @@
 				goto try_again;
 			}
 			printf("\n");
-			error("perfcounter syscall returned with %d (%s)\n",
+			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
 					fd[i][counter][thread_index], strerror(err));
 			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 			exit(-1);
@@ -1268,7 +1272,7 @@
 	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
 	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
 	 */
-	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false);
+	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
 	if (session == NULL)
 		return -ENOMEM;
 
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 921245b..c7798c7 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -27,7 +27,7 @@
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
 extern int cmd_timechart(int argc, const char **argv, const char *prefix);
 extern int cmd_top(int argc, const char **argv, const char *prefix);
-extern int cmd_trace(int argc, const char **argv, const char *prefix);
+extern int cmd_script(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
 extern int cmd_probe(int argc, const char **argv, const char *prefix);
 extern int cmd_kmem(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 949d77f..16b5088 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -16,7 +16,7 @@
 perf-stat			mainporcelain common
 perf-timechart			mainporcelain common
 perf-top			mainporcelain common
-perf-trace			mainporcelain common
+perf-script			mainporcelain common
 perf-probe			mainporcelain common
 perf-kmem			mainporcelain common
 perf-lock			mainporcelain common
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index b253db6..b041ca6 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -9,8 +9,8 @@
 ifndef NO_DWARF
 define SOURCE_DWARF
 #include <dwarf.h>
-#include <libdw.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
 #ifndef _ELFUTILS_PREREQ
 #error
 #endif
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index cdd6c03..595d0f4 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -323,7 +323,7 @@
 		{ "top",	cmd_top,	0 },
 		{ "annotate",	cmd_annotate,	0 },
 		{ "version",	cmd_version,	0 },
-		{ "trace",	cmd_trace,	0 },
+		{ "script",	cmd_script,	0 },
 		{ "sched",	cmd_sched,	0 },
 		{ "probe",	cmd_probe,	0 },
 		{ "kmem",	cmd_kmem,	0 },
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
index 01a64ad..790ceba 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
@@ -8,7 +8,7 @@
 
 #line 1 "Context.xs"
 /*
- * Context.xs.  XS interfaces for perf trace.
+ * Context.xs.  XS interfaces for perf script.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
index 549cf04..c1e2ed1 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
@@ -1,5 +1,5 @@
 /*
- * Context.xs.  XS interfaces for perf trace.
+ * Context.xs.  XS interfaces for perf script.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
@@ -23,7 +23,7 @@
 #include "perl.h"
 #include "XSUB.h"
 #include "../../../perf.h"
-#include "../../../util/trace-event.h"
+#include "../../../util/script-event.h"
 
 MODULE = Perf::Trace::Context		PACKAGE = Perf::Trace::Context
 PROTOTYPES: ENABLE
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README
index 9a97076..2f0c7f3 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/README
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/README
@@ -1,7 +1,7 @@
 Perf-Trace-Util version 0.01
 ============================
 
-This module contains utility functions for use with perf trace.
+This module contains utility functions for use with perf script.
 
 Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
 that the core perf support for Perl calls on and should always be
@@ -33,7 +33,7 @@
 
 INSTALLATION
 
-Building perf with perf trace Perl scripting should install this
+Building perf with perf script Perl scripting should install this
 module in the right place.
 
 You should make sure libperl and ExtUtils/Embed.pm are installed first
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
index 6c7f365..4e2f6039 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
@@ -34,7 +34,7 @@
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
index 9df376a..9158458 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
@@ -163,7 +163,7 @@
 __END__
 =head1 NAME
 
-Perf::Trace::Core - Perl extension for perf trace
+Perf::Trace::Core - Perl extension for perf script
 
 =head1 SYNOPSIS
 
@@ -171,7 +171,7 @@
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
index d94b40c8..0535001 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -65,7 +65,7 @@
 __END__
 =head1 NAME
 
-Perf::Trace::Util - Perl extension for perf trace
+Perf::Trace::Util - Perl extension for perf script
 
 =head1 SYNOPSIS
 
@@ -73,7 +73,7 @@
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
index 4028d92..9f83cc1 100644
--- a/tools/perf/scripts/perl/bin/failed-syscalls-report
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -7,4 +7,4 @@
 	shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
index ba25f4d..77200b3 100644
--- a/tools/perf/scripts/perl/bin/rw-by-file-report
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -7,7 +7,4 @@
 fi
 comm=$1
 shift
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
index 641a3f5..a27b9f3 100644
--- a/tools/perf/scripts/perl/bin/rw-by-pid-report
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -1,6 +1,3 @@
 #!/bin/bash
 # description: system-wide r/w activity
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report
index 4918dba..83e11ec 100644
--- a/tools/perf/scripts/perl/bin/rwtop-report
+++ b/tools/perf/scripts/perl/bin/rwtop-report
@@ -17,7 +17,4 @@
     interval=$1
     shift
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
index 49052eb..889e813 100644
--- a/tools/perf/scripts/perl/bin/wakeup-latency-report
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -1,6 +1,3 @@
 #!/bin/bash
 # description: system-wide min/max/avg wakeup latency
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
index df0c65f..6d91411 100644
--- a/tools/perf/scripts/perl/bin/workqueue-stats-report
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -1,7 +1,3 @@
 #!/bin/bash
 # description: workqueue stats (ins/exe/create/destroy)
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
-
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
index 4e7dc0a..4e7076c 100644
--- a/tools/perf/scripts/perl/check-perf-trace.pl
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -1,4 +1,4 @@
-# perf trace event handlers, generated by perf trace -g perl
+# perf script event handlers, generated by perf script -g perl
 # (c) 2009, Tom Zanussi <tzanussi@gmail.com>
 # Licensed under the terms of the GNU GPL License version 2
 
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
index 2a39097..74844ee 100644
--- a/tools/perf/scripts/perl/rw-by-file.pl
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -18,7 +18,7 @@
 use Perf::Trace::Core;
 use Perf::Trace::Util;
 
-my $usage = "perf trace -s rw-by-file.pl <comm>\n";
+my $usage = "perf script -s rw-by-file.pl <comm>\n";
 
 my $for_comm = shift or die $usage;
 
diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl
index b84b126..a8eaff5 100644
--- a/tools/perf/scripts/perl/workqueue-stats.pl
+++ b/tools/perf/scripts/perl/workqueue-stats.pl
@@ -10,7 +10,7 @@
 #     workqueue:workqueue_destruction -e workqueue:workqueue_execution
 #     -e workqueue:workqueue_insertion
 #
-#   perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl
+#   perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
 
 use 5.010000;
 use strict;
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 957085d..315067b 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -1,5 +1,5 @@
 /*
- * Context.c.  Python interfaces for perf trace.
+ * Context.c.  Python interfaces for perf script.
  *
  * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
  *
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
index aad7525..de7211e 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -1,4 +1,4 @@
-# Core.py - Python extension for perf trace, core functions
+# Core.py - Python extension for perf script, core functions
 #
 # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
 #
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
index ae9a56e..fdd92f6 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -1,4 +1,4 @@
-# SchedGui.py - Python extension for perf trace, basic GUI code for
+# SchedGui.py - Python extension for perf script, basic GUI code for
 #		traces drawing and overview.
 #
 # Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 13cc02b..15c8400 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -1,4 +1,4 @@
-# Util.py - Python extension for perf trace, miscellaneous utility code
+# Util.py - Python extension for perf script, miscellaneous utility code
 #
 # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
 #
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
index 0358702..fda5096 100644
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -7,4 +7,4 @@
 	shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report
index c826813..6c44271 100644
--- a/tools/perf/scripts/python/bin/futex-contention-report
+++ b/tools/perf/scripts/python/bin/futex-contention-report
@@ -1,4 +1,4 @@
 #!/bin/bash
 # description: futext contention measurement
 
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
index 4ad361b..8f75929 100644
--- a/tools/perf/scripts/python/bin/netdev-times-report
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -2,4 +2,4 @@
 # description: display a process of packet and processing time
 # args: [tx] [rx] [dev=] [debug]
 
-perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
+perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report
index df1791f..68b037a 100644
--- a/tools/perf/scripts/python/bin/sched-migration-report
+++ b/tools/perf/scripts/python/bin/sched-migration-report
@@ -1,3 +1,3 @@
 #!/bin/bash
 # description: sched migration overview
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report
index 36b409c..c32db29 100644
--- a/tools/perf/scripts/python/bin/sctop-report
+++ b/tools/perf/scripts/python/bin/sctop-report
@@ -21,4 +21,4 @@
     interval=$1
     shift
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
index 4eb88c9..16eb8d6 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -7,4 +7,4 @@
 	shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
index cb2f9c5..0f0e9d4 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -7,4 +7,4 @@
 	shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
diff --git a/tools/perf/scripts/python/check-perf-trace.py b/tools/perf/scripts/python/check-perf-trace.py
index d9f7893..4647a76 100644
--- a/tools/perf/scripts/python/check-perf-trace.py
+++ b/tools/perf/scripts/python/check-perf-trace.py
@@ -1,4 +1,4 @@
-# perf trace event handlers, generated by perf trace -g python
+# perf script event handlers, generated by perf script -g python
 # (c) 2010, Tom Zanussi <tzanussi@gmail.com>
 # Licensed under the terms of the GNU GPL License version 2
 #
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
index acd7848..85805fa 100644
--- a/tools/perf/scripts/python/failed-syscalls-by-pid.py
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -15,7 +15,7 @@
 from Core import *
 from Util import *
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
+usage = "perf script -s syscall-counts-by-pid.py [comm|pid]\n";
 
 for_comm = None
 for_pid = None
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
index b934383..74d55ec 100644
--- a/tools/perf/scripts/python/sched-migration.py
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -4,7 +4,7 @@
 #
 # Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
 #
-# perf trace event handlers have been generated by perf trace -g python
+# perf script event handlers have been generated by perf script -g python
 #
 # This software is distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py
index 7a6ec2c..42c267e 100644
--- a/tools/perf/scripts/python/sctop.py
+++ b/tools/perf/scripts/python/sctop.py
@@ -17,7 +17,7 @@
 from Core import *
 from Util import *
 
-usage = "perf trace -s sctop.py [comm] [interval]\n";
+usage = "perf script -s sctop.py [comm] [interval]\n";
 
 for_comm = None
 default_interval = 3
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
index d1ee3ec..c64d1c5 100644
--- a/tools/perf/scripts/python/syscall-counts-by-pid.py
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -14,7 +14,7 @@
 from Core import *
 from Util import syscall_name
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
+usage = "perf script -s syscall-counts-by-pid.py [comm]\n";
 
 for_comm = None
 for_pid = None
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
index ea183dc..b435d3f 100644
--- a/tools/perf/scripts/python/syscall-counts.py
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -15,7 +15,7 @@
 from Core import *
 from Util import syscall_name
 
-usage = "perf trace -s syscall-counts.py [comm]\n";
+usage = "perf script -s syscall-counts.py [comm]\n";
 
 for_comm = None
 
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e437edb..deffb8c 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,7 +14,9 @@
 #include <linux/kernel.h>
 #include "debug.h"
 
-static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
+static int build_id__mark_dso_hit(event_t *event,
+				  struct sample_data *sample __used,
+				  struct perf_session *session)
 {
 	struct addr_location al;
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -35,7 +37,8 @@
 	return 0;
 }
 
-static int event__exit_del_thread(event_t *self, struct perf_session *session)
+static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
+				  struct perf_session *session)
 {
 	struct thread *thread = perf_session__findnew(session, self->fork.tid);
 
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index c8d81b0..01bbe8e 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -46,20 +46,16 @@
 	return ret;
 }
 
-static int dump_printf_color(const char *fmt, const char *color, ...)
+#ifdef NO_NEWT_SUPPORT
+void ui__warning(const char *format, ...)
 {
 	va_list args;
-	int ret = 0;
 
-	if (dump_trace) {
-		va_start(args, color);
-		ret = color_vfprintf(stdout, color, fmt, args);
-		va_end(args);
-	}
-
-	return ret;
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
 }
-
+#endif
 
 void trace_event(event_t *event)
 {
@@ -70,29 +66,29 @@
 	if (!dump_trace)
 		return;
 
-	dump_printf(".");
-	dump_printf_color("\n. ... raw event: size %d bytes\n", color,
-			  event->header.size);
+	printf(".");
+	color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+		      event->header.size);
 
 	for (i = 0; i < event->header.size; i++) {
 		if ((i & 15) == 0) {
-			dump_printf(".");
-			dump_printf_color("  %04x: ", color, i);
+			printf(".");
+			color_fprintf(stdout, color, "  %04x: ", i);
 		}
 
-		dump_printf_color(" %02x", color, raw_event[i]);
+		color_fprintf(stdout, color, " %02x", raw_event[i]);
 
 		if (((i & 15) == 15) || i == event->header.size-1) {
-			dump_printf_color("  ", color);
+			color_fprintf(stdout, color, "  ");
 			for (j = 0; j < 15-(i & 15); j++)
-				dump_printf_color("   ", color);
+				color_fprintf(stdout, color, "   ");
 			for (j = i & ~15; j <= i; j++) {
-				dump_printf_color("%c", color,
-						isprint(raw_event[j]) ?
-						raw_event[j] : '.');
+				color_fprintf(stdout, color, "%c",
+					      isprint(raw_event[j]) ?
+					      raw_event[j] : '.');
 			}
-			dump_printf_color("\n", color);
+			color_fprintf(stdout, color, "\n");
 		}
 	}
-	dump_printf(".\n");
+	printf(".\n");
 }
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 7b51408..ca35fd6 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -35,4 +35,6 @@
 #include "ui/progress.h"
 #endif
 
+void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+
 #endif	/* __PERF_DEBUG_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dab9e75..2302ec0 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -7,7 +7,7 @@
 #include "strlist.h"
 #include "thread.h"
 
-const char *event__name[] = {
+static const char *event__name[] = {
 	[0]			 = "TOTAL",
 	[PERF_RECORD_MMAP]	 = "MMAP",
 	[PERF_RECORD_LOST]	 = "LOST",
@@ -22,13 +22,31 @@
 	[PERF_RECORD_HEADER_EVENT_TYPE]	 = "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	 = "TRACING_DATA",
 	[PERF_RECORD_HEADER_BUILD_ID]	 = "BUILD_ID",
+	[PERF_RECORD_FINISHED_ROUND]	 = "FINISHED_ROUND",
 };
 
-static pid_t event__synthesize_comm(pid_t pid, int full,
+const char *event__get_event_name(unsigned int id)
+{
+	if (id >= ARRAY_SIZE(event__name))
+		return "INVALID";
+	if (!event__name[id])
+		return "UNKNOWN";
+	return event__name[id];
+}
+
+static struct sample_data synth_sample = {
+	.pid	   = -1,
+	.tid	   = -1,
+	.time	   = -1,
+	.stream_id = -1,
+	.cpu	   = -1,
+	.period	   = 1,
+};
+
+static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
 				    event__handler_t process,
 				    struct perf_session *session)
 {
-	event_t ev;
 	char filename[PATH_MAX];
 	char bf[BUFSIZ];
 	FILE *fp;
@@ -49,34 +67,39 @@
 		return 0;
 	}
 
-	memset(&ev.comm, 0, sizeof(ev.comm));
-	while (!ev.comm.comm[0] || !ev.comm.pid) {
-		if (fgets(bf, sizeof(bf), fp) == NULL)
-			goto out_failure;
+	memset(&event->comm, 0, sizeof(event->comm));
+
+	while (!event->comm.comm[0] || !event->comm.pid) {
+		if (fgets(bf, sizeof(bf), fp) == NULL) {
+			pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
+			goto out;
+		}
 
 		if (memcmp(bf, "Name:", 5) == 0) {
 			char *name = bf + 5;
 			while (*name && isspace(*name))
 				++name;
 			size = strlen(name) - 1;
-			memcpy(ev.comm.comm, name, size++);
+			memcpy(event->comm.comm, name, size++);
 		} else if (memcmp(bf, "Tgid:", 5) == 0) {
 			char *tgids = bf + 5;
 			while (*tgids && isspace(*tgids))
 				++tgids;
-			tgid = ev.comm.pid = atoi(tgids);
+			tgid = event->comm.pid = atoi(tgids);
 		}
 	}
 
-	ev.comm.header.type = PERF_RECORD_COMM;
+	event->comm.header.type = PERF_RECORD_COMM;
 	size = ALIGN(size, sizeof(u64));
-	ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size);
-
+	memset(event->comm.comm + size, 0, session->id_hdr_size);
+	event->comm.header.size = (sizeof(event->comm) -
+				(sizeof(event->comm.comm) - size) +
+				session->id_hdr_size);
 	if (!full) {
-		ev.comm.tid = pid;
+		event->comm.tid = pid;
 
-		process(&ev, session);
-		goto out_fclose;
+		process(event, &synth_sample, session);
+		goto out;
 	}
 
 	snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -91,22 +114,19 @@
 		if (*end)
 			continue;
 
-		ev.comm.tid = pid;
+		event->comm.tid = pid;
 
-		process(&ev, session);
+		process(event, &synth_sample, session);
 	}
+
 	closedir(tasks);
-
-out_fclose:
+out:
 	fclose(fp);
-	return tgid;
 
-out_failure:
-	pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
-	return -1;
+	return tgid;
 }
 
-static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
+static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
 					 event__handler_t process,
 					 struct perf_session *session)
 {
@@ -124,29 +144,25 @@
 		return -1;
 	}
 
+	event->header.type = PERF_RECORD_MMAP;
+	/*
+	 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+	 */
+	event->header.misc = PERF_RECORD_MISC_USER;
+
 	while (1) {
 		char bf[BUFSIZ], *pbf = bf;
-		event_t ev = {
-			.header = {
-				.type = PERF_RECORD_MMAP,
-				/*
-				 * Just like the kernel, see __perf_event_mmap
-				 * in kernel/perf_event.c
-				 */
-				.misc = PERF_RECORD_MISC_USER,
-			 },
-		};
 		int n;
 		size_t size;
 		if (fgets(bf, sizeof(bf), fp) == NULL)
 			break;
 
 		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = hex2u64(pbf, &ev.mmap.start);
+		n = hex2u64(pbf, &event->mmap.start);
 		if (n < 0)
 			continue;
 		pbf += n + 1;
-		n = hex2u64(pbf, &ev.mmap.len);
+		n = hex2u64(pbf, &event->mmap.len);
 		if (n < 0)
 			continue;
 		pbf += n + 3;
@@ -161,19 +177,21 @@
 				continue;
 
 			pbf += 3;
-			n = hex2u64(pbf, &ev.mmap.pgoff);
+			n = hex2u64(pbf, &event->mmap.pgoff);
 
 			size = strlen(execname);
 			execname[size - 1] = '\0'; /* Remove \n */
-			memcpy(ev.mmap.filename, execname, size);
+			memcpy(event->mmap.filename, execname, size);
 			size = ALIGN(size, sizeof(u64));
-			ev.mmap.len -= ev.mmap.start;
-			ev.mmap.header.size = (sizeof(ev.mmap) -
-					       (sizeof(ev.mmap.filename) - size));
-			ev.mmap.pid = tgid;
-			ev.mmap.tid = pid;
+			event->mmap.len -= event->mmap.start;
+			event->mmap.header.size = (sizeof(event->mmap) -
+					        (sizeof(event->mmap.filename) - size));
+			memset(event->mmap.filename + size, 0, session->id_hdr_size);
+			event->mmap.header.size += session->id_hdr_size;
+			event->mmap.pid = tgid;
+			event->mmap.tid = pid;
 
-			process(&ev, session);
+			process(event, &synth_sample, session);
 		}
 	}
 
@@ -187,20 +205,27 @@
 {
 	struct rb_node *nd;
 	struct map_groups *kmaps = &machine->kmaps;
-	u16 misc;
+	event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+	if (event == NULL) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for kernel modules\n");
+		return -1;
+	}
+
+	event->header.type = PERF_RECORD_MMAP;
 
 	/*
 	 * kernel uses 0 for user space maps, see kernel/perf_event.c
 	 * __perf_event_mmap
 	 */
 	if (machine__is_host(machine))
-		misc = PERF_RECORD_MISC_KERNEL;
+		event->header.misc = PERF_RECORD_MISC_KERNEL;
 	else
-		misc = PERF_RECORD_MISC_GUEST_KERNEL;
+		event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
 	for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
 	     nd; nd = rb_next(nd)) {
-		event_t ev;
 		size_t size;
 		struct map *pos = rb_entry(nd, struct map, rb_node);
 
@@ -208,39 +233,78 @@
 			continue;
 
 		size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
-		memset(&ev, 0, sizeof(ev));
-		ev.mmap.header.misc = misc;
-		ev.mmap.header.type = PERF_RECORD_MMAP;
-		ev.mmap.header.size = (sizeof(ev.mmap) -
-				        (sizeof(ev.mmap.filename) - size));
-		ev.mmap.start = pos->start;
-		ev.mmap.len   = pos->end - pos->start;
-		ev.mmap.pid   = machine->pid;
+		event->mmap.header.type = PERF_RECORD_MMAP;
+		event->mmap.header.size = (sizeof(event->mmap) -
+				        (sizeof(event->mmap.filename) - size));
+		memset(event->mmap.filename + size, 0, session->id_hdr_size);
+		event->mmap.header.size += session->id_hdr_size;
+		event->mmap.start = pos->start;
+		event->mmap.len   = pos->end - pos->start;
+		event->mmap.pid   = machine->pid;
 
-		memcpy(ev.mmap.filename, pos->dso->long_name,
+		memcpy(event->mmap.filename, pos->dso->long_name,
 		       pos->dso->long_name_len + 1);
-		process(&ev, session);
+		process(event, &synth_sample, session);
 	}
 
+	free(event);
 	return 0;
 }
 
+static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
+				      pid_t pid, event__handler_t process,
+				      struct perf_session *session)
+{
+	pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
+					    session);
+	if (tgid == -1)
+		return -1;
+	return event__synthesize_mmap_events(mmap_event, pid, tgid,
+					     process, session);
+}
+
 int event__synthesize_thread(pid_t pid, event__handler_t process,
 			     struct perf_session *session)
 {
-	pid_t tgid = event__synthesize_comm(pid, 1, process, session);
-	if (tgid == -1)
-		return -1;
-	return event__synthesize_mmap_events(pid, tgid, process, session);
+	event_t *comm_event, *mmap_event;
+	int err = -1;
+
+	comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+	if (comm_event == NULL)
+		goto out;
+
+	mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+	if (mmap_event == NULL)
+		goto out_free_comm;
+
+	err = __event__synthesize_thread(comm_event, mmap_event, pid,
+					 process, session);
+	free(mmap_event);
+out_free_comm:
+	free(comm_event);
+out:
+	return err;
 }
 
-void event__synthesize_threads(event__handler_t process,
-			       struct perf_session *session)
+int event__synthesize_threads(event__handler_t process,
+			      struct perf_session *session)
 {
 	DIR *proc;
 	struct dirent dirent, *next;
+	event_t *comm_event, *mmap_event;
+	int err = -1;
+
+	comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+	if (comm_event == NULL)
+		goto out;
+
+	mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+	if (mmap_event == NULL)
+		goto out_free_comm;
 
 	proc = opendir("/proc");
+	if (proc == NULL)
+		goto out_free_mmap;
 
 	while (!readdir_r(proc, &dirent, &next) && next) {
 		char *end;
@@ -249,10 +313,18 @@
 		if (*end) /* only interested in proper numerical dirents */
 			continue;
 
-		event__synthesize_thread(pid, process, session);
+		__event__synthesize_thread(comm_event, mmap_event, pid,
+					   process, session);
 	}
 
 	closedir(proc);
+	err = 0;
+out_free_mmap:
+	free(mmap_event);
+out_free_comm:
+	free(comm_event);
+out:
+	return err;
 }
 
 struct process_symbol_args {
@@ -260,7 +332,8 @@
 	u64	   start;
 };
 
-static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
+static int find_symbol_cb(void *arg, const char *name, char type,
+			  u64 start, u64 end __used)
 {
 	struct process_symbol_args *args = arg;
 
@@ -286,18 +359,20 @@
 	char path[PATH_MAX];
 	char name_buff[PATH_MAX];
 	struct map *map;
-
-	event_t ev = {
-		.header = {
-			.type = PERF_RECORD_MMAP,
-		},
-	};
+	int err;
 	/*
 	 * We should get this from /sys/kernel/sections/.text, but till that is
 	 * available use this, and after it is use this as a fallback for older
 	 * kernels.
 	 */
 	struct process_symbol_args args = { .name = symbol_name, };
+	event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+	if (event == NULL) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for kernel modules\n");
+		return -1;
+	}
 
 	mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
 	if (machine__is_host(machine)) {
@@ -305,10 +380,10 @@
 		 * kernel uses PERF_RECORD_MISC_USER for user space maps,
 		 * see kernel/perf_event.c __perf_event_mmap
 		 */
-		ev.header.misc = PERF_RECORD_MISC_KERNEL;
+		event->header.misc = PERF_RECORD_MISC_KERNEL;
 		filename = "/proc/kallsyms";
 	} else {
-		ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+		event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 		if (machine__is_default_guest(machine))
 			filename = (char *) symbol_conf.default_guest_kallsyms;
 		else {
@@ -321,17 +396,21 @@
 		return -ENOENT;
 
 	map = machine->vmlinux_maps[MAP__FUNCTION];
-	size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
+	size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
 			"%s%s", mmap_name, symbol_name) + 1;
 	size = ALIGN(size, sizeof(u64));
-	ev.mmap.header.size = (sizeof(ev.mmap) -
-			(sizeof(ev.mmap.filename) - size));
-	ev.mmap.pgoff = args.start;
-	ev.mmap.start = map->start;
-	ev.mmap.len   = map->end - ev.mmap.start;
-	ev.mmap.pid   = machine->pid;
+	event->mmap.header.type = PERF_RECORD_MMAP;
+	event->mmap.header.size = (sizeof(event->mmap) -
+			(sizeof(event->mmap.filename) - size) + session->id_hdr_size);
+	event->mmap.pgoff = args.start;
+	event->mmap.start = map->start;
+	event->mmap.len   = map->end - event->mmap.start;
+	event->mmap.pid   = machine->pid;
 
-	return process(&ev, session);
+	err = process(event, &synth_sample, session);
+	free(event);
+
+	return err;
 }
 
 static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -361,7 +440,8 @@
 	return 0;
 }
 
-int event__process_comm(event_t *self, struct perf_session *session)
+int event__process_comm(event_t *self, struct sample_data *sample __used,
+			struct perf_session *session)
 {
 	struct thread *thread = perf_session__findnew(session, self->comm.tid);
 
@@ -376,7 +456,8 @@
 	return 0;
 }
 
-int event__process_lost(event_t *self, struct perf_session *session)
+int event__process_lost(event_t *self, struct sample_data *sample __used,
+			struct perf_session *session)
 {
 	dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
 	session->hists.stats.total_lost += self->lost.lost;
@@ -392,7 +473,7 @@
 	 * a zero sized synthesized MMAP event for the kernel.
 	 */
 	if (maps[MAP__FUNCTION]->end == 0)
-		maps[MAP__FUNCTION]->end = ~0UL;
+		maps[MAP__FUNCTION]->end = ~0ULL;
 }
 
 static int event__process_kernel_mmap(event_t *self,
@@ -485,7 +566,8 @@
 	return -1;
 }
 
-int event__process_mmap(event_t *self, struct perf_session *session)
+int event__process_mmap(event_t *self, struct sample_data *sample __used,
+			struct perf_session *session)
 {
 	struct machine *machine;
 	struct thread *thread;
@@ -526,7 +608,8 @@
 	return 0;
 }
 
-int event__process_task(event_t *self, struct perf_session *session)
+int event__process_task(event_t *self, struct sample_data *sample __used,
+			struct perf_session *session)
 {
 	struct thread *thread = perf_session__findnew(session, self->fork.tid);
 	struct thread *parent = perf_session__findnew(session, self->fork.ptid);
@@ -548,18 +631,19 @@
 	return 0;
 }
 
-int event__process(event_t *event, struct perf_session *session)
+int event__process(event_t *event, struct sample_data *sample,
+		   struct perf_session *session)
 {
 	switch (event->header.type) {
 	case PERF_RECORD_COMM:
-		event__process_comm(event, session);
+		event__process_comm(event, sample, session);
 		break;
 	case PERF_RECORD_MMAP:
-		event__process_mmap(event, session);
+		event__process_mmap(event, sample, session);
 		break;
 	case PERF_RECORD_FORK:
 	case PERF_RECORD_EXIT:
-		event__process_task(event, session);
+		event__process_task(event, sample, session);
 		break;
 	default:
 		break;
@@ -674,32 +758,8 @@
 			     symbol_filter_t filter)
 {
 	u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-	struct thread *thread;
+	struct thread *thread = perf_session__findnew(session, self->ip.pid);
 
-	event__parse_sample(self, session->sample_type, data);
-
-	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
-		    self->header.misc, data->pid, data->tid, data->ip,
-		    data->period, data->cpu);
-
-	if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
-		unsigned int i;
-
-		dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
-
-		if (!ip_callchain__valid(data->callchain, self)) {
-			pr_debug("call-chain problem with event, "
-				 "skipping it.\n");
-			goto out_filtered;
-		}
-
-		if (dump_trace) {
-			for (i = 0; i < data->callchain->nr; i++)
-				dump_printf("..... %2d: %016Lx\n",
-					    i, data->callchain->ips[i]);
-		}
-	}
-	thread = perf_session__findnew(session, self->ip.pid);
 	if (thread == NULL)
 		return -1;
 
@@ -766,9 +826,65 @@
 	return 0;
 }
 
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data)
+static int event__parse_id_sample(const event_t *event,
+				  struct perf_session *session,
+				  struct sample_data *sample)
 {
-	const u64 *array = event->sample.array;
+	const u64 *array;
+	u64 type;
+
+	sample->cpu = sample->pid = sample->tid = -1;
+	sample->stream_id = sample->id = sample->time = -1ULL;
+
+	if (!session->sample_id_all)
+		return 0;
+
+	array = event->sample.array;
+	array += ((event->header.size -
+		   sizeof(event->header)) / sizeof(u64)) - 1;
+	type = session->sample_type;
+
+	if (type & PERF_SAMPLE_CPU) {
+		u32 *p = (u32 *)array;
+		sample->cpu = *p;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		sample->stream_id = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		sample->id = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		sample->time = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		u32 *p = (u32 *)array;
+		sample->pid = p[0];
+		sample->tid = p[1];
+	}
+
+	return 0;
+}
+
+int event__parse_sample(const event_t *event, struct perf_session *session,
+			struct sample_data *data)
+{
+	const u64 *array;
+	u64 type;
+
+	if (event->header.type != PERF_RECORD_SAMPLE)
+		return event__parse_id_sample(event, session, data);
+
+	array = event->sample.array;
+	type = session->sample_type;
 
 	if (type & PERF_SAMPLE_IP) {
 		data->ip = event->ip.ip;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8e790da..2b7e919 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -85,6 +85,7 @@
 };
 
 enum perf_user_event_type { /* above any possible kernel type */
+	PERF_RECORD_USER_TYPE_START		= 64,
 	PERF_RECORD_HEADER_ATTR			= 64,
 	PERF_RECORD_HEADER_EVENT_TYPE		= 65,
 	PERF_RECORD_HEADER_TRACING_DATA		= 66,
@@ -135,12 +136,15 @@
 
 struct perf_session;
 
-typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
+typedef int (*event__handler_synth_t)(event_t *event, 
+				      struct perf_session *session);
+typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
+				struct perf_session *session);
 
 int event__synthesize_thread(pid_t pid, event__handler_t process,
 			     struct perf_session *session);
-void event__synthesize_threads(event__handler_t process,
-			       struct perf_session *session);
+int event__synthesize_threads(event__handler_t process,
+			      struct perf_session *session);
 int event__synthesize_kernel_mmap(event__handler_t process,
 				struct perf_session *session,
 				struct machine *machine,
@@ -150,18 +154,24 @@
 			      struct perf_session *session,
 			      struct machine *machine);
 
-int event__process_comm(event_t *self, struct perf_session *session);
-int event__process_lost(event_t *self, struct perf_session *session);
-int event__process_mmap(event_t *self, struct perf_session *session);
-int event__process_task(event_t *self, struct perf_session *session);
-int event__process(event_t *event, struct perf_session *session);
+int event__process_comm(event_t *self, struct sample_data *sample,
+			struct perf_session *session);
+int event__process_lost(event_t *self, struct sample_data *sample,
+			struct perf_session *session);
+int event__process_mmap(event_t *self, struct sample_data *sample,
+			struct perf_session *session);
+int event__process_task(event_t *self, struct sample_data *sample,
+			struct perf_session *session);
+int event__process(event_t *event, struct sample_data *sample,
+		   struct perf_session *session);
 
 struct addr_location;
 int event__preprocess_sample(const event_t *self, struct perf_session *session,
 			     struct addr_location *al, struct sample_data *data,
 			     symbol_filter_t filter);
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data);
+int event__parse_sample(const event_t *event, struct perf_session *session,
+			struct sample_data *sample);
 
-extern const char *event__name[];
+const char *event__get_event_name(unsigned int id);
 
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 64a85ba..16a1602 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -152,6 +152,11 @@
 	set_bit(feat, self->adds_features);
 }
 
+void perf_header__clear_feat(struct perf_header *self, int feat)
+{
+	clear_bit(feat, self->adds_features);
+}
+
 bool perf_header__has_feat(const struct perf_header *self, int feat)
 {
 	return test_bit(feat, self->adds_features);
@@ -431,8 +436,10 @@
 	int idx = 0, err;
 
 	session = container_of(self, struct perf_session, header);
-	if (perf_session__read_build_ids(session, true))
-		perf_header__set_feat(self, HEADER_BUILD_ID);
+
+	if (perf_header__has_feat(self, HEADER_BUILD_ID &&
+	    !perf_session__read_build_ids(session, true)))
+		perf_header__clear_feat(self, HEADER_BUILD_ID);
 
 	nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
 	if (!nr_sections)
@@ -939,6 +946,24 @@
 	return type;
 }
 
+bool perf_header__sample_id_all(const struct perf_header *header)
+{
+	bool value = false, first = true;
+	int i;
+
+	for (i = 0; i < header->attrs; i++) {
+		struct perf_header_attr *attr = header->attr[i];
+
+		if (first) {
+			value = attr->attr.sample_id_all;
+			first = false;
+		} else if (value != attr->attr.sample_id_all)
+			die("non matching sample_id_all");
+	}
+
+	return value;
+}
+
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header)
 {
@@ -985,21 +1010,23 @@
 
 	ev = malloc(size);
 
+	if (ev == NULL)
+		return -ENOMEM;
+
 	ev->attr.attr = *attr;
 	memcpy(ev->attr.id, id, ids * sizeof(u64));
 
 	ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
 	ev->attr.header.size = size;
 
-	err = process(ev, session);
+	err = process(ev, NULL, session);
 
 	free(ev);
 
 	return err;
 }
 
-int event__synthesize_attrs(struct perf_header *self,
-			    event__handler_t process,
+int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
 			    struct perf_session *session)
 {
 	struct perf_header_attr	*attr;
@@ -1069,7 +1096,7 @@
 	ev.event_type.header.size = sizeof(ev.event_type) -
 		(sizeof(ev.event_type.event_type.name) - size);
 
-	err = process(&ev, session);
+	err = process(&ev, NULL, session);
 
 	return err;
 }
@@ -1124,7 +1151,7 @@
 	ev.tracing_data.header.size = sizeof(ev.tracing_data);
 	ev.tracing_data.size = aligned_size;
 
-	process(&ev, session);
+	process(&ev, NULL, session);
 
 	err = read_tracing_data(fd, pattrs, nb_events);
 	write_padded(fd, NULL, 0, padding);
@@ -1184,7 +1211,7 @@
 	ev.build_id.header.size = sizeof(ev.build_id) + len;
 	memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
 
-	err = process(&ev, session);
+	err = process(&ev, NULL, session);
 
 	return err;
 }
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 402ac24..6335965 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -81,9 +81,11 @@
 int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
 
 u64 perf_header__sample_type(struct perf_header *header);
+bool perf_header__sample_id_all(const struct perf_header *header);
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header);
 void perf_header__set_feat(struct perf_header *self, int feat);
+void perf_header__clear_feat(struct perf_header *self, int feat);
 bool perf_header__has_feat(const struct perf_header *self, int feat);
 
 int perf_header__process_sections(struct perf_header *self, int fd,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 2022e87..d503670 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1092,6 +1092,12 @@
 	FILE *file;
 	int err = 0;
 	u64 len;
+	char symfs_filename[PATH_MAX];
+
+	if (filename) {
+		snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+			 symbol_conf.symfs, filename);
+	}
 
 	if (filename == NULL) {
 		if (dso->has_build_id) {
@@ -1100,9 +1106,9 @@
 			return -ENOMEM;
 		}
 		goto fallback;
-	} else if (readlink(filename, command, sizeof(command)) < 0 ||
+	} else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
 		   strstr(command, "[kernel.kallsyms]") ||
-		   access(filename, R_OK)) {
+		   access(symfs_filename, R_OK)) {
 		free(filename);
 fallback:
 		/*
@@ -1111,6 +1117,8 @@
 		 * DSO is the same as when 'perf record' ran.
 		 */
 		filename = dso->long_name;
+		snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+			 symbol_conf.symfs, filename);
 		free_filename = false;
 	}
 
@@ -1137,7 +1145,7 @@
 		 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
 		 map__rip_2objdump(map, sym->start),
 		 map__rip_2objdump(map, sym->end),
-		 filename, filename);
+		 symfs_filename, filename);
 
 	pr_debug("Executing: %s\n", command);
 
@@ -1168,10 +1176,13 @@
 	size_t ret = 0;
 
 	for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
-		if (!event__name[i])
+		const char *name = event__get_event_name(i);
+
+		if (!strcmp(name, "UNKNOWN"))
 			continue;
-		ret += fprintf(fp, "%10s events: %10d\n",
-			       event__name[i], self->stats.nr_events[i]);
+
+		ret += fprintf(fp, "%16s events: %10d\n", name,
+			       self->stats.nr_events[i]);
 	}
 
 	return ret;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 587d375..ee78985 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -52,8 +52,10 @@
 struct events_stats {
 	u64 total_period;
 	u64 total_lost;
+	u64 total_invalid_chains;
 	u32 nr_events[PERF_RECORD_HEADER_MAX];
 	u32 nr_unknown_events;
+	u32 nr_invalid_chains;
 };
 
 enum hist_column {
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 0000000..acffd5e
--- /dev/null
+++ b/tools/perf/util/include/asm/cpufeature.h
@@ -0,0 +1,9 @@
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif	/* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 0000000..bb4198e
--- /dev/null
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -0,0 +1,11 @@
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+
+#endif	/* PERF_DWARF2_H */
+
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index bb4ac2e..8be0b96 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -13,6 +13,11 @@
 	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
 }
 
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+	addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
+}
+
 static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
 {
 	return ((1UL << (nr % BITS_PER_LONG)) &
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 0000000..06387cf
--- /dev/null
+++ b/tools/perf/util/include/linux/linkage.h
@@ -0,0 +1,13 @@
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+#define ENTRY(name)				\
+	.globl name;				\
+	name:
+
+#define ENDPROC(name)
+
+#endif	/* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4af5bd5..c305305a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -434,7 +434,7 @@
 	id = atoll(id_buf);
 	attr->config = id;
 	attr->type = PERF_TYPE_TRACEPOINT;
-	*strp = evt_name + evt_length;
+	*strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
 
 	attr->sample_type |= PERF_SAMPLE_RAW;
 	attr->sample_type |= PERF_SAMPLE_TIME;
@@ -495,7 +495,7 @@
 				    struct perf_event_attr *attr)
 {
 	const char *evt_name;
-	char *flags;
+	char *flags = NULL, *comma_loc;
 	char sys_name[MAX_EVENT_LENGTH];
 	unsigned int sys_length, evt_length;
 
@@ -514,6 +514,11 @@
 	sys_name[sys_length] = '\0';
 	evt_name = evt_name + 1;
 
+	comma_loc = strchr(evt_name, ',');
+	if (comma_loc) {
+		/* take the event name up to the comma */
+		evt_name = strndup(evt_name, comma_loc - evt_name);
+	}
 	flags = strchr(evt_name, ':');
 	if (flags) {
 		/* split it out: */
@@ -524,9 +529,8 @@
 	evt_length = strlen(evt_name);
 	if (evt_length >= MAX_EVENT_LENGTH)
 		return EVT_FAILED;
-
 	if (strpbrk(evt_name, "*?")) {
-		*strp = evt_name + evt_length;
+		*strp += strlen(sys_name) + evt_length;
 		return parse_multiple_tracepoint_event(sys_name, evt_name,
 						       flags);
 	} else
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index c7d72dc..abc31a1 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -119,6 +119,10 @@
 	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
 #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
 	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
+#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
+	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
+	.value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
+	.flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
 
 /* parse_options() will filter out the processed options and leave the
  * non-option argments in argv[].
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 3b6a529..d3af30d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -95,7 +95,7 @@
 		goto out;
 
 	if (machine__create_kernel_maps(&machine) < 0) {
-		pr_debug("machine__create_kernel_maps ");
+		pr_debug("machine__create_kernel_maps() failed.\n");
 		goto out;
 	}
 out:
@@ -140,7 +140,8 @@
 {
 	const char *path = kernel_get_module_path(module);
 	if (!path) {
-		pr_err("Failed to find path of %s module", module ?: "kernel");
+		pr_err("Failed to find path of %s module.\n",
+		       module ?: "kernel");
 		return -ENOENT;
 	}
 	pr_debug("Try to open %s\n", path);
@@ -217,7 +218,7 @@
 		pr_warning("Warning: No dwarf info found in the vmlinux - "
 			"please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
 		if (!need_dwarf) {
-			pr_debug("Trying to use symbols.\nn");
+			pr_debug("Trying to use symbols.\n");
 			return 0;
 		}
 	}
@@ -286,42 +287,49 @@
 #define LINEBUF_SIZE 256
 #define NR_ADDITIONAL_LINES 2
 
-static int show_one_line(FILE *fp, int l, bool skip, bool show_num)
+static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
 {
 	char buf[LINEBUF_SIZE];
-	const char *color = PERF_COLOR_BLUE;
+	const char *color = show_num ? "" : PERF_COLOR_BLUE;
+	const char *prefix = NULL;
 
-	if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
-		goto error;
-	if (!skip) {
-		if (show_num)
-			fprintf(stdout, "%7d  %s", l, buf);
-		else
-			color_fprintf(stdout, color, "         %s", buf);
-	}
-
-	while (strlen(buf) == LINEBUF_SIZE - 1 &&
-	       buf[LINEBUF_SIZE - 2] != '\n') {
+	do {
 		if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
 			goto error;
-		if (!skip) {
-			if (show_num)
-				fprintf(stdout, "%s", buf);
-			else
-				color_fprintf(stdout, color, "%s", buf);
+		if (skip)
+			continue;
+		if (!prefix) {
+			prefix = show_num ? "%7d  " : "         ";
+			color_fprintf(stdout, color, prefix, l);
 		}
-	}
+		color_fprintf(stdout, color, "%s", buf);
 
-	return 0;
+	} while (strchr(buf, '\n') == NULL);
+
+	return 1;
 error:
-	if (feof(fp))
-		pr_warning("Source file is shorter than expected.\n");
-	else
+	if (ferror(fp)) {
 		pr_warning("File read error: %s\n", strerror(errno));
-
-	return -1;
+		return -1;
+	}
+	return 0;
 }
 
+static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+	int rv = __show_one_line(fp, l, skip, show_num);
+	if (rv == 0) {
+		pr_warning("Source file is shorter than expected.\n");
+		rv = -1;
+	}
+	return rv;
+}
+
+#define show_one_line_with_num(f,l)	_show_one_line(f,l,false,true)
+#define show_one_line(f,l)		_show_one_line(f,l,false,false)
+#define skip_one_line(f,l)		_show_one_line(f,l,true,false)
+#define show_one_line_or_eof(f,l)	__show_one_line(f,l,false,false)
+
 /*
  * Show line-range always requires debuginfo to find source file and
  * line number.
@@ -370,7 +378,7 @@
 		fprintf(stdout, "<%s:%d>\n", lr->function,
 			lr->start - lr->offset);
 	else
-		fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+		fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
 
 	fp = fopen(lr->path, "r");
 	if (fp == NULL) {
@@ -379,26 +387,30 @@
 		return -errno;
 	}
 	/* Skip to starting line number */
-	while (l < lr->start && ret >= 0)
-		ret = show_one_line(fp, l++, true, false);
-	if (ret < 0)
-		goto end;
+	while (l < lr->start) {
+		ret = skip_one_line(fp, l++);
+		if (ret < 0)
+			goto end;
+	}
 
 	list_for_each_entry(ln, &lr->line_list, list) {
-		while (ln->line > l && ret >= 0)
-			ret = show_one_line(fp, (l++) - lr->offset,
-					    false, false);
-		if (ret >= 0)
-			ret = show_one_line(fp, (l++) - lr->offset,
-					    false, true);
+		for (; ln->line > l; l++) {
+			ret = show_one_line(fp, l - lr->offset);
+			if (ret < 0)
+				goto end;
+		}
+		ret = show_one_line_with_num(fp, l++ - lr->offset);
 		if (ret < 0)
 			goto end;
 	}
 
 	if (lr->end == INT_MAX)
 		lr->end = l + NR_ADDITIONAL_LINES;
-	while (l <= lr->end && !feof(fp) && ret >= 0)
-		ret = show_one_line(fp, (l++) - lr->offset, false, false);
+	while (l <= lr->end) {
+		ret = show_one_line_or_eof(fp, l++ - lr->offset);
+		if (ret <= 0)
+			break;
+	}
 end:
 	fclose(fp);
 	return ret;
@@ -457,7 +469,7 @@
 
 	fd = open_vmlinux(module);
 	if (fd < 0) {
-		pr_warning("Failed to open debuginfo file.\n");
+		pr_warning("Failed to open debug information file.\n");
 		return fd;
 	}
 
@@ -517,56 +529,87 @@
 }
 #endif
 
+static int parse_line_num(char **ptr, int *val, const char *what)
+{
+	const char *start = *ptr;
+
+	errno = 0;
+	*val = strtol(*ptr, ptr, 0);
+	if (errno || *ptr == start) {
+		semantic_error("'%s' is not a valid number.\n", what);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Stuff 'lr' according to the line range described by 'arg'.
+ * The line range syntax is described by:
+ *
+ *         SRC[:SLN[+NUM|-ELN]]
+ *         FNC[:SLN[+NUM|-ELN]]
+ */
 int parse_line_range_desc(const char *arg, struct line_range *lr)
 {
-	const char *ptr;
-	char *tmp;
-	/*
-	 * <Syntax>
-	 * SRC:SLN[+NUM|-ELN]
-	 * FUNC[:SLN[+NUM|-ELN]]
-	 */
-	ptr = strchr(arg, ':');
-	if (ptr) {
-		lr->start = (int)strtoul(ptr + 1, &tmp, 0);
-		if (*tmp == '+') {
-			lr->end = lr->start + (int)strtoul(tmp + 1, &tmp, 0);
-			lr->end--;	/*
-					 * Adjust the number of lines here.
-					 * If the number of lines == 1, the
-					 * the end of line should be equal to
-					 * the start of line.
-					 */
-		} else if (*tmp == '-')
-			lr->end = (int)strtoul(tmp + 1, &tmp, 0);
-		else
-			lr->end = INT_MAX;
+	char *range, *name = strdup(arg);
+	int err;
+
+	if (!name)
+		return -ENOMEM;
+
+	lr->start = 0;
+	lr->end = INT_MAX;
+
+	range = strchr(name, ':');
+	if (range) {
+		*range++ = '\0';
+
+		err = parse_line_num(&range, &lr->start, "start line");
+		if (err)
+			goto err;
+
+		if (*range == '+' || *range == '-') {
+			const char c = *range++;
+
+			err = parse_line_num(&range, &lr->end, "end line");
+			if (err)
+				goto err;
+
+			if (c == '+') {
+				lr->end += lr->start;
+				/*
+				 * Adjust the number of lines here.
+				 * If the number of lines == 1, the
+				 * the end of line should be equal to
+				 * the start of line.
+				 */
+				lr->end--;
+			}
+		}
+
 		pr_debug("Line range is %d to %d\n", lr->start, lr->end);
+
+		err = -EINVAL;
 		if (lr->start > lr->end) {
 			semantic_error("Start line must be smaller"
 				       " than end line.\n");
-			return -EINVAL;
+			goto err;
 		}
-		if (*tmp != '\0') {
-			semantic_error("Tailing with invalid character '%d'.\n",
-				       *tmp);
-			return -EINVAL;
+		if (*range != '\0') {
+			semantic_error("Tailing with invalid str '%s'.\n", range);
+			goto err;
 		}
-		tmp = strndup(arg, (ptr - arg));
-	} else {
-		tmp = strdup(arg);
-		lr->end = INT_MAX;
 	}
 
-	if (tmp == NULL)
-		return -ENOMEM;
-
-	if (strchr(tmp, '.'))
-		lr->file = tmp;
+	if (strchr(name, '.'))
+		lr->file = name;
 	else
-		lr->function = tmp;
+		lr->function = name;
 
 	return 0;
+err:
+	free(name);
+	return err;
 }
 
 /* Check the name is good for event/group */
@@ -690,39 +733,40 @@
 
 	/* Exclusion check */
 	if (pp->lazy_line && pp->line) {
-		semantic_error("Lazy pattern can't be used with line number.");
+		semantic_error("Lazy pattern can't be used with"
+			       " line number.\n");
 		return -EINVAL;
 	}
 
 	if (pp->lazy_line && pp->offset) {
-		semantic_error("Lazy pattern can't be used with offset.");
+		semantic_error("Lazy pattern can't be used with offset.\n");
 		return -EINVAL;
 	}
 
 	if (pp->line && pp->offset) {
-		semantic_error("Offset can't be used with line number.");
+		semantic_error("Offset can't be used with line number.\n");
 		return -EINVAL;
 	}
 
 	if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
 		semantic_error("File always requires line number or "
-			       "lazy pattern.");
+			       "lazy pattern.\n");
 		return -EINVAL;
 	}
 
 	if (pp->offset && !pp->function) {
-		semantic_error("Offset requires an entry function.");
+		semantic_error("Offset requires an entry function.\n");
 		return -EINVAL;
 	}
 
 	if (pp->retprobe && !pp->function) {
-		semantic_error("Return probe requires an entry function.");
+		semantic_error("Return probe requires an entry function.\n");
 		return -EINVAL;
 	}
 
 	if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
 		semantic_error("Offset/Line/Lazy pattern can't be used with "
-			       "return probe.");
+			       "return probe.\n");
 		return -EINVAL;
 	}
 
@@ -996,7 +1040,7 @@
 
 	return tmp - buf;
 error:
-	pr_debug("Failed to synthesize perf probe argument: %s",
+	pr_debug("Failed to synthesize perf probe argument: %s\n",
 		 strerror(-ret));
 	return ret;
 }
@@ -1024,13 +1068,13 @@
 			goto error;
 	}
 	if (pp->file) {
-		len = strlen(pp->file) - 31;
-		if (len < 0)
-			len = 0;
-		tmp = strchr(pp->file + len, '/');
-		if (!tmp)
-			tmp = pp->file + len;
-		ret = e_snprintf(file, 32, "@%s", tmp + 1);
+		tmp = pp->file;
+		len = strlen(tmp);
+		if (len > 30) {
+			tmp = strchr(pp->file + len - 30, '/');
+			tmp = tmp ? tmp + 1 : pp->file + len - 30;
+		}
+		ret = e_snprintf(file, 32, "@%s", tmp);
 		if (ret <= 0)
 			goto error;
 	}
@@ -1046,7 +1090,7 @@
 
 	return buf;
 error:
-	pr_debug("Failed to synthesize perf probe point: %s",
+	pr_debug("Failed to synthesize perf probe point: %s\n",
 		 strerror(-ret));
 	if (buf)
 		free(buf);
@@ -1787,7 +1831,7 @@
 
 	ret = e_snprintf(buf, 128, "%s:%s", group, event);
 	if (ret < 0) {
-		pr_err("Failed to copy event.");
+		pr_err("Failed to copy event.\n");
 		return ret;
 	}
 
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 3991d73..90b6292 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -627,8 +627,8 @@
 	regs = get_arch_regstr(regn);
 	if (!regs) {
 		/* This should be a bug in DWARF or this tool */
-		pr_warning("Mapping for DWARF register number %u "
-			   "missing on this architecture.", regn);
+		pr_warning("Mapping for the register number %u "
+			   "missing on this architecture.\n", regn);
 		return -ERANGE;
 	}
 
@@ -674,13 +674,14 @@
 		if (ret != DW_TAG_pointer_type &&
 		    ret != DW_TAG_array_type) {
 			pr_warning("Failed to cast into string: "
-				   "%s(%s) is not a pointer nor array.",
+				   "%s(%s) is not a pointer nor array.\n",
 				   dwarf_diename(vr_die), dwarf_diename(&type));
 			return -EINVAL;
 		}
 		if (ret == DW_TAG_pointer_type) {
 			if (die_get_real_type(&type, &type) == NULL) {
-				pr_warning("Failed to get a type information.");
+				pr_warning("Failed to get a type"
+					   " information.\n");
 				return -ENOENT;
 			}
 			while (*ref_ptr)
@@ -695,7 +696,7 @@
 		if (!die_compare_name(&type, "char") &&
 		    !die_compare_name(&type, "unsigned char")) {
 			pr_warning("Failed to cast into string: "
-				   "%s is not (unsigned) char *.",
+				   "%s is not (unsigned) char *.\n",
 				   dwarf_diename(vr_die));
 			return -EINVAL;
 		}
@@ -805,8 +806,8 @@
 			return -EINVAL;
 		}
 		if (field->name[0] == '[') {
-			pr_err("Semantic error: %s is not a pointor nor array.",
-			       varname);
+			pr_err("Semantic error: %s is not a pointor"
+			       " nor array.\n", varname);
 			return -EINVAL;
 		}
 		if (field->ref) {
@@ -953,7 +954,7 @@
 	name = dwarf_diename(sp_die);
 	if (name) {
 		if (dwarf_entrypc(sp_die, &eaddr) != 0) {
-			pr_warning("Failed to get entry pc of %s\n",
+			pr_warning("Failed to get entry address of %s\n",
 				   dwarf_diename(sp_die));
 			return -ENOENT;
 		}
@@ -969,7 +970,7 @@
 	if (retprobe) {
 		if (eaddr != paddr) {
 			pr_warning("Return probe must be on the head of"
-				   " a real function\n");
+				   " a real function.\n");
 			return -EINVAL;
 		}
 		tp->retprobe = true;
@@ -1008,7 +1009,7 @@
 		Dwarf_Frame *frame;
 		if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
 		    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
-			pr_warning("Failed to get CFA on 0x%jx\n",
+			pr_warning("Failed to get call frame on 0x%jx\n",
 				   (uintmax_t)pf->addr);
 			return -ENOENT;
 		}
@@ -1035,7 +1036,7 @@
 	int ret = 0;
 
 	if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
-		pr_warning("No source lines found in this CU.\n");
+		pr_warning("No source lines found.\n");
 		return -ENOENT;
 	}
 
@@ -1137,7 +1138,7 @@
 	}
 
 	if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
-		pr_warning("No source lines found in this CU.\n");
+		pr_warning("No source lines found.\n");
 		return -ENOENT;
 	}
 
@@ -1195,7 +1196,7 @@
 	else {
 		/* Get probe address */
 		if (dwarf_entrypc(in_die, &addr) != 0) {
-			pr_warning("Failed to get entry pc of %s.\n",
+			pr_warning("Failed to get entry address of %s.\n",
 				   dwarf_diename(in_die));
 			param->retval = -ENOENT;
 			return DWARF_CB_ABORT;
@@ -1236,8 +1237,8 @@
 			param->retval = find_probe_point_lazy(sp_die, pf);
 		else {
 			if (dwarf_entrypc(sp_die, &pf->addr) != 0) {
-				pr_warning("Failed to get entry pc of %s.\n",
-					   dwarf_diename(sp_die));
+				pr_warning("Failed to get entry address of "
+					   "%s.\n", dwarf_diename(sp_die));
 				param->retval = -ENOENT;
 				return DWARF_CB_ABORT;
 			}
@@ -1279,7 +1280,7 @@
 
 	dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
 	if (!dbg) {
-		pr_warning("No dwarf info found in the vmlinux - "
+		pr_warning("No debug information found in the vmlinux - "
 			"please rebuild with CONFIG_DEBUG_INFO=y.\n");
 		return -EBADF;
 	}
@@ -1524,7 +1525,7 @@
 	/* Open the live linux kernel */
 	dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
 	if (!dbg) {
-		pr_warning("No dwarf info found in the vmlinux - "
+		pr_warning("No debug information found in the vmlinux - "
 			"please rebuild with CONFIG_DEBUG_INFO=y.\n");
 		ret = -EINVAL;
 		goto end;
@@ -1534,7 +1535,8 @@
 	addr += bias;
 	/* Find cu die */
 	if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
-		pr_warning("No CU DIE is found at %lx\n", addr);
+		pr_warning("Failed to find debug information for address %lx\n",
+			   addr);
 		ret = -EINVAL;
 		goto end;
 	}
@@ -1659,7 +1661,7 @@
 
 	line_list__init(&lf->lr->line_list);
 	if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
-		pr_warning("No source lines found in this CU.\n");
+		pr_warning("No source lines found.\n");
 		return -ENOENT;
 	}
 
@@ -1784,7 +1786,7 @@
 
 	dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
 	if (!dbg) {
-		pr_warning("No dwarf info found in the vmlinux - "
+		pr_warning("No debug information found in the vmlinux - "
 			"please rebuild with CONFIG_DEBUG_INFO=y.\n");
 		return -EBADF;
 	}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index bba69d4..beaefc3 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,9 +34,9 @@
 				  bool externs);
 
 #include <dwarf.h>
-#include <libdw.h>
-#include <libdwfl.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
 
 struct probe_finder {
 	struct perf_probe_event	*pev;		/* Target probe event */
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index b059dc5..9368081 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -1,5 +1,5 @@
 /*
- * trace-event-perl.  Feed perf trace events to an embedded Perl interpreter.
+ * trace-event-perl.  Feed perf script events to an embedded Perl interpreter.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
@@ -411,8 +411,8 @@
 		return -1;
 	}
 
-	fprintf(ofp, "# perf trace event handlers, "
-		"generated by perf trace -g perl\n");
+	fprintf(ofp, "# perf script event handlers, "
+		"generated by perf script -g perl\n");
 
 	fprintf(ofp, "# Licensed under the terms of the GNU GPL"
 		" License version 2\n\n");
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 33a6325..c6d9933 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -442,8 +442,8 @@
 		fprintf(stderr, "couldn't open %s\n", fname);
 		return -1;
 	}
-	fprintf(ofp, "# perf trace event handlers, "
-		"generated by perf trace -g python\n");
+	fprintf(ofp, "# perf script event handlers, "
+		"generated by perf script -g python\n");
 
 	fprintf(ofp, "# Licensed under the terms of the GNU GPL"
 		" License version 2\n\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index fa9d652..0f7e544 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -65,9 +65,49 @@
 	return -1;
 }
 
+static void perf_session__id_header_size(struct perf_session *session)
+{
+       struct sample_data *data;
+       u64 sample_type = session->sample_type;
+       u16 size = 0;
+
+	if (!session->sample_id_all)
+		goto out;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               size += sizeof(data->tid) * 2;
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               size += sizeof(data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               size += sizeof(data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               size += sizeof(data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               size += sizeof(data->cpu) * 2;
+out:
+       session->id_hdr_size = size;
+}
+
+void perf_session__set_sample_id_all(struct perf_session *session, bool value)
+{
+	session->sample_id_all = value;
+	perf_session__id_header_size(session);
+}
+
+void perf_session__set_sample_type(struct perf_session *session, u64 type)
+{
+	session->sample_type = type;
+}
+
 void perf_session__update_sample_type(struct perf_session *self)
 {
 	self->sample_type = perf_header__sample_type(&self->header);
+	self->sample_id_all = perf_header__sample_id_all(&self->header);
+	perf_session__id_header_size(self);
 }
 
 int perf_session__create_kernel_maps(struct perf_session *self)
@@ -85,7 +125,9 @@
 	machines__destroy_guest_kernel_maps(&self->machines);
 }
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
+struct perf_session *perf_session__new(const char *filename, int mode,
+				       bool force, bool repipe,
+				       struct perf_event_ops *ops)
 {
 	size_t len = filename ? strlen(filename) + 1 : 0;
 	struct perf_session *self = zalloc(sizeof(*self) + len);
@@ -101,10 +143,20 @@
 	INIT_LIST_HEAD(&self->dead_threads);
 	self->hists_tree = RB_ROOT;
 	self->last_match = NULL;
-	self->mmap_window = 32;
+	/*
+	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
+	 * slices. On 32bit we use 32MB.
+	 */
+#if BITS_PER_LONG == 64
+	self->mmap_window = ULLONG_MAX;
+#else
+	self->mmap_window = 32 * 1024 * 1024ULL;
+#endif
 	self->machines = RB_ROOT;
 	self->repipe = repipe;
-	INIT_LIST_HEAD(&self->ordered_samples.samples_head);
+	INIT_LIST_HEAD(&self->ordered_samples.samples);
+	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
+	INIT_LIST_HEAD(&self->ordered_samples.to_free);
 	machine__init(&self->host_machine, "", HOST_KERNEL_ID);
 
 	if (mode == O_RDONLY) {
@@ -120,6 +172,13 @@
 	}
 
 	perf_session__update_sample_type(self);
+
+	if (ops && ops->ordering_requires_timestamps &&
+	    ops->ordered_samples && !self->sample_id_all) {
+		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
+		ops->ordered_samples = false;
+	}
+
 out:
 	return self;
 out_free:
@@ -230,7 +289,15 @@
 	return syms;
 }
 
+static int process_event_synth_stub(event_t *event __used,
+				    struct perf_session *session __used)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
 static int process_event_stub(event_t *event __used,
+			      struct sample_data *sample __used,
 			      struct perf_session *session __used)
 {
 	dump_printf(": unhandled!\n");
@@ -262,7 +329,7 @@
 	if (handler->exit == NULL)
 		handler->exit = process_event_stub;
 	if (handler->lost == NULL)
-		handler->lost = process_event_stub;
+		handler->lost = event__process_lost;
 	if (handler->read == NULL)
 		handler->read = process_event_stub;
 	if (handler->throttle == NULL)
@@ -270,13 +337,13 @@
 	if (handler->unthrottle == NULL)
 		handler->unthrottle = process_event_stub;
 	if (handler->attr == NULL)
-		handler->attr = process_event_stub;
+		handler->attr = process_event_synth_stub;
 	if (handler->event_type == NULL)
-		handler->event_type = process_event_stub;
+		handler->event_type = process_event_synth_stub;
 	if (handler->tracing_data == NULL)
-		handler->tracing_data = process_event_stub;
+		handler->tracing_data = process_event_synth_stub;
 	if (handler->build_id == NULL)
-		handler->build_id = process_event_stub;
+		handler->build_id = process_event_synth_stub;
 	if (handler->finished_round == NULL) {
 		if (handler->ordered_samples)
 			handler->finished_round = process_finished_round;
@@ -386,33 +453,61 @@
 
 struct sample_queue {
 	u64			timestamp;
-	struct sample_event	*event;
+	u64			file_offset;
+	event_t			*event;
 	struct list_head	list;
 };
 
+static void perf_session_free_sample_buffers(struct perf_session *session)
+{
+	struct ordered_samples *os = &session->ordered_samples;
+
+	while (!list_empty(&os->to_free)) {
+		struct sample_queue *sq;
+
+		sq = list_entry(os->to_free.next, struct sample_queue, list);
+		list_del(&sq->list);
+		free(sq);
+	}
+}
+
+static int perf_session_deliver_event(struct perf_session *session,
+				      event_t *event,
+				      struct sample_data *sample,
+				      struct perf_event_ops *ops,
+				      u64 file_offset);
+
 static void flush_sample_queue(struct perf_session *s,
 			       struct perf_event_ops *ops)
 {
-	struct list_head *head = &s->ordered_samples.samples_head;
-	u64 limit = s->ordered_samples.next_flush;
+	struct ordered_samples *os = &s->ordered_samples;
+	struct list_head *head = &os->samples;
 	struct sample_queue *tmp, *iter;
+	struct sample_data sample;
+	u64 limit = os->next_flush;
+	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
 
 	if (!ops->ordered_samples || !limit)
 		return;
 
 	list_for_each_entry_safe(iter, tmp, head, list) {
 		if (iter->timestamp > limit)
-			return;
+			break;
 
-		if (iter == s->ordered_samples.last_inserted)
-			s->ordered_samples.last_inserted = NULL;
+		event__parse_sample(iter->event, s, &sample);
+		perf_session_deliver_event(s, iter->event, &sample, ops,
+					   iter->file_offset);
 
-		ops->sample((event_t *)iter->event, s);
-
-		s->ordered_samples.last_flush = iter->timestamp;
+		os->last_flush = iter->timestamp;
 		list_del(&iter->list);
-		free(iter->event);
-		free(iter);
+		list_add(&iter->list, &os->sample_cache);
+	}
+
+	if (list_empty(head)) {
+		os->last_sample = NULL;
+	} else if (last_ts <= limit) {
+		os->last_sample =
+			list_entry(head->prev, struct sample_queue, list);
 	}
 }
 
@@ -465,178 +560,265 @@
 	return 0;
 }
 
-static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
-{
-	struct sample_queue *iter;
-
-	list_for_each_entry_reverse(iter, head, list) {
-		if (iter->timestamp < new->timestamp) {
-			list_add(&new->list, &iter->list);
-			return;
-		}
-	}
-
-	list_add(&new->list, head);
-}
-
-static void __queue_sample_before(struct sample_queue *new,
-				  struct sample_queue *iter,
-				  struct list_head *head)
-{
-	list_for_each_entry_continue_reverse(iter, head, list) {
-		if (iter->timestamp < new->timestamp) {
-			list_add(&new->list, &iter->list);
-			return;
-		}
-	}
-
-	list_add(&new->list, head);
-}
-
-static void __queue_sample_after(struct sample_queue *new,
-				 struct sample_queue *iter,
-				 struct list_head *head)
-{
-	list_for_each_entry_continue(iter, head, list) {
-		if (iter->timestamp > new->timestamp) {
-			list_add_tail(&new->list, &iter->list);
-			return;
-		}
-	}
-	list_add_tail(&new->list, head);
-}
-
 /* The queue is ordered by time */
-static void __queue_sample_event(struct sample_queue *new,
-				 struct perf_session *s)
+static void __queue_event(struct sample_queue *new, struct perf_session *s)
 {
-	struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
-	struct list_head *head = &s->ordered_samples.samples_head;
+	struct ordered_samples *os = &s->ordered_samples;
+	struct sample_queue *sample = os->last_sample;
+	u64 timestamp = new->timestamp;
+	struct list_head *p;
 
+	os->last_sample = new;
 
-	if (!last_inserted) {
-		__queue_sample_end(new, head);
+	if (!sample) {
+		list_add(&new->list, &os->samples);
+		os->max_timestamp = timestamp;
 		return;
 	}
 
 	/*
-	 * Most of the time the current event has a timestamp
-	 * very close to the last event inserted, unless we just switched
-	 * to another event buffer. Having a sorting based on a list and
-	 * on the last inserted event that is close to the current one is
-	 * probably more efficient than an rbtree based sorting.
+	 * last_sample might point to some random place in the list as it's
+	 * the last queued event. We expect that the new event is close to
+	 * this.
 	 */
-	if (last_inserted->timestamp >= new->timestamp)
-		__queue_sample_before(new, last_inserted, head);
-	else
-		__queue_sample_after(new, last_inserted, head);
+	if (sample->timestamp <= timestamp) {
+		while (sample->timestamp <= timestamp) {
+			p = sample->list.next;
+			if (p == &os->samples) {
+				list_add_tail(&new->list, &os->samples);
+				os->max_timestamp = timestamp;
+				return;
+			}
+			sample = list_entry(p, struct sample_queue, list);
+		}
+		list_add_tail(&new->list, &sample->list);
+	} else {
+		while (sample->timestamp > timestamp) {
+			p = sample->list.prev;
+			if (p == &os->samples) {
+				list_add(&new->list, &os->samples);
+				return;
+			}
+			sample = list_entry(p, struct sample_queue, list);
+		}
+		list_add(&new->list, &sample->list);
+	}
 }
 
-static int queue_sample_event(event_t *event, struct sample_data *data,
-			      struct perf_session *s)
+#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))
+
+static int perf_session_queue_event(struct perf_session *s, event_t *event,
+				    struct sample_data *data, u64 file_offset)
 {
+	struct ordered_samples *os = &s->ordered_samples;
+	struct list_head *sc = &os->sample_cache;
 	u64 timestamp = data->time;
 	struct sample_queue *new;
 
+	if (!timestamp || timestamp == ~0ULL)
+		return -ETIME;
 
 	if (timestamp < s->ordered_samples.last_flush) {
 		printf("Warning: Timestamp below last timeslice flush\n");
 		return -EINVAL;
 	}
 
-	new = malloc(sizeof(*new));
-	if (!new)
-		return -ENOMEM;
+	if (!list_empty(sc)) {
+		new = list_entry(sc->next, struct sample_queue, list);
+		list_del(&new->list);
+	} else if (os->sample_buffer) {
+		new = os->sample_buffer + os->sample_buffer_idx;
+		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
+			os->sample_buffer = NULL;
+	} else {
+		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
+		if (!os->sample_buffer)
+			return -ENOMEM;
+		list_add(&os->sample_buffer->list, &os->to_free);
+		os->sample_buffer_idx = 2;
+		new = os->sample_buffer + 1;
+	}
 
 	new->timestamp = timestamp;
+	new->file_offset = file_offset;
+	new->event = event;
 
-	new->event = malloc(event->header.size);
-	if (!new->event) {
-		free(new);
-		return -ENOMEM;
+	__queue_event(new, s);
+
+	return 0;
+}
+
+static void callchain__printf(struct sample_data *sample)
+{
+	unsigned int i;
+
+	printf("... chain: nr:%Lu\n", sample->callchain->nr);
+
+	for (i = 0; i < sample->callchain->nr; i++)
+		printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
+}
+
+static void perf_session__print_tstamp(struct perf_session *session,
+				       event_t *event,
+				       struct sample_data *sample)
+{
+	if (event->header.type != PERF_RECORD_SAMPLE &&
+	    !session->sample_id_all) {
+		fputs("-1 -1 ", stdout);
+		return;
 	}
 
-	memcpy(new->event, event, event->header.size);
+	if ((session->sample_type & PERF_SAMPLE_CPU))
+		printf("%u ", sample->cpu);
 
-	__queue_sample_event(new, s);
-	s->ordered_samples.last_inserted = new;
-
-	if (new->timestamp > s->ordered_samples.max_timestamp)
-		s->ordered_samples.max_timestamp = new->timestamp;
-
-	return 0;
+	if (session->sample_type & PERF_SAMPLE_TIME)
+		printf("%Lu ", sample->time);
 }
 
-static int perf_session__process_sample(event_t *event, struct perf_session *s,
-					struct perf_event_ops *ops)
+static void dump_event(struct perf_session *session, event_t *event,
+		       u64 file_offset, struct sample_data *sample)
 {
-	struct sample_data data;
+	if (!dump_trace)
+		return;
 
-	if (!ops->ordered_samples)
-		return ops->sample(event, s);
+	printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
+	       event->header.type);
 
-	bzero(&data, sizeof(struct sample_data));
-	event__parse_sample(event, s->sample_type, &data);
-
-	queue_sample_event(event, &data, s);
-
-	return 0;
-}
-
-static int perf_session__process_event(struct perf_session *self,
-				       event_t *event,
-				       struct perf_event_ops *ops,
-				       u64 offset, u64 head)
-{
 	trace_event(event);
 
-	if (event->header.type < PERF_RECORD_HEADER_MAX) {
-		dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
-			    offset + head, event->header.size,
-			    event__name[event->header.type]);
-		hists__inc_nr_events(&self->hists, event->header.type);
-	}
+	if (sample)
+		perf_session__print_tstamp(session, event, sample);
 
-	if (self->header.needs_swap && event__swap_ops[event->header.type])
-		event__swap_ops[event->header.type](event);
+	printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
+	       event__get_event_name(event->header.type));
+}
+
+static void dump_sample(struct perf_session *session, event_t *event,
+			struct sample_data *sample)
+{
+	if (!dump_trace)
+		return;
+
+	printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+	       sample->pid, sample->tid, sample->ip, sample->period);
+
+	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
+		callchain__printf(sample);
+}
+
+static int perf_session_deliver_event(struct perf_session *session,
+				      event_t *event,
+				      struct sample_data *sample,
+				      struct perf_event_ops *ops,
+				      u64 file_offset)
+{
+	dump_event(session, event, file_offset, sample);
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
-		return perf_session__process_sample(event, self, ops);
+		dump_sample(session, event, sample);
+		return ops->sample(event, sample, session);
 	case PERF_RECORD_MMAP:
-		return ops->mmap(event, self);
+		return ops->mmap(event, sample, session);
 	case PERF_RECORD_COMM:
-		return ops->comm(event, self);
+		return ops->comm(event, sample, session);
 	case PERF_RECORD_FORK:
-		return ops->fork(event, self);
+		return ops->fork(event, sample, session);
 	case PERF_RECORD_EXIT:
-		return ops->exit(event, self);
+		return ops->exit(event, sample, session);
 	case PERF_RECORD_LOST:
-		return ops->lost(event, self);
+		return ops->lost(event, sample, session);
 	case PERF_RECORD_READ:
-		return ops->read(event, self);
+		return ops->read(event, sample, session);
 	case PERF_RECORD_THROTTLE:
-		return ops->throttle(event, self);
+		return ops->throttle(event, sample, session);
 	case PERF_RECORD_UNTHROTTLE:
-		return ops->unthrottle(event, self);
-	case PERF_RECORD_HEADER_ATTR:
-		return ops->attr(event, self);
-	case PERF_RECORD_HEADER_EVENT_TYPE:
-		return ops->event_type(event, self);
-	case PERF_RECORD_HEADER_TRACING_DATA:
-		/* setup for reading amidst mmap */
-		lseek(self->fd, offset + head, SEEK_SET);
-		return ops->tracing_data(event, self);
-	case PERF_RECORD_HEADER_BUILD_ID:
-		return ops->build_id(event, self);
-	case PERF_RECORD_FINISHED_ROUND:
-		return ops->finished_round(event, self, ops);
+		return ops->unthrottle(event, sample, session);
 	default:
-		++self->hists.stats.nr_unknown_events;
+		++session->hists.stats.nr_unknown_events;
 		return -1;
 	}
 }
 
+static int perf_session__preprocess_sample(struct perf_session *session,
+					   event_t *event, struct sample_data *sample)
+{
+	if (event->header.type != PERF_RECORD_SAMPLE ||
+	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
+		return 0;
+
+	if (!ip_callchain__valid(sample->callchain, event)) {
+		pr_debug("call-chain problem with event, skipping it.\n");
+		++session->hists.stats.nr_invalid_chains;
+		session->hists.stats.total_invalid_chains += sample->period;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int perf_session__process_user_event(struct perf_session *session, event_t *event,
+					    struct perf_event_ops *ops, u64 file_offset)
+{
+	dump_event(session, event, file_offset, NULL);
+
+	/* These events are processed right away */
+	switch (event->header.type) {
+	case PERF_RECORD_HEADER_ATTR:
+		return ops->attr(event, session);
+	case PERF_RECORD_HEADER_EVENT_TYPE:
+		return ops->event_type(event, session);
+	case PERF_RECORD_HEADER_TRACING_DATA:
+		/* setup for reading amidst mmap */
+		lseek(session->fd, file_offset, SEEK_SET);
+		return ops->tracing_data(event, session);
+	case PERF_RECORD_HEADER_BUILD_ID:
+		return ops->build_id(event, session);
+	case PERF_RECORD_FINISHED_ROUND:
+		return ops->finished_round(event, session, ops);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int perf_session__process_event(struct perf_session *session,
+				       event_t *event,
+				       struct perf_event_ops *ops,
+				       u64 file_offset)
+{
+	struct sample_data sample;
+	int ret;
+
+	if (session->header.needs_swap && event__swap_ops[event->header.type])
+		event__swap_ops[event->header.type](event);
+
+	if (event->header.type >= PERF_RECORD_HEADER_MAX)
+		return -EINVAL;
+
+	hists__inc_nr_events(&session->hists, event->header.type);
+
+	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+		return perf_session__process_user_event(session, event, ops, file_offset);
+
+	/*
+	 * For all kernel events we get the sample data
+	 */
+	event__parse_sample(event, session, &sample);
+
+	/* Preprocess sample records - precheck callchains */
+	if (perf_session__preprocess_sample(session, event, &sample))
+		return 0;
+
+	if (ops->ordered_samples) {
+		ret = perf_session_queue_event(session, event, &sample,
+					       file_offset);
+		if (ret != -ETIME)
+			return ret;
+	}
+
+	return perf_session_deliver_event(session, event, &sample, ops,
+					  file_offset);
+}
+
 void perf_event_header__bswap(struct perf_event_header *self)
 {
 	self->type = bswap_32(self->type);
@@ -724,8 +906,7 @@
 	}
 
 	if (size == 0 ||
-	    (skip = perf_session__process_event(self, &event, ops,
-						0, head)) < 0) {
+	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
 		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
 			    head, event.header.size, event.header.type);
 		/*
@@ -740,9 +921,6 @@
 
 	head += size;
 
-	dump_printf("\n%#Lx [%#x]: event: %d\n",
-		    head, event.header.size, event.header.type);
-
 	if (skip > 0)
 		head += skip;
 
@@ -751,82 +929,90 @@
 done:
 	err = 0;
 out_err:
+	perf_session_free_sample_buffers(self);
 	return err;
 }
 
-int __perf_session__process_events(struct perf_session *self,
+int __perf_session__process_events(struct perf_session *session,
 				   u64 data_offset, u64 data_size,
 				   u64 file_size, struct perf_event_ops *ops)
 {
-	int err, mmap_prot, mmap_flags;
-	u64 head, shift;
-	u64 offset = 0;
-	size_t	page_size;
+	u64 head, page_offset, file_offset, file_pos, progress_next;
+	int err, mmap_prot, mmap_flags, map_idx = 0;
+	struct ui_progress *progress;
+	size_t	page_size, mmap_size;
+	char *buf, *mmaps[8];
 	event_t *event;
 	uint32_t size;
-	char *buf;
-	struct ui_progress *progress = ui_progress__new("Processing events...",
-							self->size);
-	if (progress == NULL)
-		return -1;
 
 	perf_event_ops__fill_defaults(ops);
 
 	page_size = sysconf(_SC_PAGESIZE);
 
-	head = data_offset;
-	shift = page_size * (head / page_size);
-	offset += shift;
-	head -= shift;
+	page_offset = page_size * (data_offset / page_size);
+	file_offset = page_offset;
+	head = data_offset - page_offset;
+
+	if (data_offset + data_size < file_size)
+		file_size = data_offset + data_size;
+
+	progress_next = file_size / 16;
+	progress = ui_progress__new("Processing events...", file_size);
+	if (progress == NULL)
+		return -1;
+
+	mmap_size = session->mmap_window;
+	if (mmap_size > file_size)
+		mmap_size = file_size;
+
+	memset(mmaps, 0, sizeof(mmaps));
 
 	mmap_prot  = PROT_READ;
 	mmap_flags = MAP_SHARED;
 
-	if (self->header.needs_swap) {
+	if (session->header.needs_swap) {
 		mmap_prot  |= PROT_WRITE;
 		mmap_flags = MAP_PRIVATE;
 	}
 remap:
-	buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
-		   mmap_flags, self->fd, offset);
+	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
+		   file_offset);
 	if (buf == MAP_FAILED) {
 		pr_err("failed to mmap file\n");
 		err = -errno;
 		goto out_err;
 	}
+	mmaps[map_idx] = buf;
+	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
+	file_pos = file_offset + head;
 
 more:
 	event = (event_t *)(buf + head);
-	ui_progress__update(progress, offset);
 
-	if (self->header.needs_swap)
+	if (session->header.needs_swap)
 		perf_event_header__bswap(&event->header);
 	size = event->header.size;
 	if (size == 0)
 		size = 8;
 
-	if (head + event->header.size >= page_size * self->mmap_window) {
-		int munmap_ret;
+	if (head + event->header.size >= mmap_size) {
+		if (mmaps[map_idx]) {
+			munmap(mmaps[map_idx], mmap_size);
+			mmaps[map_idx] = NULL;
+		}
 
-		shift = page_size * (head / page_size);
-
-		munmap_ret = munmap(buf, page_size * self->mmap_window);
-		assert(munmap_ret == 0);
-
-		offset += shift;
-		head -= shift;
+		page_offset = page_size * (head / page_size);
+		file_offset += page_offset;
+		head -= page_offset;
 		goto remap;
 	}
 
 	size = event->header.size;
 
-	dump_printf("\n%#Lx [%#x]: event: %d\n",
-		    offset + head, event->header.size, event->header.type);
-
 	if (size == 0 ||
-	    perf_session__process_event(self, event, ops, offset, head) < 0) {
+	    perf_session__process_event(session, event, ops, file_pos) < 0) {
 		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
-			    offset + head, event->header.size,
+			    file_offset + head, event->header.size,
 			    event->header.type);
 		/*
 		 * assume we lost track of the stream, check alignment, and
@@ -839,19 +1025,49 @@
 	}
 
 	head += size;
+	file_pos += size;
 
-	if (offset + head >= data_offset + data_size)
-		goto done;
+	if (file_pos >= progress_next) {
+		progress_next += file_size / 16;
+		ui_progress__update(progress, file_pos);
+	}
 
-	if (offset + head < file_size)
+	if (file_pos < file_size)
 		goto more;
-done:
+
 	err = 0;
 	/* do the final flush for ordered samples */
-	self->ordered_samples.next_flush = ULLONG_MAX;
-	flush_sample_queue(self, ops);
+	session->ordered_samples.next_flush = ULLONG_MAX;
+	flush_sample_queue(session, ops);
 out_err:
 	ui_progress__delete(progress);
+
+	if (ops->lost == event__process_lost &&
+	    session->hists.stats.total_lost != 0) {
+		ui__warning("Processed %Lu events and LOST %Lu!\n\n"
+			    "Check IO/CPU overload!\n\n",
+			    session->hists.stats.total_period,
+			    session->hists.stats.total_lost);
+	}
+
+	if (session->hists.stats.nr_unknown_events != 0) {
+		ui__warning("Found %u unknown events!\n\n"
+			    "Is this an older tool processing a perf.data "
+			    "file generated by a more recent tool?\n\n"
+			    "If that is not the case, consider "
+			    "reporting to linux-kernel@vger.kernel.org.\n\n",
+			    session->hists.stats.nr_unknown_events);
+	}
+
+ 	if (session->hists.stats.nr_invalid_chains != 0) {
+ 		ui__warning("Found invalid callchains!\n\n"
+ 			    "%u out of %u events were discarded for this reason.\n\n"
+ 			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+ 			    session->hists.stats.nr_invalid_chains,
+ 			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
+ 	}
+
+	perf_session_free_sample_buffers(session);
 	return err;
 }
 
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9fa0fc2..ffe4b98 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -17,8 +17,12 @@
 	u64			last_flush;
 	u64			next_flush;
 	u64			max_timestamp;
-	struct list_head	samples_head;
-	struct sample_queue	*last_inserted;
+	struct list_head	samples;
+	struct list_head	sample_cache;
+	struct list_head	to_free;
+	struct sample_queue	*sample_buffer;
+	struct sample_queue	*last_sample;
+	int			sample_buffer_idx;
 };
 
 struct perf_session {
@@ -42,6 +46,8 @@
 	int			fd;
 	bool			fd_pipe;
 	bool			repipe;
+	bool			sample_id_all;
+	u16			id_hdr_size;
 	int			cwdlen;
 	char			*cwd;
 	struct ordered_samples	ordered_samples;
@@ -50,7 +56,9 @@
 
 struct perf_event_ops;
 
-typedef int (*event_op)(event_t *self, struct perf_session *session);
+typedef int (*event_op)(event_t *self, struct sample_data *sample,
+			struct perf_session *session);
+typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
 typedef int (*event_op2)(event_t *self, struct perf_session *session,
 			 struct perf_event_ops *ops);
 
@@ -63,16 +71,19 @@
 			lost,
 			read,
 			throttle,
-			unthrottle,
-			attr,
+			unthrottle;
+	event_synth_op	attr,
 			event_type,
 			tracing_data,
 			build_id;
 	event_op2	finished_round;
 	bool		ordered_samples;
+	bool		ordering_requires_timestamps;
 };
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe);
+struct perf_session *perf_session__new(const char *filename, int mode,
+				       bool force, bool repipe,
+				       struct perf_event_ops *ops);
 void perf_session__delete(struct perf_session *self);
 
 void perf_event_header__bswap(struct perf_event_header *self);
@@ -100,6 +111,8 @@
 
 int do_read(int fd, void *buf, size_t size);
 void perf_session__update_sample_type(struct perf_session *self);
+void perf_session__set_sample_id_all(struct perf_session *session, bool value);
+void perf_session__set_sample_type(struct perf_session *session, u64 type);
 void perf_session__remove_thread(struct perf_session *self, struct thread *th);
 
 static inline
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index b62a553..f44fa54 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -170,7 +170,7 @@
 		return repsep_snprintf(bf, size, "%-*s", width, dso_name);
 	}
 
-	return repsep_snprintf(bf, size, "%*Lx", width, self->ip);
+	return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
 }
 
 /* --sort symbol */
@@ -196,7 +196,7 @@
 
 	if (verbose) {
 		char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
-		ret += repsep_snprintf(bf, size, "%*Lx %c ",
+		ret += repsep_snprintf(bf, size, "%-#*llx %c ",
 				       BITS_PER_LONG / 4, self->ip, o);
 	}
 
@@ -205,7 +205,7 @@
 		ret += repsep_snprintf(bf + ret, size - ret, "%s",
 				       self->ms.sym->name);
 	else
-		ret += repsep_snprintf(bf + ret, size - ret, "%*Lx",
+		ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
 				       BITS_PER_LONG / 4, self->ip);
 
 	return ret;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index d628c8d..2ea1a2e 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -22,6 +22,10 @@
 #include <limits.h>
 #include <sys/utsname.h>
 
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 128
+#endif
+
 #ifndef NT_GNU_BUILD_ID
 #define NT_GNU_BUILD_ID 3
 #endif
@@ -41,6 +45,7 @@
 	.exclude_other	  = true,
 	.use_modules	  = true,
 	.try_vmlinux_path = true,
+	.symfs            = "",
 };
 
 int dso__name_len(const struct dso *self)
@@ -92,7 +97,7 @@
 		prev = curr;
 		curr = rb_entry(nd, struct symbol, rb_node);
 
-		if (prev->end == prev->start)
+		if (prev->end == prev->start && prev->end != curr->start)
 			prev->end = curr->start - 1;
 	}
 
@@ -121,7 +126,7 @@
 	 * We still haven't the actual symbols, so guess the
 	 * last map final address.
 	 */
-	curr->end = ~0UL;
+	curr->end = ~0ULL;
 }
 
 static void map_groups__fixup_end(struct map_groups *self)
@@ -425,16 +430,25 @@
 
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
-						     char type, u64 start))
+					  char type, u64 start, u64 end))
 {
 	char *line = NULL;
 	size_t n;
-	int err = 0;
+	int err = -1;
+	u64 prev_start = 0;
+	char prev_symbol_type = 0;
+	char *prev_symbol_name;
 	FILE *file = fopen(filename, "r");
 
 	if (file == NULL)
 		goto out_failure;
 
+	prev_symbol_name = malloc(KSYM_NAME_LEN);
+	if (prev_symbol_name == NULL)
+		goto out_close;
+
+	err = 0;
+
 	while (!feof(file)) {
 		u64 start;
 		int line_len, len;
@@ -454,14 +468,33 @@
 			continue;
 
 		symbol_type = toupper(line[len]);
-		symbol_name = line + len + 2;
+		len += 2;
+		symbol_name = line + len;
+		len = line_len - len;
 
-		err = process_symbol(arg, symbol_name, symbol_type, start);
-		if (err)
+		if (len >= KSYM_NAME_LEN) {
+			err = -1;
 			break;
+		}
+
+		if (prev_symbol_type) {
+			u64 end = start;
+			if (end != prev_start)
+				--end;
+			err = process_symbol(arg, prev_symbol_name,
+					     prev_symbol_type, prev_start, end);
+			if (err)
+				break;
+		}
+
+		memcpy(prev_symbol_name, symbol_name, len + 1);
+		prev_symbol_type = symbol_type;
+		prev_start = start;
 	}
 
+	free(prev_symbol_name);
 	free(line);
+out_close:
 	fclose(file);
 	return err;
 
@@ -483,7 +516,7 @@
 }
 
 static int map__process_kallsym_symbol(void *arg, const char *name,
-				       char type, u64 start)
+				       char type, u64 start, u64 end)
 {
 	struct symbol *sym;
 	struct process_kallsyms_args *a = arg;
@@ -492,11 +525,8 @@
 	if (!symbol_type__is_a(type, a->map->type))
 		return 0;
 
-	/*
-	 * Will fix up the end later, when we have all symbols sorted.
-	 */
-	sym = symbol__new(start, 0, kallsyms2elf_type(type), name);
-
+	sym = symbol__new(start, end - start + 1,
+			  kallsyms2elf_type(type), name);
 	if (sym == NULL)
 		return -ENOMEM;
 	/*
@@ -649,7 +679,6 @@
 	if (dso__load_all_kallsyms(self, filename, map) < 0)
 		return -1;
 
-	symbols__fixup_end(&self->symbols[map->type]);
 	if (self->kernel == DSO_TYPE_GUEST_KERNEL)
 		self->origin = DSO__ORIG_GUEST_KERNEL;
 	else
@@ -839,8 +868,11 @@
 	char sympltname[1024];
 	Elf *elf;
 	int nr = 0, symidx, fd, err = 0;
+	char name[PATH_MAX];
 
-	fd = open(self->long_name, O_RDONLY);
+	snprintf(name, sizeof(name), "%s%s",
+		 symbol_conf.symfs, self->long_name);
+	fd = open(name, O_RDONLY);
 	if (fd < 0)
 		goto out;
 
@@ -1452,16 +1484,19 @@
 	     self->origin++) {
 		switch (self->origin) {
 		case DSO__ORIG_BUILD_ID_CACHE:
-			if (dso__build_id_filename(self, name, size) == NULL)
+			/* skip the locally configured cache if a symfs is given */
+			if (symbol_conf.symfs[0] ||
+			    (dso__build_id_filename(self, name, size) == NULL)) {
 				continue;
+			}
 			break;
 		case DSO__ORIG_FEDORA:
-			snprintf(name, size, "/usr/lib/debug%s.debug",
-				 self->long_name);
+			snprintf(name, size, "%s/usr/lib/debug%s.debug",
+				 symbol_conf.symfs, self->long_name);
 			break;
 		case DSO__ORIG_UBUNTU:
-			snprintf(name, size, "/usr/lib/debug%s",
-				 self->long_name);
+			snprintf(name, size, "%s/usr/lib/debug%s",
+				 symbol_conf.symfs, self->long_name);
 			break;
 		case DSO__ORIG_BUILDID: {
 			char build_id_hex[BUILD_ID_SIZE * 2 + 1];
@@ -1473,19 +1508,26 @@
 					  sizeof(self->build_id),
 					  build_id_hex);
 			snprintf(name, size,
-				 "/usr/lib/debug/.build-id/%.2s/%s.debug",
-				 build_id_hex, build_id_hex + 2);
+				 "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
+				 symbol_conf.symfs, build_id_hex, build_id_hex + 2);
 			}
 			break;
 		case DSO__ORIG_DSO:
-			snprintf(name, size, "%s", self->long_name);
+			snprintf(name, size, "%s%s",
+			     symbol_conf.symfs, self->long_name);
 			break;
 		case DSO__ORIG_GUEST_KMODULE:
 			if (map->groups && map->groups->machine)
 				root_dir = map->groups->machine->root_dir;
 			else
 				root_dir = "";
-			snprintf(name, size, "%s%s", root_dir, self->long_name);
+			snprintf(name, size, "%s%s%s", symbol_conf.symfs,
+				 root_dir, self->long_name);
+			break;
+
+		case DSO__ORIG_KMODULE:
+			snprintf(name, size, "%s%s", symbol_conf.symfs,
+				 self->long_name);
 			break;
 
 		default:
@@ -1784,17 +1826,20 @@
 			     const char *vmlinux, symbol_filter_t filter)
 {
 	int err = -1, fd;
+	char symfs_vmlinux[PATH_MAX];
 
-	fd = open(vmlinux, O_RDONLY);
+	snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s",
+		 symbol_conf.symfs, vmlinux);
+	fd = open(symfs_vmlinux, O_RDONLY);
 	if (fd < 0)
 		return -1;
 
 	dso__set_loaded(self, map->type);
-	err = dso__load_sym(self, map, vmlinux, fd, filter, 0, 0);
+	err = dso__load_sym(self, map, symfs_vmlinux, fd, filter, 0, 0);
 	close(fd);
 
 	if (err > 0)
-		pr_debug("Using %s for symbols\n", vmlinux);
+		pr_debug("Using %s for symbols\n", symfs_vmlinux);
 
 	return err;
 }
@@ -1836,8 +1881,8 @@
 	const char *kallsyms_filename = NULL;
 	char *kallsyms_allocated_filename = NULL;
 	/*
-	 * Step 1: if the user specified a vmlinux filename, use it and only
-	 * it, reporting errors to the user if it cannot be used.
+	 * Step 1: if the user specified a kallsyms or vmlinux filename, use
+	 * it and only it, reporting errors to the user if it cannot be used.
 	 *
 	 * For instance, try to analyse an ARM perf.data file _without_ a
 	 * build-id, or if the user specifies the wrong path to the right
@@ -1850,6 +1895,11 @@
 	 * validation in dso__load_vmlinux and will bail out if they don't
 	 * match.
 	 */
+	if (symbol_conf.kallsyms_name != NULL) {
+		kallsyms_filename = symbol_conf.kallsyms_name;
+		goto do_kallsyms;
+	}
+
 	if (symbol_conf.vmlinux_name != NULL) {
 		err = dso__load_vmlinux(self, map,
 					symbol_conf.vmlinux_name, filter);
@@ -1867,6 +1917,10 @@
 			goto out_fixup;
 	}
 
+	/* do not try local files if a symfs was given */
+	if (symbol_conf.symfs[0] != 0)
+		return -1;
+
 	/*
 	 * Say the kernel DSO was created when processing the build-id header table,
 	 * we have a build-id, so check if it is the same as the running kernel,
@@ -2136,7 +2190,7 @@
 };
 
 static int symbol__in_kernel(void *arg, const char *name,
-			     char type __used, u64 start)
+			     char type __used, u64 start, u64 end __used)
 {
 	struct process_args *args = arg;
 
@@ -2257,9 +2311,6 @@
 	struct utsname uts;
 	char bf[PATH_MAX];
 
-	if (uname(&uts) < 0)
-		return -1;
-
 	vmlinux_path = malloc(sizeof(char *) * 5);
 	if (vmlinux_path == NULL)
 		return -1;
@@ -2272,6 +2323,14 @@
 	if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
 		goto out_fail;
 	++vmlinux_path__nr_entries;
+
+	/* only try running kernel version if no symfs was given */
+	if (symbol_conf.symfs[0] != 0)
+		return 0;
+
+	if (uname(&uts) < 0)
+		return -1;
+
 	snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
 	vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
 	if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
@@ -2331,6 +2390,8 @@
 
 int symbol__init(void)
 {
+	const char *symfs;
+
 	if (symbol_conf.initialized)
 		return 0;
 
@@ -2359,6 +2420,18 @@
 		       symbol_conf.sym_list_str, "symbol") < 0)
 		goto out_free_comm_list;
 
+	/*
+	 * A path to symbols of "/" is identical to ""
+	 * reset here for simplicity.
+	 */
+	symfs = realpath(symbol_conf.symfs, NULL);
+	if (symfs == NULL)
+		symfs = symbol_conf.symfs;
+	if (strcmp(symfs, "/") == 0)
+		symbol_conf.symfs = "";
+	if (symfs != symbol_conf.symfs)
+		free((void *)symfs);
+
 	symbol_conf.initialized = true;
 	return 0;
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 038f220..7b8c27b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -72,6 +72,7 @@
 			show_cpu_utilization,
 			initialized;
 	const char	*vmlinux_name,
+			*kallsyms_name,
 			*source_prefix,
 			*field_sep;
 	const char	*default_guest_vmlinux_name,
@@ -85,6 +86,7 @@
        struct strlist	*dso_list,
 			*comm_list,
 			*sym_list;
+	const char	*symfs;
 };
 
 extern struct symbol_conf symbol_conf;
@@ -213,7 +215,7 @@
 int build_id__sprintf(const u8 *self, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
-					  char type, u64 start));
+					  char type, u64 start, u64 end));
 
 void machine__destroy_kernel_maps(struct machine *self);
 int __machine__create_kernel_maps(struct machine *self, struct dso *kernel);
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 056c695..7b5a892 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -104,10 +104,24 @@
 	return rc;
 }
 
-static const char yes[] = "Yes", no[] = "No";
+static const char yes[] = "Yes", no[] = "No",
+		  warning_str[] = "Warning!", ok[] = "Ok";
 
 bool ui__dialog_yesno(const char *msg)
 {
 	/* newtWinChoice should really be accepting const char pointers... */
 	return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1;
 }
+
+void ui__warning(const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	if (use_browser > 0)
+		newtWinMessagev((char *)warning_str, (char *)ok,
+				(char *)format, args);
+	else
+		vfprintf(stderr, format, args);
+	va_end(args);
+}