Merge branch 'akpm' (patches from Andrew)

Merge first patchbomb from Andrew Morton:

 - arch/sh updates

 - ocfs2 updates

 - kernel/watchdog feature

 - about half of mm/

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (122 commits)
  Documentation: update arch list in the 'memtest' entry
  Kconfig: memtest: update number of test patterns up to 17
  arm: add support for memtest
  arm64: add support for memtest
  memtest: use phys_addr_t for physical addresses
  mm: move memtest under mm
  mm, hugetlb: abort __get_user_pages if current has been oom killed
  mm, mempool: do not allow atomic resizing
  memcg: print cgroup information when system panics due to panic_on_oom
  mm: numa: remove migrate_ratelimited
  mm: fold arch_randomize_brk into ARCH_HAS_ELF_RANDOMIZE
  mm: split ET_DYN ASLR from mmap ASLR
  s390: redefine randomize_et_dyn for ELF_ET_DYN_BASE
  mm: expose arch_mmap_rnd when available
  s390: standardize mmap_rnd() usage
  powerpc: standardize mmap_rnd() usage
  mips: extract logic for mmap_rnd()
  arm64: standardize mmap_rnd() usage
  x86: standardize mmap_rnd() usage
  arm: factor out mmap ASLR into mmap_rnd
  ...
diff --git a/Documentation/ABI/testing/sysfs-driver-hid b/Documentation/ABI/testing/sysfs-driver-hid
index b6490e1..48942ca 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid
+++ b/Documentation/ABI/testing/sysfs-driver-hid
@@ -8,3 +8,13 @@
 		report descriptor.
 		This file cannot be written.
 Users:		HIDAPI library (http://www.signal11.us/oss/hidapi)
+
+What:		For USB devices	: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
+		For BT devices	: /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
+		Symlink		: /sys/class/hidraw/hidraw<num>/device/country
+Date:		February 2015
+KernelVersion:	3.19
+Contact:	Olivier Gay <ogay@logitech.com>
+Description:	When read, this file returns the hex integer value in ASCII
+		of the device's HID country code (e.g. 21 for US).
+		This file cannot be written.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
index 167d903..b3f6a2a 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
+++ b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
@@ -5,3 +5,48 @@
 Description:	Display minimum, maximum and current range of the steering
 		wheel. Writing a value within min and max boundaries sets the
 		range of the wheel.
+
+What:		/sys/bus/hid/drivers/logitech/<dev>/alternate_modes
+Date:		Feb 2015
+KernelVersion:	4.1
+Contact:	Michal Malý <madcatxster@gmail.com>
+Description:	Displays a set of alternate modes supported by a wheel. Each
+		mode is listed as follows:
+		  Tag: Mode Name
+		Currently active mode is marked with an asterisk. List also
+		contains an abstract item "native" which always denotes the
+		native mode of the wheel. Echoing the mode tag switches the
+		wheel into the corresponding mode. Depending on the exact model
+		of the wheel not all listed modes might always be selectable.
+		If a wheel cannot be switched into the desired mode, -EINVAL
+		is returned accompanied with an explanatory message in the
+		kernel log.
+		This entry is not created for devices that have only one mode.
+
+		Currently supported mode switches:
+		Driving Force Pro:
+		  DF-EX --> DFP
+
+		G25:
+		  DF-EX --> DFP --> G25
+
+		G27:
+		  DF-EX <*> DFP <-> G25 <-> G27
+		  DF-EX <*--------> G25 <-> G27
+		  DF-EX <*----------------> G27
+
+		DFGT:
+		  DF-EX <*> DFP <-> DFGT
+		  DF-EX <*--------> DFGT
+
+		* hid_logitech module must be loaded with lg4ff_no_autoswitch=1
+		  parameter set in order for the switch to DF-EX mode to work.
+
+What:		/sys/bus/hid/drivers/logitech/<dev>/real_id
+Date:		Feb 2015
+KernelVersion:	4.1
+Contact:	Michal Malý <madcatxster@gmail.com>
+Description:	Displays the real model of the wheel regardless of any
+		alternate mode the wheel might be switched to.
+		It is a read-only value.
+		This entry is not created for devices that have only one mode.
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 183e41b..dab6da3 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -201,11 +201,11 @@
 atomic_t and return the new counter value after the operation is
 performed.
 
-Unlike the above routines, it is required that explicit memory
-barriers are performed before and after the operation.  It must be
-done such that all memory operations before and after the atomic
-operation calls are strongly ordered with respect to the atomic
-operation itself.
+Unlike the above routines, it is required that these primitives
+include explicit memory barriers that are performed before and after
+the operation.  It must be done such that all memory operations before
+and after the atomic operation calls are strongly ordered with respect
+to the atomic operation itself.
 
 For example, it should behave as if a smp_mb() call existed both
 before and after the atomic operation.
@@ -233,21 +233,21 @@
 given atomic counter.  They return a boolean indicating whether the
 resulting counter value was zero or not.
 
-It requires explicit memory barrier semantics around the operation as
-above.
+Again, these primitives provide explicit memory barrier semantics around
+the atomic operation.
 
 	int atomic_sub_and_test(int i, atomic_t *v);
 
 This is identical to atomic_dec_and_test() except that an explicit
-decrement is given instead of the implicit "1".  It requires explicit
-memory barrier semantics around the operation.
+decrement is given instead of the implicit "1".  This primitive must
+provide explicit memory barrier semantics around the operation.
 
 	int atomic_add_negative(int i, atomic_t *v);
 
-The given increment is added to the given atomic counter value.  A
-boolean is return which indicates whether the resulting counter value
-is negative.  It requires explicit memory barrier semantics around the
-operation.
+The given increment is added to the given atomic counter value.  A boolean
+is return which indicates whether the resulting counter value is negative.
+This primitive must provide explicit memory barrier semantics around
+the operation.
 
 Then:
 
@@ -257,7 +257,7 @@
 the given new value.  It returns the old value that the atomic variable v had
 just before the operation.
 
-atomic_xchg requires explicit memory barriers around the operation.
+atomic_xchg must provide explicit memory barriers around the operation.
 
 	int atomic_cmpxchg(atomic_t *v, int old, int new);
 
@@ -266,7 +266,7 @@
 atomic_cmpxchg will only satisfy its atomicity semantics as long as all
 other accesses of *v are performed through atomic_xxx operations.
 
-atomic_cmpxchg requires explicit memory barriers around the operation.
+atomic_cmpxchg must provide explicit memory barriers around the operation.
 
 The semantics for atomic_cmpxchg are the same as those defined for 'cas'
 below.
@@ -279,8 +279,8 @@
 returns non zero. If v is equal to u then it returns zero. This is done as
 an atomic operation.
 
-atomic_add_unless requires explicit memory barriers around the operation
-unless it fails (returns 0).
+atomic_add_unless must provide explicit memory barriers around the
+operation unless it fails (returns 0).
 
 atomic_inc_not_zero, equivalent to atomic_add_unless(v, 1, 0)
 
@@ -460,9 +460,9 @@
 like this occur as well.
 
 These routines, like the atomic_t counter operations returning values,
-require explicit memory barrier semantics around their execution.  All
-memory operations before the atomic bit operation call must be made
-visible globally before the atomic bit operation is made visible.
+must provide explicit memory barrier semantics around their execution.
+All memory operations before the atomic bit operation call must be
+made visible globally before the atomic bit operation is made visible.
 Likewise, the atomic bit operation must be visible globally before any
 subsequent memory operation is made visible.  For example:
 
@@ -536,8 +536,9 @@
 These non-atomic variants also do not require any special memory
 barrier semantics.
 
-The routines xchg() and cmpxchg() need the same exact memory barriers
-as the atomic and bit operations returning values.
+The routines xchg() and cmpxchg() must provide the same exact
+memory-barrier semantics as the atomic and bit operations returning
+values.
 
 Spinlocks and rwlocks have memory barrier expectations as well.
 The rule to follow is simple:
diff --git a/Documentation/hid/hid-sensor.txt b/Documentation/hid/hid-sensor.txt
index 948b098..b287752 100644
--- a/Documentation/hid/hid-sensor.txt
+++ b/Documentation/hid/hid-sensor.txt
@@ -138,3 +138,87 @@
 the usage id of X axis. HID sensors can provide events, so this is not necessary
 to poll for any field. If there is some new sample, the core driver will call
 registered callback function to process the sample.
+
+
+----------
+
+HID Custom and generic Sensors
+
+HID Sensor specification defines two special sensor usage types. Since they
+don't represent a standard sensor, it is not possible to define using Linux IIO
+type interfaces.
+The purpose of these sensors is to extend the functionality or provide a
+way to obfuscate the data being communicated by a sensor. Without knowing the
+mapping between the data and its encapsulated form, it is difficult for
+an application/driver to determine what data is being communicated by the sensor.
+This allows some differentiating use cases, where vendor can provide applications.
+Some common use cases are debug other sensors or to provide some events like
+keyboard attached/detached or lid open/close.
+
+To allow application to utilize these sensors, here they are exported uses sysfs
+attribute groups, attributes and misc device interface.
+
+An example of this representation on sysfs:
+/sys/devices/pci0000:00/INT33C2:00/i2c-0/i2c-INT33D1:00/0018:8086:09FA.0001/HID-SENSOR-2000e1.6.auto$ tree -R
+.
+????????? enable_sensor
+????????? feature-0-200316
+??????? ????????? feature-0-200316-maximum
+??????? ????????? feature-0-200316-minimum
+??????? ????????? feature-0-200316-name
+??????? ????????? feature-0-200316-size
+??????? ????????? feature-0-200316-unit-expo
+??????? ????????? feature-0-200316-units
+??????? ????????? feature-0-200316-value
+????????? feature-1-200201
+??????? ????????? feature-1-200201-maximum
+??????? ????????? feature-1-200201-minimum
+??????? ????????? feature-1-200201-name
+??????? ????????? feature-1-200201-size
+??????? ????????? feature-1-200201-unit-expo
+??????? ????????? feature-1-200201-units
+??????? ????????? feature-1-200201-value
+????????? input-0-200201
+??????? ????????? input-0-200201-maximum
+??????? ????????? input-0-200201-minimum
+??????? ????????? input-0-200201-name
+??????? ????????? input-0-200201-size
+??????? ????????? input-0-200201-unit-expo
+??????? ????????? input-0-200201-units
+??????? ????????? input-0-200201-value
+????????? input-1-200202
+??????? ????????? input-1-200202-maximum
+??????? ????????? input-1-200202-minimum
+??????? ????????? input-1-200202-name
+??????? ????????? input-1-200202-size
+??????? ????????? input-1-200202-unit-expo
+??????? ????????? input-1-200202-units
+??????? ????????? input-1-200202-value
+
+Here there is a custom sensors with four fields, two feature and two inputs.
+Each field is represented by a set of attributes. All fields except the "value"
+are read only. The value field is a RW field.
+Example
+/sys/bus/platform/devices/HID-SENSOR-2000e1.6.auto/feature-0-200316$ grep -r . *
+feature-0-200316-maximum:6
+feature-0-200316-minimum:0
+feature-0-200316-name:property-reporting-state
+feature-0-200316-size:1
+feature-0-200316-unit-expo:0
+feature-0-200316-units:25
+feature-0-200316-value:1
+
+How to enable such sensor?
+By default sensor can be power gated. To enable sysfs attribute "enable" can be
+used.
+$ echo 1 > enable_sensor
+
+Once enabled and powered on, sensor can report value using HID reports.
+These reports are pushed using misc device interface in a FIFO order.
+/dev$ tree | grep HID-SENSOR-2000e1.6.auto
+??????? ????????? 10:53 -> ../HID-SENSOR-2000e1.6.auto
+????????? HID-SENSOR-2000e1.6.auto
+
+Each reports can be of variable length preceded by a header. This header
+consist of a 32 bit usage id, 64 bit time stamp and 32 bit length field of raw
+data.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8cc635f..3275563 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2973,6 +2973,12 @@
 			Set maximum number of finished RCU callbacks to
 			process in one batch.
 
+	rcutree.gp_init_delay=	[KNL]
+			Set the number of jiffies to delay each step of
+			RCU grace-period initialization.  This only has
+			effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT is
+			set.
+
 	rcutree.rcu_fanout_leaf= [KNL]
 			Increase the number of CPUs assigned to each
 			leaf rcu_node structure.  Useful for very large
@@ -2996,11 +3002,15 @@
 			value is one, and maximum value is HZ.
 
 	rcutree.kthread_prio= 	 [KNL,BOOT]
-			Set the SCHED_FIFO priority of the RCU
-			per-CPU kthreads (rcuc/N). This value is also
-			used for the priority of the RCU boost threads
-			(rcub/N). Valid values are 1-99 and the default
-			is 1 (the least-favored priority).
+			Set the SCHED_FIFO priority of the RCU per-CPU
+			kthreads (rcuc/N). This value is also used for
+			the priority of the RCU boost threads (rcub/N)
+			and for the RCU grace-period kthreads (rcu_bh,
+			rcu_preempt, and rcu_sched). If RCU_BOOST is
+			set, valid values are 1-99 and the default is 1
+			(the least-favored priority).  Otherwise, when
+			RCU_BOOST is not set, valid values are 0-99 and
+			the default is zero (non-realtime operation).
 
 	rcutree.rcu_nocb_leader_stride= [KNL]
 			Set the number of NOCB kthread groups, which
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index f3cd299..f4cbfe0 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -190,20 +190,24 @@
 		on each CPU, including cs_dbs_timer() and od_dbs_timer().
 		WARNING:  Please check your CPU specifications to
 		make sure that this is safe on your particular system.
-	d.	It is not possible to entirely get rid of OS jitter
-		from vmstat_update() on CONFIG_SMP=y systems, but you
-		can decrease its frequency by writing a large value
-		to /proc/sys/vm/stat_interval.	The default value is
-		HZ, for an interval of one second.  Of course, larger
-		values will make your virtual-memory statistics update
-		more slowly.  Of course, you can also run your workload
-		at a real-time priority, thus preempting vmstat_update(),
+	d.	As of v3.18, Christoph Lameter's on-demand vmstat workers
+		commit prevents OS jitter due to vmstat_update() on
+		CONFIG_SMP=y systems.  Before v3.18, is not possible
+		to entirely get rid of the OS jitter, but you can
+		decrease its frequency by writing a large value to
+		/proc/sys/vm/stat_interval.  The default value is HZ,
+		for an interval of one second.	Of course, larger values
+		will make your virtual-memory statistics update more
+		slowly.  Of course, you can also run your workload at
+		a real-time priority, thus preempting vmstat_update(),
 		but if your workload is CPU-bound, this is a bad idea.
 		However, there is an RFC patch from Christoph Lameter
 		(based on an earlier one from Gilad Ben-Yossef) that
 		reduces or even eliminates vmstat overhead for some
 		workloads at https://lkml.org/lkml/2013/9/4/379.
-	e.	If running on high-end powerpc servers, build with
+	e.	Boot with "elevator=noop" to avoid workqueue use by
+		the block layer.
+	f.	If running on high-end powerpc servers, build with
 		CONFIG_PPC_RTAS_DAEMON=n.  This prevents the RTAS
 		daemon from running on each CPU every second or so.
 		(This will require editing Kconfig files and will defeat
@@ -211,12 +215,12 @@
 		due to the rtas_event_scan() function.
 		WARNING:  Please check your CPU specifications to
 		make sure that this is safe on your particular system.
-	f.	If running on Cell Processor, build your kernel with
+	g.	If running on Cell Processor, build your kernel with
 		CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from
 		spu_gov_work().
 		WARNING:  Please check your CPU specifications to
 		make sure that this is safe on your particular system.
-	g.	If running on PowerMAC, build your kernel with
+	h.	If running on PowerMAC, build your kernel with
 		CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
 		avoiding OS jitter from rackmeter_do_timer().
 
@@ -258,8 +262,12 @@
 To reduce its OS jitter, do at least one of the following:
 1.	Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these
 	kthreads from being created in the first place.
-2.	Echo a zero to /proc/sys/kernel/watchdog to disable the
+2.	Boot with "nosoftlockup=0", which will also prevent these kthreads
+	from being created.  Other related watchdog and softlockup boot
+	parameters may be found in Documentation/kernel-parameters.txt
+	and Documentation/watchdog/watchdog-parameters.txt.
+3.	Echo a zero to /proc/sys/kernel/watchdog to disable the
 	watchdog timer.
-3.	Echo a large number of /proc/sys/kernel/watchdog_thresh in
+4.	Echo a large number of /proc/sys/kernel/watchdog_thresh in
 	order to reduce the frequency of OS jitter due to the watchdog
 	timer down to a level that is acceptable for your workload.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index ca2387e..6974f1c 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -592,9 +592,9 @@
 CONTROL DEPENDENCIES
 --------------------
 
-A control dependency requires a full read memory barrier, not simply a data
-dependency barrier to make it work correctly.  Consider the following bit of
-code:
+A load-load control dependency requires a full read memory barrier, not
+simply a data dependency barrier to make it work correctly.  Consider the
+following bit of code:
 
 	q = ACCESS_ONCE(a);
 	if (q) {
@@ -615,14 +615,15 @@
 	}
 
 However, stores are not speculated.  This means that ordering -is- provided
-in the following example:
+for load-store control dependencies, as in the following example:
 
 	q = ACCESS_ONCE(a);
 	if (q) {
 		ACCESS_ONCE(b) = p;
 	}
 
-Please note that ACCESS_ONCE() is not optional!  Without the
+Control dependencies pair normally with other types of barriers.
+That said, please note that ACCESS_ONCE() is not optional!  Without the
 ACCESS_ONCE(), might combine the load from 'a' with other loads from
 'a', and the store to 'b' with other stores to 'b', with possible highly
 counterintuitive effects on ordering.
@@ -813,6 +814,8 @@
       barrier() can help to preserve your control dependency.  Please
       see the Compiler Barrier section for more information.
 
+  (*) Control dependencies pair normally with other types of barriers.
+
   (*) Control dependencies do -not- provide transitivity.  If you
       need transitivity, use smp_mb().
 
@@ -823,14 +826,14 @@
 When dealing with CPU-CPU interactions, certain types of memory barrier should
 always be paired.  A lack of appropriate pairing is almost certainly an error.
 
-General barriers pair with each other, though they also pair with
-most other types of barriers, albeit without transitivity.  An acquire
-barrier pairs with a release barrier, but both may also pair with other
-barriers, including of course general barriers.  A write barrier pairs
-with a data dependency barrier, an acquire barrier, a release barrier,
-a read barrier, or a general barrier.  Similarly a read barrier or a
-data dependency barrier pairs with a write barrier, an acquire barrier,
-a release barrier, or a general barrier:
+General barriers pair with each other, though they also pair with most
+other types of barriers, albeit without transitivity.  An acquire barrier
+pairs with a release barrier, but both may also pair with other barriers,
+including of course general barriers.  A write barrier pairs with a data
+dependency barrier, a control dependency, an acquire barrier, a release
+barrier, a read barrier, or a general barrier.  Similarly a read barrier,
+control dependency, or a data dependency barrier pairs with a write
+barrier, an acquire barrier, a release barrier, or a general barrier:
 
 	CPU 1		      CPU 2
 	===============	      ===============
@@ -850,6 +853,19 @@
 			      <data dependency barrier>
 			      y = *x;
 
+Or even:
+
+	CPU 1		      CPU 2
+	===============	      ===============================
+	r1 = ACCESS_ONCE(y);
+	<general barrier>
+	ACCESS_ONCE(y) = 1;   if (r2 = ACCESS_ONCE(x)) {
+			         <implicit control dependency>
+			         ACCESS_ONCE(y) = 1;
+			      }
+
+	assert(r1 == 0 || r2 == 0);
+
 Basically, the read barrier always has to be there, even though it can be of
 the "weaker" type.
 
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
index cca122f..6eaf576 100644
--- a/Documentation/timers/NO_HZ.txt
+++ b/Documentation/timers/NO_HZ.txt
@@ -158,13 +158,9 @@
 	to the need to inform kernel subsystems (such as RCU) about
 	the change in mode.
 
-3.	POSIX CPU timers on adaptive-tick CPUs may miss their deadlines
-	(perhaps indefinitely) because they currently rely on
-	scheduling-tick interrupts.  This will likely be fixed in
-	one of two ways: (1) Prevent CPUs with POSIX CPU timers from
-	entering adaptive-tick mode, or (2) Use hrtimers or other
-	adaptive-ticks-immune mechanism to cause the POSIX CPU timer to
-	fire properly.
+3.	POSIX CPU timers prevent CPUs from entering adaptive-tick mode.
+	Real-time applications needing to take actions based on CPU time
+	consumption need to use other means of doing so.
 
 4.	If there are more perf events pending than the hardware can
 	accommodate, they are normally round-robined so as to collect
diff --git a/MAINTAINERS b/MAINTAINERS
index 38579ac..17631e6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4337,10 +4337,10 @@
 
 GFS2 FILE SYSTEM
 M:	Steven Whitehouse <swhiteho@redhat.com>
+M:	Bob Peterson <rpeterso@redhat.com>
 L:	cluster-devel@redhat.com
 W:	http://sources.redhat.com/cluster/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
 S:	Supported
 F:	Documentation/filesystems/gfs2*.txt
 F:	fs/gfs2/
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 98c00a2..f46efd1 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -155,8 +155,6 @@
  */
 void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
 {
-	set_fs(USER_DS); /* user space */
-
 	regs->sp = usp;
 	regs->ret = pc;
 
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index e550b11..93c6ea5 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -841,7 +841,7 @@
 				break;
 			case DW_CFA_GNU_window_save:
 			default:
-				unw_debug("UNKNOW OPCODE 0x%x\n", opcode);
+				unw_debug("UNKNOWN OPCODE 0x%x\n", opcode);
 				result = 0;
 				break;
 			}
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 7fc70ae..dc7d0a9 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -648,7 +648,7 @@
 		 * Per-cpu breakpoints are not supported by our stepping
 		 * mechanism.
 		 */
-		if (!bp->hw.bp_target)
+		if (!bp->hw.target)
 			return -EINVAL;
 
 		/*
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 98bbe06..e7d934d 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -527,7 +527,7 @@
 	 * Disallow per-task kernel breakpoints since these would
 	 * complicate the stepping code.
 	 */
-	if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target)
+	if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
 		return -EINVAL;
 
 	return 0;
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 8ad3e90..1c72595 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -413,16 +413,14 @@
 	return 0;
 }
 
-static DECLARE_COMPLETION(cpu_killed);
-
 int __cpu_die(unsigned int cpu)
 {
-	return wait_for_completion_timeout(&cpu_killed, 5000);
+	return cpu_wait_death(cpu, 5);
 }
 
 void cpu_die(void)
 {
-	complete(&cpu_killed);
+	(void)cpu_report_death();
 
 	atomic_dec(&init_mm.mm_users);
 	atomic_dec(&init_mm.mm_count);
diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c
index 57d2ea8..3ae9f5a 100644
--- a/arch/c6x/kernel/process.c
+++ b/arch/c6x/kernel/process.c
@@ -101,7 +101,6 @@
 	 */
 	usp -= 8;
 
-	set_fs(USER_DS);
 	regs->pc  = pc;
 	regs->sp  = usp;
 	regs->tsr |= 0x40; /* set user mode */
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 336713a..85ca672 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -176,8 +176,6 @@
 	struct sigframe __user *frame;
 	int rsig, sig = ksig->sig;
 
-	set_fs(USER_DS);
-
 	frame = get_sigframe(ksig, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
@@ -257,8 +255,6 @@
 	struct rt_sigframe __user *frame;
 	int rsig, sig = ksig->sig;
 
-	set_fs(USER_DS);
-
 	frame = get_sigframe(ksig, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 0a0dd5c..a9ebd47 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -37,8 +37,6 @@
  */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
-	/* Set to run with user-mode data segmentation */
-	set_fs(USER_DS);
 	/* We want to zero all data-containing registers. Is this overkill? */
 	memset(regs, 0, sizeof(*regs));
 	/* We might want to also zero all Processor registers here */
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 7736c66..8c25e0c 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -214,8 +214,6 @@
 	regs->r2 = (unsigned long)&frame->uc;
 	regs->bpc = (unsigned long)ksig->ka.sa.sa_handler;
 
-	set_fs(USER_DS);
-
 #if DEBUG_SIG
 	printk("SIG deliver (%s:%d): sp=%p pc=%p\n",
 		current->comm, current->pid, frame, regs->pc);
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
index 13272fd..0838ca6 100644
--- a/arch/metag/include/asm/processor.h
+++ b/arch/metag/include/asm/processor.h
@@ -111,7 +111,6 @@
  */
 #define start_thread(regs, pc, usp) do {				   \
 	unsigned int *argc = (unsigned int *) bprm->exec;		   \
-	set_fs(USER_DS);						   \
 	current->thread.int_depth = 1;					   \
 	/* Force this process down to user land */			   \
 	regs->ctx.SaveMask = TBICTX_PRIV_BIT;				   \
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index f006d22..ac3a199 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -261,7 +261,6 @@
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static DECLARE_COMPLETION(cpu_killed);
 
 /*
  * __cpu_disable runs on the processor to be shutdown.
@@ -299,7 +298,7 @@
  */
 void __cpu_die(unsigned int cpu)
 {
-	if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1)))
+	if (!cpu_wait_death(cpu, 1))
 		pr_err("CPU%u: unable to kill\n", cpu);
 }
 
@@ -314,7 +313,7 @@
 	local_irq_disable();
 	idle_task_exit();
 
-	complete(&cpu_killed);
+	(void)cpu_report_death();
 
 	asm ("XOR	TXENABLE, D0Re0,D0Re0\n");
 }
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index a1cbaf9..20ccd4e 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -236,8 +236,6 @@
 	/* Offset to handle microblaze rtid r14, 0 */
 	regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
 
-	set_fs(USER_DS);
-
 #ifdef DEBUG_SIG
 	pr_info("SIG deliver (%s:%d): sp=%p pc=%08lx\n",
 		current->comm, current->pid, frame, regs->pc);
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index 0e075b5..2f8c74f 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -94,7 +94,6 @@
 
 void flush_thread(void)
 {
-	set_fs(USER_DS);
 }
 
 int copy_thread(unsigned long clone_flags,
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 386af25..7095dfe 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -197,7 +197,6 @@
 {
 	unsigned long sr = mfspr(SPR_SR) & ~SPR_SR_SM;
 
-	set_fs(USER_DS);
 	memset(regs, 0, sizeof(struct pt_regs));
 
 	regs->pc = pc;
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 942c7b1..9930904 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -106,10 +106,6 @@
 	spinlock_t mmu_lock;
 };
 
-#define CONTEXT_HOST		0
-#define CONTEXT_GUEST		1
-#define CONTEXT_GUEST_END	2
-
 #define VSID_REAL	0x07ffffffffc00000ULL
 #define VSID_BAT	0x07ffffffffb00000ULL
 #define VSID_64K	0x0800000000000000ULL
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index 6e909f3..37d2da6 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -478,7 +478,7 @@
 
 
 /*
- * Kenrel asynchronous i2c interface
+ * Kernel asynchronous i2c interface
  */
 
 #define SMU_I2C_READ_MAX	0x1d
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b8e15c6..308c5e1 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -721,7 +721,7 @@
 	 */
 	of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
 	if (boot_cpuid < 0) {
-		printk("Failed to indentify boot CPU !\n");
+		printk("Failed to identify boot CPU !\n");
 		BUG();
 	}
 
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 7c4f669..7fd60dcb 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -124,7 +124,7 @@
 
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-static void power_pmu_flush_branch_stack(void) {}
+static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
 static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
@@ -350,6 +350,7 @@
 		cpuhw->bhrb_context = event->ctx;
 	}
 	cpuhw->bhrb_users++;
+	perf_sched_cb_inc(event->ctx->pmu);
 }
 
 static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -361,6 +362,7 @@
 
 	cpuhw->bhrb_users--;
 	WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+	perf_sched_cb_dec(event->ctx->pmu);
 
 	if (!cpuhw->disabled && !cpuhw->bhrb_users) {
 		/* BHRB cannot be turned off when other
@@ -375,9 +377,12 @@
 /* Called from ctxsw to prevent one process's branch entries to
  * mingle with the other process's entries during context switch.
  */
-static void power_pmu_flush_branch_stack(void)
+static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (ppmu->bhrb_nr)
+	if (!ppmu->bhrb_nr)
+		return;
+
+	if (sched_in)
 		power_pmu_bhrb_reset();
 }
 /* Calculate the to address for a branch */
@@ -1901,7 +1906,7 @@
 	.cancel_txn	= power_pmu_cancel_txn,
 	.commit_txn	= power_pmu_commit_txn,
 	.event_idx	= power_pmu_event_idx,
-	.flush_branch_stack = power_pmu_flush_branch_stack,
+	.sched_task	= power_pmu_sched_task,
 };
 
 /*
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9445a82..abeb9ec 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -1126,7 +1126,7 @@
 	/* Physical domains & other lpars require extra capabilities */
 	if (!caps.collect_privileged && (is_physical_domain(domain) ||
 		(event_get_lpar(event) != event_get_lpar_max()))) {
-		pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n",
+		pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
 				is_physical_domain(domain),
 				event_get_lpar(event));
 		return -EACCES;
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index 7a180f0..680232d 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -50,14 +50,14 @@
 	/* Map the global utilities registers. */
 	guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
 	if (!guts_np) {
-		pr_err("p1022rdk: missing global utilties device node\n");
+		pr_err("p1022rdk: missing global utilities device node\n");
 		return;
 	}
 
 	guts = of_iomap(guts_np, 0);
 	of_node_put(guts_np);
 	if (!guts) {
-		pr_err("p1022rdk: could not map global utilties device\n");
+		pr_err("p1022rdk: could not map global utilities device\n");
 		return;
 	}
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ac2b75d..6321fd8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -134,6 +134,7 @@
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_KVM if 64BIT
+	select HAVE_LIVEPATCH
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MEMBLOCK_PHYS_MAP
@@ -165,6 +166,8 @@
 
 source "kernel/Kconfig.freezer"
 
+source "kernel/livepatch/Kconfig"
+
 menu "Processor type and features"
 
 config HAVE_MARCH_Z900_FEATURES
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 99824ff..df7d8cb 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -21,7 +21,7 @@
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
new file mode 100644
index 0000000..7aa7991
--- /dev/null
+++ b/arch/s390/include/asm/livepatch.h
@@ -0,0 +1,43 @@
+/*
+ * livepatch.h - s390-specific Kernel Live Patching Core
+ *
+ *  Copyright (c) 2013-2015 SUSE
+ *   Authors: Jiri Kosina
+ *	      Vojtech Pavlik
+ *	      Jiri Slaby
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef ASM_LIVEPATCH_H
+#define ASM_LIVEPATCH_H
+
+#include <linux/module.h>
+
+#ifdef CONFIG_LIVEPATCH
+static inline int klp_check_compiler_support(void)
+{
+	return 0;
+}
+
+static inline int klp_write_module_reloc(struct module *mod, unsigned long
+		type, unsigned long loc, unsigned long value)
+{
+	/* not supported yet */
+	return -ENOSYS;
+}
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->psw.addr = ip;
+}
+#else
+#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#endif
+
+#endif
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 653a7ec..3208d33 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -10,6 +10,13 @@
 #define TRACE_INCLUDE_FILE trace-s390
 
 /*
+ * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a
+ * legitimate C variable. It is not exported to user space.
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR kvm_s390
+
+/*
  * Trace point for the creation of the kvm instance.
  */
 TRACE_EVENT(kvm_s390_create_vm,
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 0b34f2a..9729289 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -329,8 +329,6 @@
 	if (err)
 		return -EFAULT;
 
-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
 		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
@@ -408,8 +406,6 @@
 	if (err)
 		return -EFAULT;
 
-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
 		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 71993c6..0462995 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -457,8 +457,6 @@
 
 	regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler);
 
-	set_fs(USER_DS);
-
 	/* Broken %016Lx */
 	pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
 		 signal, current->comm, current->pid, frame,
@@ -547,8 +545,6 @@
 	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
 	regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler);
 
-	set_fs(USER_DS);
-
 	pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
 		 signal, current->comm, current->pid, frame,
 		 regs->pc >> 32, regs->pc & 0xffffffff,
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index d2b1298..bf2caa1 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -34,8 +34,6 @@
 #endif
 #endif
 
-DECLARE_PER_CPU(int, cpu_state);
-
 int mwait_usable(const struct cpuinfo_x86 *);
 
 #endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 854c04b..7ee9b94 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
 #include <asm/disabled-features.h>
 #endif
 
-#define NCAPINTS	11	/* N 32-bit words worth of info */
+#define NCAPINTS	13	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -195,6 +195,7 @@
 #define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
 #define X86_FEATURE_HWP_EPP	( 7*32+13) /* Intel HWP_EPP */
 #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
+#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
@@ -226,6 +227,7 @@
 #define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 #define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
 #define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
 #define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
 #define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
@@ -244,6 +246,12 @@
 #define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
 #define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
 
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
 /*
  * BUG word(s)
  */
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
index a455a53..2d29197b 100644
--- a/arch/x86/include/asm/livepatch.h
+++ b/arch/x86/include/asm/livepatch.h
@@ -32,8 +32,8 @@
 #endif
 	return 0;
 }
-extern int klp_write_module_reloc(struct module *mod, unsigned long type,
-				  unsigned long loc, unsigned long value);
+int klp_write_module_reloc(struct module *mod, unsigned long type,
+			   unsigned long loc, unsigned long value);
 
 static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 {
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d2203b5..23ba676 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -109,6 +109,9 @@
 	/* in KB - valid for CPUS which support this call: */
 	int			x86_cache_size;
 	int			x86_cache_alignment;	/* In bytes */
+	/* Cache QoS architectural values: */
+	int			x86_cache_max_rmid;	/* max index */
+	int			x86_cache_occ_scale;	/* scale to bytes */
 	int			x86_power;
 	unsigned long		loops_per_jiffy;
 	/* cpuid returned max cores value: */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 81d02fc..17a8dce 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -150,13 +150,13 @@
 }
 
 void cpu_disable_common(void);
-void cpu_die_common(unsigned int cpu);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
 void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
+int common_cpu_die(unsigned int cpu);
 void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 3ce0791..1a4eae6 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -74,6 +74,24 @@
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
+#define MSR_IA32_RTIT_CTL		0x00000570
+#define RTIT_CTL_TRACEEN		BIT(0)
+#define RTIT_CTL_OS			BIT(2)
+#define RTIT_CTL_USR			BIT(3)
+#define RTIT_CTL_CR3EN			BIT(7)
+#define RTIT_CTL_TOPA			BIT(8)
+#define RTIT_CTL_TSC_EN			BIT(10)
+#define RTIT_CTL_DISRETC		BIT(11)
+#define RTIT_CTL_BRANCH_EN		BIT(13)
+#define MSR_IA32_RTIT_STATUS		0x00000571
+#define RTIT_STATUS_CONTEXTEN		BIT(1)
+#define RTIT_STATUS_TRIGGEREN		BIT(2)
+#define RTIT_STATUS_ERROR		BIT(4)
+#define RTIT_STATUS_STOPPED		BIT(5)
+#define MSR_IA32_RTIT_CR3_MATCH		0x00000572
+#define MSR_IA32_RTIT_OUTPUT_BASE	0x00000560
+#define MSR_IA32_RTIT_OUTPUT_MASK	0x00000561
+
 #define MSR_MTRRfix64K_00000		0x00000250
 #define MSR_MTRRfix16K_80000		0x00000258
 #define MSR_MTRRfix16K_A0000		0x00000259
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 80091ae..9bff687 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,8 @@
 endif
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o perf_event_intel_cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_pt.o perf_event_intel_bts.o
 
 obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
 					   perf_event_intel_uncore_snb.o \
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3f70538..a62cf04 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -646,6 +646,30 @@
 		c->x86_capability[10] = eax;
 	}
 
+	/* Additional Intel-defined flags: level 0x0000000F */
+	if (c->cpuid_level >= 0x0000000F) {
+		u32 eax, ebx, ecx, edx;
+
+		/* QoS sub-leaf, EAX=0Fh, ECX=0 */
+		cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
+		c->x86_capability[11] = edx;
+		if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
+			/* will be overridden if occupancy monitoring exists */
+			c->x86_cache_max_rmid = ebx;
+
+			/* QoS sub-leaf, EAX=0Fh, ECX=1 */
+			cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
+			c->x86_capability[12] = edx;
+			if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+				c->x86_cache_max_rmid = ecx;
+				c->x86_cache_occ_scale = ebx;
+			}
+		} else {
+			c->x86_cache_max_rmid = -1;
+			c->x86_cache_occ_scale = -1;
+		}
+	}
+
 	/* AMD-defined flags: level 0x80000001 */
 	xlvl = cpuid_eax(0x80000000);
 	c->extended_cpuid_level = xlvl;
@@ -834,6 +858,20 @@
 	detect_nopl(c);
 }
 
+static void x86_init_cache_qos(struct cpuinfo_x86 *c)
+{
+	/*
+	 * The heavy lifting of max_rmid and cache_occ_scale are handled
+	 * in get_cpu_cap().  Here we just set the max_rmid for the boot_cpu
+	 * in case CQM bits really aren't there in this CPU.
+	 */
+	if (c != &boot_cpu_data) {
+		boot_cpu_data.x86_cache_max_rmid =
+			min(boot_cpu_data.x86_cache_max_rmid,
+			    c->x86_cache_max_rmid);
+	}
+}
+
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -923,6 +961,7 @@
 
 	init_hypervisor(c);
 	x86_init_rdrand(c);
+	x86_init_cache_qos(c);
 
 	/*
 	 * Clear/Set all flags overriden by options, need do it
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
new file mode 100644
index 0000000..1c338b0
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_pt.h
@@ -0,0 +1,131 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#ifndef __INTEL_PT_H__
+#define __INTEL_PT_H__
+
+/*
+ * Single-entry ToPA: when this close to region boundary, switch
+ * buffers to avoid losing data.
+ */
+#define TOPA_PMI_MARGIN 512
+
+/*
+ * Table of Physical Addresses bits
+ */
+enum topa_sz {
+	TOPA_4K	= 0,
+	TOPA_8K,
+	TOPA_16K,
+	TOPA_32K,
+	TOPA_64K,
+	TOPA_128K,
+	TOPA_256K,
+	TOPA_512K,
+	TOPA_1MB,
+	TOPA_2MB,
+	TOPA_4MB,
+	TOPA_8MB,
+	TOPA_16MB,
+	TOPA_32MB,
+	TOPA_64MB,
+	TOPA_128MB,
+	TOPA_SZ_END,
+};
+
+static inline unsigned int sizes(enum topa_sz tsz)
+{
+	return 1 << (tsz + 12);
+};
+
+struct topa_entry {
+	u64	end	: 1;
+	u64	rsvd0	: 1;
+	u64	intr	: 1;
+	u64	rsvd1	: 1;
+	u64	stop	: 1;
+	u64	rsvd2	: 1;
+	u64	size	: 4;
+	u64	rsvd3	: 2;
+	u64	base	: 36;
+	u64	rsvd4	: 16;
+};
+
+#define TOPA_SHIFT 12
+#define PT_CPUID_LEAVES 2
+
+enum pt_capabilities {
+	PT_CAP_max_subleaf = 0,
+	PT_CAP_cr3_filtering,
+	PT_CAP_topa_output,
+	PT_CAP_topa_multiple_entries,
+	PT_CAP_payloads_lip,
+};
+
+struct pt_pmu {
+	struct pmu		pmu;
+	u32			caps[4 * PT_CPUID_LEAVES];
+};
+
+/**
+ * struct pt_buffer - buffer configuration; one buffer per task_struct or
+ *		cpu, depending on perf event configuration
+ * @cpu:	cpu for per-cpu allocation
+ * @tables:	list of ToPA tables in this buffer
+ * @first:	shorthand for first topa table
+ * @last:	shorthand for last topa table
+ * @cur:	current topa table
+ * @nr_pages:	buffer size in pages
+ * @cur_idx:	current output region's index within @cur table
+ * @output_off:	offset within the current output region
+ * @data_size:	running total of the amount of data in this buffer
+ * @lost:	if data was lost/truncated
+ * @head:	logical write offset inside the buffer
+ * @snapshot:	if this is for a snapshot/overwrite counter
+ * @stop_pos:	STOP topa entry in the buffer
+ * @intr_pos:	INT topa entry in the buffer
+ * @data_pages:	array of pages from perf
+ * @topa_index:	table of topa entries indexed by page offset
+ */
+struct pt_buffer {
+	int			cpu;
+	struct list_head	tables;
+	struct topa		*first, *last, *cur;
+	unsigned int		cur_idx;
+	size_t			output_off;
+	unsigned long		nr_pages;
+	local_t			data_size;
+	local_t			lost;
+	local64_t		head;
+	bool			snapshot;
+	unsigned long		stop_pos, intr_pos;
+	void			**data_pages;
+	struct topa_entry	*topa_index[0];
+};
+
+/**
+ * struct pt - per-cpu pt context
+ * @handle:	perf output handle
+ * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
+ */
+struct pt {
+	struct perf_output_handle handle;
+	int			handle_nmi;
+};
+
+#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e2888a3..87848eb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -263,6 +263,14 @@
 	}
 }
 
+void hw_perf_lbr_event_destroy(struct perf_event *event)
+{
+	hw_perf_event_destroy(event);
+
+	/* undo the lbr/bts event accounting */
+	x86_del_exclusive(x86_lbr_exclusive_lbr);
+}
+
 static inline int x86_pmu_initialized(void)
 {
 	return x86_pmu.handle_irq != NULL;
@@ -302,6 +310,35 @@
 	return x86_pmu_extra_regs(val, event);
 }
 
+/*
+ * Check if we can create event of a certain type (that no conflicting events
+ * are present).
+ */
+int x86_add_exclusive(unsigned int what)
+{
+	int ret = -EBUSY, i;
+
+	if (atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what]))
+		return 0;
+
+	mutex_lock(&pmc_reserve_mutex);
+	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
+		if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
+			goto out;
+
+	atomic_inc(&x86_pmu.lbr_exclusive[what]);
+	ret = 0;
+
+out:
+	mutex_unlock(&pmc_reserve_mutex);
+	return ret;
+}
+
+void x86_del_exclusive(unsigned int what)
+{
+	atomic_dec(&x86_pmu.lbr_exclusive[what]);
+}
+
 int x86_setup_perfctr(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
@@ -346,6 +383,12 @@
 		/* BTS is currently only allowed for user-mode. */
 		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
+
+		/* disallow bts if conflicting events are present */
+		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+			return -EBUSY;
+
+		event->destroy = hw_perf_lbr_event_destroy;
 	}
 
 	hwc->config |= config;
@@ -399,39 +442,41 @@
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
-		/*
-		 * check that PEBS LBR correction does not conflict with
-		 * whatever the user is asking with attr->branch_sample_type
-		 */
-		if (event->attr.precise_ip > 1 &&
-		    x86_pmu.intel_cap.pebs_format < 2) {
-			u64 *br_type = &event->attr.branch_sample_type;
+	}
+	/*
+	 * check that PEBS LBR correction does not conflict with
+	 * whatever the user is asking with attr->branch_sample_type
+	 */
+	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
+		u64 *br_type = &event->attr.branch_sample_type;
 
-			if (has_branch_stack(event)) {
-				if (!precise_br_compat(event))
-					return -EOPNOTSUPP;
+		if (has_branch_stack(event)) {
+			if (!precise_br_compat(event))
+				return -EOPNOTSUPP;
 
-				/* branch_sample_type is compatible */
+			/* branch_sample_type is compatible */
 
-			} else {
-				/*
-				 * user did not specify  branch_sample_type
-				 *
-				 * For PEBS fixups, we capture all
-				 * the branches at the priv level of the
-				 * event.
-				 */
-				*br_type = PERF_SAMPLE_BRANCH_ANY;
+		} else {
+			/*
+			 * user did not specify  branch_sample_type
+			 *
+			 * For PEBS fixups, we capture all
+			 * the branches at the priv level of the
+			 * event.
+			 */
+			*br_type = PERF_SAMPLE_BRANCH_ANY;
 
-				if (!event->attr.exclude_user)
-					*br_type |= PERF_SAMPLE_BRANCH_USER;
+			if (!event->attr.exclude_user)
+				*br_type |= PERF_SAMPLE_BRANCH_USER;
 
-				if (!event->attr.exclude_kernel)
-					*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
-			}
+			if (!event->attr.exclude_kernel)
+				*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
 		}
 	}
 
+	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+		event->attach_state |= PERF_ATTACH_TASK_DATA;
+
 	/*
 	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
@@ -449,6 +494,12 @@
 	if (event->attr.type == PERF_TYPE_RAW)
 		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
+	if (event->attr.sample_period && x86_pmu.limit_period) {
+		if (x86_pmu.limit_period(event, event->attr.sample_period) >
+				event->attr.sample_period)
+			return -EINVAL;
+	}
+
 	return x86_setup_perfctr(event);
 }
 
@@ -728,14 +779,17 @@
 	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct perf_event *e;
-	int i, wmin, wmax, num = 0;
+	int i, wmin, wmax, unsched = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
+	if (x86_pmu.start_scheduling)
+		x86_pmu.start_scheduling(cpuc);
+
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
 		hwc->constraint = c;
 
 		wmin = min(wmin, c->weight);
@@ -768,24 +822,30 @@
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(cpuc->event_list, n, wmin,
-					 wmax, assign);
+		unsched = perf_assign_events(cpuc->event_list, n, wmin,
+					     wmax, assign);
 
 	/*
-	 * Mark the event as committed, so we do not put_constraint()
-	 * in case new events are added and fail scheduling.
+	 * In case of success (unsched = 0), mark events as committed,
+	 * so we do not put_constraint() in case new events are added
+	 * and fail to be scheduled
+	 *
+	 * We invoke the lower level commit callback to lock the resource
+	 *
+	 * We do not need to do all of this in case we are called to
+	 * validate an event group (assign == NULL)
 	 */
-	if (!num && assign) {
+	if (!unsched && assign) {
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+			if (x86_pmu.commit_scheduling)
+				x86_pmu.commit_scheduling(cpuc, e, assign[i]);
 		}
 	}
-	/*
-	 * scheduling failed or is just a simulation,
-	 * free resources if necessary
-	 */
-	if (!assign || num) {
+
+	if (!assign || unsched) {
+
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			/*
@@ -795,11 +855,18 @@
 			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
 				continue;
 
+			/*
+			 * release events that failed scheduling
+			 */
 			if (x86_pmu.put_event_constraints)
 				x86_pmu.put_event_constraints(cpuc, e);
 		}
 	}
-	return num ? -EINVAL : 0;
+
+	if (x86_pmu.stop_scheduling)
+		x86_pmu.stop_scheduling(cpuc);
+
+	return unsched ? -EINVAL : 0;
 }
 
 /*
@@ -986,6 +1053,9 @@
 	if (left > x86_pmu.max_period)
 		left = x86_pmu.max_period;
 
+	if (x86_pmu.limit_period)
+		left = x86_pmu.limit_period(event, left);
+
 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
 	/*
@@ -1033,7 +1103,6 @@
 
 	hwc = &event->hw;
 
-	perf_pmu_disable(event->pmu);
 	n0 = cpuc->n_events;
 	ret = n = collect_events(cpuc, event, false);
 	if (ret < 0)
@@ -1071,7 +1140,6 @@
 
 	ret = 0;
 out:
-	perf_pmu_enable(event->pmu);
 	return ret;
 }
 
@@ -1103,7 +1171,7 @@
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
-	u64 pebs;
+	u64 pebs, debugctl;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
@@ -1121,14 +1189,20 @@
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
-		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
-		pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
+		if (x86_pmu.pebs_constraints) {
+			rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
+			pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
+		}
+		if (x86_pmu.lbr_nr) {
+			rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+			pr_info("CPU#%d: debugctl:   %016llx\n", cpu, debugctl);
+		}
 	}
 	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
@@ -1321,11 +1395,12 @@
 {
 	unsigned int cpu = (long)hcpu;
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	int ret = NOTIFY_OK;
+	int i, ret = NOTIFY_OK;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
-		cpuc->kfree_on_online = NULL;
+		for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+			cpuc->kfree_on_online[i] = NULL;
 		if (x86_pmu.cpu_prepare)
 			ret = x86_pmu.cpu_prepare(cpu);
 		break;
@@ -1336,7 +1411,10 @@
 		break;
 
 	case CPU_ONLINE:
-		kfree(cpuc->kfree_on_online);
+		for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+			kfree(cpuc->kfree_on_online[i]);
+			cpuc->kfree_on_online[i] = NULL;
+		}
 		break;
 
 	case CPU_DYING:
@@ -1712,7 +1790,7 @@
 	if (IS_ERR(fake_cpuc))
 		return PTR_ERR(fake_cpuc);
 
-	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+	c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
 
 	if (!c || !c->weight)
 		ret = -EINVAL;
@@ -1914,10 +1992,10 @@
 	NULL,
 };
 
-static void x86_pmu_flush_branch_stack(void)
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (x86_pmu.flush_branch_stack)
-		x86_pmu.flush_branch_stack();
+	if (x86_pmu.sched_task)
+		x86_pmu.sched_task(ctx, sched_in);
 }
 
 void perf_check_microcode(void)
@@ -1949,7 +2027,8 @@
 	.commit_txn		= x86_pmu_commit_txn,
 
 	.event_idx		= x86_pmu_event_idx,
-	.flush_branch_stack	= x86_pmu_flush_branch_stack,
+	.sched_task		= x86_pmu_sched_task,
+	.task_ctx_size          = sizeof(struct x86_perf_task_context),
 };
 
 void arch_perf_update_userpage(struct perf_event *event,
@@ -1968,13 +2047,23 @@
 
 	data = cyc2ns_read_begin();
 
+	/*
+	 * Internal timekeeping for enabled/running/stopped times
+	 * is always in the local_clock domain.
+	 */
 	userpg->cap_user_time = 1;
 	userpg->time_mult = data->cyc2ns_mul;
 	userpg->time_shift = data->cyc2ns_shift;
 	userpg->time_offset = data->cyc2ns_offset - now;
 
-	userpg->cap_user_time_zero = 1;
-	userpg->time_zero = data->cyc2ns_offset;
+	/*
+	 * cap_user_time_zero doesn't make sense when we're using a different
+	 * time base for the records.
+	 */
+	if (event->clock == &local_clock) {
+		userpg->cap_user_time_zero = 1;
+		userpg->time_zero = data->cyc2ns_offset;
+	}
 
 	cyc2ns_read_end(data);
 }
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index df525d2..329f035 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -71,6 +71,8 @@
 #define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 #define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL		0x40 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC		0x80 /* dynamic alloc'd constraint */
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
 
 
@@ -123,8 +125,37 @@
 	unsigned                core_id;	/* per-core: core id */
 };
 
+enum intel_excl_state_type {
+	INTEL_EXCL_UNUSED    = 0, /* counter is unused */
+	INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
+	INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
+struct intel_excl_states {
+	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
+	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+	int  num_alloc_cntrs;/* #counters allocated */
+	int  max_alloc_cntrs;/* max #counters allowed */
+	bool sched_started; /* true if scheduling has started */
+};
+
+struct intel_excl_cntrs {
+	raw_spinlock_t	lock;
+
+	struct intel_excl_states states[2];
+
+	int		refcnt;		/* per-core: #HT threads */
+	unsigned	core_id;	/* per-core: core id */
+};
+
 #define MAX_LBR_ENTRIES		16
 
+enum {
+	X86_PERF_KFREE_SHARED = 0,
+	X86_PERF_KFREE_EXCL   = 1,
+	X86_PERF_KFREE_MAX
+};
+
 struct cpu_hw_events {
 	/*
 	 * Generic x86 PMC bits
@@ -179,6 +210,12 @@
 	 * used on Intel NHM/WSM/SNB
 	 */
 	struct intel_shared_regs	*shared_regs;
+	/*
+	 * manage exclusive counter access between hyperthread
+	 */
+	struct event_constraint *constraint_list; /* in enable order */
+	struct intel_excl_cntrs		*excl_cntrs;
+	int excl_thread_id; /* 0 or 1 */
 
 	/*
 	 * AMD specific bits
@@ -187,7 +224,7 @@
 	/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
 	u64				perf_ctr_virt_mask;
 
-	void				*kfree_on_online;
+	void				*kfree_on_online[X86_PERF_KFREE_MAX];
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
@@ -202,6 +239,10 @@
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
+#define INTEL_EXCLEVT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+			   0, PERF_X86_EVENT_EXCL)
+
 /*
  * The overlap flag marks event constraints with overlapping counter
  * masks. This is the case if the counter mask of such an event is not
@@ -259,6 +300,10 @@
 #define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
 
+#define INTEL_EXCLUEVT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+
 #define INTEL_PLD_CONSTRAINT(c, n)	\
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
@@ -283,22 +328,40 @@
 
 /* Check flags and event code, and set the HSW load flag */
 #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
-	__EVENT_CONSTRAINT(code, n, 			\
+	__EVENT_CONSTRAINT(code, n,			\
 			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
 
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
+	__EVENT_CONSTRAINT(code, n,			\
+			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, \
+			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
 /* Check flags and event code/umask, and set the HSW store flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
+	__EVENT_CONSTRAINT(code, n,			\
+			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, \
+			  PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
+
 /* Check flags and event code/umask, and set the HSW load flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
 			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
 
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
+	__EVENT_CONSTRAINT(code, n,			\
+			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, \
+			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
 /* Check flags and event code/umask, and set the HSW N/A flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
@@ -408,6 +471,13 @@
 
 #define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
 
+enum {
+	x86_lbr_exclusive_lbr,
+	x86_lbr_exclusive_bts,
+	x86_lbr_exclusive_pt,
+	x86_lbr_exclusive_max,
+};
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -443,14 +513,25 @@
 	u64		max_period;
 	struct event_constraint *
 			(*get_event_constraints)(struct cpu_hw_events *cpuc,
+						 int idx,
 						 struct perf_event *event);
 
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
+
+	void		(*commit_scheduling)(struct cpu_hw_events *cpuc,
+					     struct perf_event *event,
+					     int cntr);
+
+	void		(*start_scheduling)(struct cpu_hw_events *cpuc);
+
+	void		(*stop_scheduling)(struct cpu_hw_events *cpuc);
+
 	struct event_constraint *event_constraints;
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 	bool		late_ack;
+	unsigned	(*limit_period)(struct perf_event *event, unsigned l);
 
 	/*
 	 * sysfs attrs
@@ -472,7 +553,8 @@
 	void		(*cpu_dead)(int cpu);
 
 	void		(*check_microcode)(void);
-	void		(*flush_branch_stack)(void);
+	void		(*sched_task)(struct perf_event_context *ctx,
+				      bool sched_in);
 
 	/*
 	 * Intel Arch Perfmon v2+
@@ -504,10 +586,15 @@
 	bool		lbr_double_abort;	   /* duplicated lbr aborts */
 
 	/*
+	 * Intel PT/LBR/BTS are exclusive
+	 */
+	atomic_t	lbr_exclusive[x86_lbr_exclusive_max];
+
+	/*
 	 * Extra registers for events
 	 */
 	struct extra_reg *extra_regs;
-	unsigned int er_flags;
+	unsigned int flags;
 
 	/*
 	 * Intel host/guest support (KVM)
@@ -515,6 +602,13 @@
 	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
 };
 
+struct x86_perf_task_context {
+	u64 lbr_from[MAX_LBR_ENTRIES];
+	u64 lbr_to[MAX_LBR_ENTRIES];
+	int lbr_callstack_users;
+	int lbr_stack_state;
+};
+
 #define x86_add_quirk(func_)						\
 do {									\
 	static struct x86_pmu_quirk __quirk __initdata = {		\
@@ -524,8 +618,13 @@
 	x86_pmu.quirks = &__quirk;					\
 } while (0)
 
-#define ERF_NO_HT_SHARING	1
-#define ERF_HAS_RSP_1		2
+/*
+ * x86_pmu flags
+ */
+#define PMU_FL_NO_HT_SHARING	0x1 /* no hyper-threading resource sharing */
+#define PMU_FL_HAS_RSP_1	0x2 /* has 2 equivalent offcore_rsp regs   */
+#define PMU_FL_EXCL_CNTRS	0x4 /* has exclusive counter requirements  */
+#define PMU_FL_EXCL_ENABLED	0x8 /* exclusive counter active */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -546,6 +645,12 @@
 
 extern struct x86_pmu x86_pmu __read_mostly;
 
+static inline bool x86_pmu_has_lbr_callstack(void)
+{
+	return  x86_pmu.lbr_sel_map &&
+		x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+}
+
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 
 int x86_perf_event_set_period(struct perf_event *event);
@@ -588,6 +693,12 @@
 	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
 }
 
+int x86_add_exclusive(unsigned int what);
+
+void x86_del_exclusive(unsigned int what);
+
+void hw_perf_lbr_event_destroy(struct perf_event *event);
+
 int x86_setup_perfctr(struct perf_event *event);
 
 int x86_pmu_hw_config(struct perf_event *event);
@@ -674,10 +785,34 @@
 
 #ifdef CONFIG_CPU_SUP_INTEL
 
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+	/* user explicitly requested branch sampling */
+	if (has_branch_stack(event))
+		return true;
+
+	/* implicit branch sampling to correct PEBS skid */
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2)
+		return true;
+
+	return false;
+}
+
+static inline bool intel_pmu_has_bts(struct perf_event *event)
+{
+	if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !event->attr.freq && event->hw.sample_period == 1)
+		return true;
+
+	return false;
+}
+
 int intel_pmu_save_and_restart(struct perf_event *event);
 
 struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event);
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event);
 
 struct intel_shared_regs *allocate_shared_regs(int cpu);
 
@@ -727,13 +862,15 @@
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_pmu_lbr_reset(void);
 
 void intel_pmu_lbr_enable(struct perf_event *event);
 
 void intel_pmu_lbr_disable(struct perf_event *event);
 
-void intel_pmu_lbr_enable_all(void);
+void intel_pmu_lbr_enable_all(bool pmi);
 
 void intel_pmu_lbr_disable_all(void);
 
@@ -747,8 +884,18 @@
 
 void intel_pmu_lbr_init_snb(void);
 
+void intel_pmu_lbr_init_hsw(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
+void intel_pt_interrupt(void);
+
+int intel_bts_interrupt(void);
+
+void intel_bts_enable_local(void);
+
+void intel_bts_disable_local(void);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
@@ -758,6 +905,10 @@
 ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 			  char *page);
 
+static inline int is_ht_workaround_enabled(void)
+{
+	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
+}
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 2892631..1cee5d2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -382,6 +382,7 @@
 static void amd_pmu_cpu_starting(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
 	struct amd_nb *nb;
 	int i, nb_id;
 
@@ -399,7 +400,7 @@
 			continue;
 
 		if (nb->nb_id == nb_id) {
-			cpuc->kfree_on_online = cpuc->amd_nb;
+			*onln = cpuc->amd_nb;
 			cpuc->amd_nb = nb;
 			break;
 		}
@@ -429,7 +430,8 @@
 }
 
 static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
 {
 	/*
 	 * if not NB event or no NB, then no constraints
@@ -537,7 +539,8 @@
 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 
 static struct event_constraint *
-amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
+			       struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int event_code = amd_get_event_code(hwc);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index a61f5c6..989d3c2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -796,7 +796,7 @@
  * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
  * is using the new offset.
  */
-static int force_ibs_eilvt_setup(void)
+static void force_ibs_eilvt_setup(void)
 {
 	int offset;
 	int ret;
@@ -811,26 +811,24 @@
 
 	if (offset == APIC_EILVT_NR_MAX) {
 		printk(KERN_DEBUG "No EILVT entry available\n");
-		return -EBUSY;
+		return;
 	}
 
 	ret = setup_ibs_ctl(offset);
 	if (ret)
 		goto out;
 
-	if (!ibs_eilvt_valid()) {
-		ret = -EFAULT;
+	if (!ibs_eilvt_valid())
 		goto out;
-	}
 
 	pr_info("IBS: LVT offset %d assigned\n", offset);
 
-	return 0;
+	return;
 out:
 	preempt_disable();
 	put_eilvt(offset);
 	preempt_enable();
-	return ret;
+	return;
 }
 
 static void ibs_eilvt_setup(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2589906..9da2400 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/watchdog.h>
 
 #include <asm/cpufeature.h>
 #include <asm/hardirq.h>
@@ -113,6 +114,12 @@
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
 	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+
+	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
 	EVENT_CONSTRAINT_END
 };
 
@@ -131,15 +138,12 @@
 	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-	/*
-	 * Errata BV98 -- MEM_*_RETIRED events can leak between counters of SMT
-	 * siblings; disable these events because they can corrupt unrelated
-	 * counters.
-	 */
-	INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
 	EVENT_CONSTRAINT_END
 };
 
@@ -217,6 +221,21 @@
 	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
 	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
 	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+
+	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+	EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_bdw_event_constraints[] = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
+	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
+	INTEL_EVENT_CONSTRAINT(0xa3, 0x4),	/* CYCLE_ACTIVITY.* */
 	EVENT_CONSTRAINT_END
 };
 
@@ -415,6 +434,202 @@
 
 };
 
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts because they are not
+ *   reliably counted.
+ */
+
+#define HSW_DEMAND_DATA_RD		BIT_ULL(0)
+#define HSW_DEMAND_RFO			BIT_ULL(1)
+#define HSW_ANY_RESPONSE		BIT_ULL(16)
+#define HSW_SUPPLIER_NONE		BIT_ULL(17)
+#define HSW_L3_MISS_LOCAL_DRAM		BIT_ULL(22)
+#define HSW_L3_MISS_REMOTE_HOP0		BIT_ULL(27)
+#define HSW_L3_MISS_REMOTE_HOP1		BIT_ULL(28)
+#define HSW_L3_MISS_REMOTE_HOP2P	BIT_ULL(29)
+#define HSW_L3_MISS			(HSW_L3_MISS_LOCAL_DRAM| \
+					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+					 HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_SNOOP_NONE			BIT_ULL(31)
+#define HSW_SNOOP_NOT_NEEDED		BIT_ULL(32)
+#define HSW_SNOOP_MISS			BIT_ULL(33)
+#define HSW_SNOOP_HIT_NO_FWD		BIT_ULL(34)
+#define HSW_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
+#define HSW_SNOOP_HITM			BIT_ULL(36)
+#define HSW_SNOOP_NON_DRAM		BIT_ULL(37)
+#define HSW_ANY_SNOOP			(HSW_SNOOP_NONE| \
+					 HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
+					 HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
+					 HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
+#define HSW_SNOOP_DRAM			(HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
+#define HSW_DEMAND_READ			HSW_DEMAND_DATA_RD
+#define HSW_DEMAND_WRITE		HSW_DEMAND_RFO
+#define HSW_L3_MISS_REMOTE		(HSW_L3_MISS_REMOTE_HOP0|\
+					 HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_LLC_ACCESS			HSW_ANY_RESPONSE
+
+#define BDW_L3_MISS_LOCAL		BIT(26)
+#define BDW_L3_MISS			(BDW_L3_MISS_LOCAL| \
+					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+					 HSW_L3_MISS_REMOTE_HOP2P)
+
+
+static __initconst const u64 hsw_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0x151,	/* L1D.REPLACEMENT */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x280,	/* ICACHE.MISSES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0x108,	/* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0x149,	/* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x6085,	/* ITLB_MISSES.STLB_HIT */
+		[ C(RESULT_MISS)   ] = 0x185,	/* ITLB_MISSES.MISS_CAUSES_A_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+};
+
+static __initconst const u64 hsw_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+				       HSW_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
+				       HSW_L3_MISS|HSW_ANY_SNOOP,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+				       HSW_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
+				       HSW_L3_MISS|HSW_ANY_SNOOP,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+				       HSW_L3_MISS_LOCAL_DRAM|
+				       HSW_SNOOP_DRAM,
+		[ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
+				       HSW_L3_MISS_REMOTE|
+				       HSW_SNOOP_DRAM,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+				       HSW_L3_MISS_LOCAL_DRAM|
+				       HSW_SNOOP_DRAM,
+		[ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
+				       HSW_L3_MISS_REMOTE|
+				       HSW_SNOOP_DRAM,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+};
+
 static __initconst const u64 westmere_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1029,21 +1244,10 @@
  },
 };
 
-static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
-{
-	/* user explicitly requested branch sampling */
-	if (has_branch_stack(event))
-		return true;
-
-	/* implicit branch sampling to correct PEBS skid */
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
-	    x86_pmu.intel_cap.pebs_format < 2)
-		return true;
-
-	return false;
-}
-
-static void intel_pmu_disable_all(void)
+/*
+ * Use from PMIs where the LBRs are already disabled.
+ */
+static void __intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
@@ -1051,17 +1255,24 @@
 
 	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
+	else
+		intel_bts_disable_local();
 
 	intel_pmu_pebs_disable_all();
+}
+
+static void intel_pmu_disable_all(void)
+{
+	__intel_pmu_disable_all();
 	intel_pmu_lbr_disable_all();
 }
 
-static void intel_pmu_enable_all(int added)
+static void __intel_pmu_enable_all(int added, bool pmi)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	intel_pmu_pebs_enable_all();
-	intel_pmu_lbr_enable_all();
+	intel_pmu_lbr_enable_all(pmi);
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
 			x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
 
@@ -1073,7 +1284,13 @@
 			return;
 
 		intel_pmu_enable_bts(event->hw.config);
-	}
+	} else
+		intel_bts_enable_local();
+}
+
+static void intel_pmu_enable_all(int added)
+{
+	__intel_pmu_enable_all(added, false);
 }
 
 /*
@@ -1207,7 +1424,7 @@
 	 * must disable before any actual event
 	 * because any event may be combined with LBR
 	 */
-	if (intel_pmu_needs_lbr_smpl(event))
+	if (needs_branch_stack(event))
 		intel_pmu_lbr_disable(event);
 
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -1268,7 +1485,7 @@
 	 * must enabled before any actual event
 	 * because any event may be combined with LBR
 	 */
-	if (intel_pmu_needs_lbr_smpl(event))
+	if (needs_branch_stack(event))
 		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
@@ -1334,6 +1551,18 @@
 	if (ds)
 		ds->bts_index = ds->bts_buffer_base;
 
+	/* Ack all overflows and disable fixed counters */
+	if (x86_pmu.version >= 2) {
+		intel_pmu_ack_status(intel_pmu_get_status());
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+	}
+
+	/* Reset LBRs and LBR freezing */
+	if (x86_pmu.lbr_nr) {
+		update_debugctlmsr(get_debugctlmsr() &
+			~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
+	}
+
 	local_irq_restore(flags);
 }
 
@@ -1357,8 +1586,9 @@
 	 */
 	if (!x86_pmu.late_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
-	intel_pmu_disable_all();
+	__intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
+	handled += intel_bts_interrupt();
 	status = intel_pmu_get_status();
 	if (!status)
 		goto done;
@@ -1399,6 +1629,14 @@
 	}
 
 	/*
+	 * Intel PT
+	 */
+	if (__test_and_clear_bit(55, (unsigned long *)&status)) {
+		handled++;
+		intel_pt_interrupt();
+	}
+
+	/*
 	 * Checkpointed counters can lead to 'spurious' PMIs because the
 	 * rollback caused by the PMI will have cleared the overflow status
 	 * bit. Therefore always force probe these counters.
@@ -1433,7 +1671,7 @@
 		goto again;
 
 done:
-	intel_pmu_enable_all(0);
+	__intel_pmu_enable_all(0, true);
 	/*
 	 * Only unmask the NMI after the overflow counters
 	 * have been reset. This avoids spurious NMIs on
@@ -1464,7 +1702,7 @@
 
 static int intel_alt_er(int idx)
 {
-	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
+	if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
 		return idx;
 
 	if (idx == EXTRA_REG_RSP_0)
@@ -1624,7 +1862,8 @@
 }
 
 struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
 {
 	struct event_constraint *c;
 
@@ -1641,7 +1880,8 @@
 }
 
 static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			    struct perf_event *event)
 {
 	struct event_constraint *c;
 
@@ -1657,7 +1897,278 @@
 	if (c)
 		return c;
 
-	return x86_get_event_constraints(cpuc, event);
+	return x86_get_event_constraints(cpuc, idx, event);
+}
+
+static void
+intel_start_scheduling(struct cpu_hw_events *cpuc)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* sibling thread */
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return;
+
+	/*
+	 * no exclusion needed
+	 */
+	if (!excl_cntrs)
+		return;
+
+	xlo = &excl_cntrs->states[o_tid];
+	xl = &excl_cntrs->states[tid];
+
+	xl->sched_started = true;
+	xl->num_alloc_cntrs = 0;
+	/*
+	 * lock shared state until we are done scheduling
+	 * in stop_event_scheduling()
+	 * makes scheduling appear as a transaction
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+	raw_spin_lock(&excl_cntrs->lock);
+
+	/*
+	 * save initial state of sibling thread
+	 */
+	memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+}
+
+static void
+intel_stop_scheduling(struct cpu_hw_events *cpuc)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* sibling thread */
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return;
+	/*
+	 * no exclusion needed
+	 */
+	if (!excl_cntrs)
+		return;
+
+	xlo = &excl_cntrs->states[o_tid];
+	xl = &excl_cntrs->states[tid];
+
+	/*
+	 * make new sibling thread state visible
+	 */
+	memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
+
+	xl->sched_started = false;
+	/*
+	 * release shared state lock (acquired in intel_start_scheduling())
+	 */
+	raw_spin_unlock(&excl_cntrs->lock);
+}
+
+static struct event_constraint *
+intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+			   int idx, struct event_constraint *c)
+{
+	struct event_constraint *cx;
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int is_excl, i;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* alternate */
+
+	/*
+	 * validating a group does not require
+	 * enforcing cross-thread  exclusion
+	 */
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return c;
+
+	/*
+	 * no exclusion needed
+	 */
+	if (!excl_cntrs)
+		return c;
+	/*
+	 * event requires exclusive counter access
+	 * across HT threads
+	 */
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+	/*
+	 * xl = state of current HT
+	 * xlo = state of sibling HT
+	 */
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	/*
+	 * do not allow scheduling of more than max_alloc_cntrs
+	 * which is set to half the available generic counters.
+	 * this helps avoid counter starvation of sibling thread
+	 * by ensuring at most half the counters cannot be in
+	 * exclusive mode. There is not designated counters for the
+	 * limits. Any N/2 counters can be used. This helps with
+	 * events with specifix counter constraints
+	 */
+	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
+		return &emptyconstraint;
+
+	cx = c;
+
+	/*
+	 * because we modify the constraint, we need
+	 * to make a copy. Static constraints come
+	 * from static const tables.
+	 *
+	 * only needed when constraint has not yet
+	 * been cloned (marked dynamic)
+	 */
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+
+		/* sanity check */
+		if (idx < 0)
+			return &emptyconstraint;
+
+		/*
+		 * grab pre-allocated constraint entry
+		 */
+		cx = &cpuc->constraint_list[idx];
+
+		/*
+		 * initialize dynamic constraint
+		 * with static constraint
+		 */
+		memcpy(cx, c, sizeof(*cx));
+
+		/*
+		 * mark constraint as dynamic, so we
+		 * can free it later on
+		 */
+		cx->flags |= PERF_X86_EVENT_DYNAMIC;
+	}
+
+	/*
+	 * From here on, the constraint is dynamic.
+	 * Either it was just allocated above, or it
+	 * was allocated during a earlier invocation
+	 * of this function
+	 */
+
+	/*
+	 * Modify static constraint with current dynamic
+	 * state of thread
+	 *
+	 * EXCLUSIVE: sibling counter measuring exclusive event
+	 * SHARED   : sibling counter measuring non-exclusive event
+	 * UNUSED   : sibling counter unused
+	 */
+	for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+		/*
+		 * exclusive event in sibling counter
+		 * our corresponding counter cannot be used
+		 * regardless of our event
+		 */
+		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
+			__clear_bit(i, cx->idxmsk);
+		/*
+		 * if measuring an exclusive event, sibling
+		 * measuring non-exclusive, then counter cannot
+		 * be used
+		 */
+		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
+			__clear_bit(i, cx->idxmsk);
+	}
+
+	/*
+	 * recompute actual bit weight for scheduling algorithm
+	 */
+	cx->weight = hweight64(cx->idxmsk64);
+
+	/*
+	 * if we return an empty mask, then switch
+	 * back to static empty constraint to avoid
+	 * the cost of freeing later on
+	 */
+	if (cx->weight == 0)
+		cx = &emptyconstraint;
+
+	return cx;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			    struct perf_event *event)
+{
+	struct event_constraint *c1 = event->hw.constraint;
+	struct event_constraint *c2;
+
+	/*
+	 * first time only
+	 * - static constraint: no change across incremental scheduling calls
+	 * - dynamic constraint: handled by intel_get_excl_constraints()
+	 */
+	c2 = __intel_get_event_constraints(cpuc, idx, event);
+	if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+		bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
+		c1->weight = c2->weight;
+		c2 = c1;
+	}
+
+	if (cpuc->excl_cntrs)
+		return intel_get_excl_constraints(cpuc, event, idx, c2);
+
+	return c2;
+}
+
+static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
+		struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xlo, *xl;
+	unsigned long flags = 0; /* keep compiler happy */
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid;
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake)
+		return;
+
+	WARN_ON_ONCE(!excl_cntrs);
+
+	if (!excl_cntrs)
+		return;
+
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	/*
+	 * put_constraint may be called from x86_schedule_events()
+	 * which already has the lock held so here make locking
+	 * conditional
+	 */
+	if (!xl->sched_started)
+		raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+
+	/*
+	 * if event was actually assigned, then mark the
+	 * counter state as unused now
+	 */
+	if (hwc->idx >= 0)
+		xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+	if (!xl->sched_started)
+		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
 }
 
 static void
@@ -1678,7 +2189,57 @@
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
+	struct event_constraint *c = event->hw.constraint;
+
 	intel_put_shared_regs_event_constraints(cpuc, event);
+
+	/*
+	 * is PMU has exclusive counter restrictions, then
+	 * all events are subject to and must call the
+	 * put_excl_constraints() routine
+	 */
+	if (c && cpuc->excl_cntrs)
+		intel_put_excl_constraints(cpuc, event);
+
+	/* cleanup dynamic constraint */
+	if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
+		event->hw.constraint = NULL;
+}
+
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
+				    struct perf_event *event, int cntr)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct event_constraint *c = event->hw.constraint;
+	struct intel_excl_states *xlo, *xl;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid;
+	int is_excl;
+
+	if (cpuc->is_fake || !c)
+		return;
+
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+		return;
+
+	WARN_ON_ONCE(!excl_cntrs);
+
+	if (!excl_cntrs)
+		return;
+
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
+
+	if (cntr >= 0) {
+		if (is_excl)
+			xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+		else
+			xlo->init_state[cntr] = INTEL_EXCL_SHARED;
+	}
 }
 
 static void intel_pebs_aliases_core2(struct perf_event *event)
@@ -1747,10 +2308,21 @@
 	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
 		x86_pmu.pebs_aliases(event);
 
-	if (intel_pmu_needs_lbr_smpl(event)) {
+	if (needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
 		if (ret)
 			return ret;
+
+		/*
+		 * BTS is set up earlier in this path, so don't account twice
+		 */
+		if (!intel_pmu_has_bts(event)) {
+			/* disallow lbr if conflicting events are present */
+			if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+				return -EBUSY;
+
+			event->destroy = hw_perf_lbr_event_destroy;
+		}
 	}
 
 	if (event->attr.type != PERF_TYPE_RAW)
@@ -1891,9 +2463,12 @@
 			EVENT_CONSTRAINT(0, 0x4, 0);
 
 static struct event_constraint *
-hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
 {
-	struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+	struct event_constraint *c;
+
+	c = intel_get_event_constraints(cpuc, idx, event);
 
 	/* Handle special quirk on in_tx_checkpointed only in counter 2 */
 	if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
@@ -1905,6 +2480,32 @@
 	return c;
 }
 
+/*
+ * Broadwell:
+ *
+ * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
+ * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
+ * the two to enforce a minimum period of 128 (the smallest value that has bits
+ * 0-5 cleared and >= 100).
+ *
+ * Because of how the code in x86_perf_event_set_period() works, the truncation
+ * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
+ * to make up for the 'lost' events due to carrying the 'error' in period_left.
+ *
+ * Therefore the effective (average) period matches the requested period,
+ * despite coarser hardware granularity.
+ */
+static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+{
+	if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+			X86_CONFIG(.event=0xc0, .umask=0x01)) {
+		if (left < 128)
+			left = 128;
+		left &= ~0x3fu;
+	}
+	return left;
+}
+
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -1979,16 +2580,52 @@
 	return regs;
 }
 
+static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+{
+	struct intel_excl_cntrs *c;
+	int i;
+
+	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
+			 GFP_KERNEL, cpu_to_node(cpu));
+	if (c) {
+		raw_spin_lock_init(&c->lock);
+		for (i = 0; i < X86_PMC_IDX_MAX; i++) {
+			c->states[0].state[i] = INTEL_EXCL_UNUSED;
+			c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
+
+			c->states[1].state[i] = INTEL_EXCL_UNUSED;
+			c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
+		}
+		c->core_id = -1;
+	}
+	return c;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
-		return NOTIFY_OK;
+	if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+		cpuc->shared_regs = allocate_shared_regs(cpu);
+		if (!cpuc->shared_regs)
+			return NOTIFY_BAD;
+	}
 
-	cpuc->shared_regs = allocate_shared_regs(cpu);
-	if (!cpuc->shared_regs)
-		return NOTIFY_BAD;
+	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
+
+		cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+		if (!cpuc->constraint_list)
+			return NOTIFY_BAD;
+
+		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+		if (!cpuc->excl_cntrs) {
+			kfree(cpuc->constraint_list);
+			kfree(cpuc->shared_regs);
+			return NOTIFY_BAD;
+		}
+		cpuc->excl_thread_id = 0;
+	}
 
 	return NOTIFY_OK;
 }
@@ -2010,13 +2647,15 @@
 	if (!cpuc->shared_regs)
 		return;
 
-	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+	if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
+		void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
+
 		for_each_cpu(i, topology_thread_cpumask(cpu)) {
 			struct intel_shared_regs *pc;
 
 			pc = per_cpu(cpu_hw_events, i).shared_regs;
 			if (pc && pc->core_id == core_id) {
-				cpuc->kfree_on_online = cpuc->shared_regs;
+				*onln = cpuc->shared_regs;
 				cpuc->shared_regs = pc;
 				break;
 			}
@@ -2027,6 +2666,44 @@
 
 	if (x86_pmu.lbr_sel_map)
 		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
+
+	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+		int h = x86_pmu.num_counters >> 1;
+
+		for_each_cpu(i, topology_thread_cpumask(cpu)) {
+			struct intel_excl_cntrs *c;
+
+			c = per_cpu(cpu_hw_events, i).excl_cntrs;
+			if (c && c->core_id == core_id) {
+				cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
+				cpuc->excl_cntrs = c;
+				cpuc->excl_thread_id = 1;
+				break;
+			}
+		}
+		cpuc->excl_cntrs->core_id = core_id;
+		cpuc->excl_cntrs->refcnt++;
+		/*
+		 * set hard limit to half the number of generic counters
+		 */
+		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
+		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
+	}
+}
+
+static void free_excl_cntrs(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct intel_excl_cntrs *c;
+
+	c = cpuc->excl_cntrs;
+	if (c) {
+		if (c->core_id == -1 || --c->refcnt == 0)
+			kfree(c);
+		cpuc->excl_cntrs = NULL;
+		kfree(cpuc->constraint_list);
+		cpuc->constraint_list = NULL;
+	}
 }
 
 static void intel_pmu_cpu_dying(int cpu)
@@ -2041,19 +2718,9 @@
 		cpuc->shared_regs = NULL;
 	}
 
-	fini_debug_store_on_cpu(cpu);
-}
+	free_excl_cntrs(cpu);
 
-static void intel_pmu_flush_branch_stack(void)
-{
-	/*
-	 * Intel LBR does not tag entries with the
-	 * PID of the current task, then we need to
-	 * flush it on ctxsw
-	 * For now, we simply reset it
-	 */
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_reset();
+	fini_debug_store_on_cpu(cpu);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -2107,7 +2774,7 @@
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 	.guest_get_msrs		= intel_guest_get_msrs,
-	.flush_branch_stack	= intel_pmu_flush_branch_stack,
+	.sched_task		= intel_pmu_lbr_sched_task,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -2264,6 +2931,27 @@
 	}
 }
 
+/*
+ * enable software workaround for errata:
+ * SNB: BJ122
+ * IVB: BV98
+ * HSW: HSD29
+ *
+ * Only needed when HT is enabled. However detecting
+ * if HT is enabled is difficult (model specific). So instead,
+ * we enable the workaround in the early boot, and verify if
+ * it is needed in a later initcall phase once we have valid
+ * topology information to check if HT is actually enabled
+ */
+static __init void intel_ht_bug(void)
+{
+	x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
+
+	x86_pmu.commit_scheduling = intel_commit_scheduling;
+	x86_pmu.start_scheduling = intel_start_scheduling;
+	x86_pmu.stop_scheduling = intel_stop_scheduling;
+}
+
 EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")
 
@@ -2443,7 +3131,7 @@
 		x86_pmu.event_constraints = intel_slm_event_constraints;
 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_slm_extra_regs;
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		pr_cont("Silvermont events, ");
 		break;
 
@@ -2461,7 +3149,7 @@
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 
 		x86_pmu.cpu_events = nhm_events_attrs;
 
@@ -2478,6 +3166,7 @@
 	case 42: /* 32nm SandyBridge         */
 	case 45: /* 32nm SandyBridge-E/EN/EP */
 		x86_add_quirk(intel_sandybridge_quirk);
+		x86_add_quirk(intel_ht_bug);
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2492,9 +3181,11 @@
 			x86_pmu.extra_regs = intel_snbep_extra_regs;
 		else
 			x86_pmu.extra_regs = intel_snb_extra_regs;
+
+
 		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
 		x86_pmu.cpu_events = snb_events_attrs;
 
@@ -2510,6 +3201,7 @@
 
 	case 58: /* 22nm IvyBridge       */
 	case 62: /* 22nm IvyBridge-EP/EX */
+		x86_add_quirk(intel_ht_bug);
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		/* dTLB-load-misses on IVB is different than SNB */
@@ -2528,8 +3220,8 @@
 		else
 			x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
 		x86_pmu.cpu_events = snb_events_attrs;
 
@@ -2545,19 +3237,20 @@
 	case 63: /* 22nm Haswell Server */
 	case 69: /* 22nm Haswell ULT */
 	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+		x86_add_quirk(intel_ht_bug);
 		x86_pmu.late_ack = true;
-		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_snb();
+		intel_pmu_lbr_init_hsw();
 
 		x86_pmu.event_constraints = intel_hsw_event_constraints;
 		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_snbep_extra_regs;
 		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
@@ -2566,6 +3259,39 @@
 		pr_cont("Haswell events, ");
 		break;
 
+	case 61: /* 14nm Broadwell Core-M */
+	case 86: /* 14nm Broadwell Xeon D */
+		x86_pmu.late_ack = true;
+		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+		/* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
+		hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
+									 BDW_L3_MISS|HSW_SNOOP_DRAM;
+		hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
+									  HSW_SNOOP_DRAM;
+		hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
+									     BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+		hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
+									      BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+
+		intel_pmu_lbr_init_snb();
+
+		x86_pmu.event_constraints = intel_bdw_event_constraints;
+		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_snbep_extra_regs;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+		x86_pmu.hw_config = hsw_hw_config;
+		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.limit_period = bdw_limit_period;
+		pr_cont("Broadwell events, ");
+		break;
+
 	default:
 		switch (x86_pmu.version) {
 		case 1:
@@ -2651,3 +3377,47 @@
 
 	return 0;
 }
+
+/*
+ * HT bug: phase 2 init
+ * Called once we have valid topology information to check
+ * whether or not HT is enabled
+ * If HT is off, then we disable the workaround
+ */
+static __init int fixup_ht_bug(void)
+{
+	int cpu = smp_processor_id();
+	int w, c;
+	/*
+	 * problem not present on this CPU model, nothing to do
+	 */
+	if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
+		return 0;
+
+	w = cpumask_weight(topology_thread_cpumask(cpu));
+	if (w > 1) {
+		pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
+		return 0;
+	}
+
+	watchdog_nmi_disable_all();
+
+	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
+
+	x86_pmu.commit_scheduling = NULL;
+	x86_pmu.start_scheduling = NULL;
+	x86_pmu.stop_scheduling = NULL;
+
+	watchdog_nmi_enable_all();
+
+	get_online_cpus();
+
+	for_each_online_cpu(c) {
+		free_excl_cntrs(c);
+	}
+
+	put_online_cpus();
+	pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
+	return 0;
+}
+subsys_initcall(fixup_ht_bug)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
new file mode 100644
index 0000000..ac1f0c5
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -0,0 +1,525 @@
+/*
+ * BTS PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/coredump.h>
+
+#include <asm-generic/sizes.h>
+#include <asm/perf_event.h>
+
+#include "perf_event.h"
+
+struct bts_ctx {
+	struct perf_output_handle	handle;
+	struct debug_store		ds_back;
+	int				started;
+};
+
+static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
+
+#define BTS_RECORD_SIZE		24
+#define BTS_SAFETY_MARGIN	4080
+
+struct bts_phys {
+	struct page	*page;
+	unsigned long	size;
+	unsigned long	offset;
+	unsigned long	displacement;
+};
+
+struct bts_buffer {
+	size_t		real_size;	/* multiple of BTS_RECORD_SIZE */
+	unsigned int	nr_pages;
+	unsigned int	nr_bufs;
+	unsigned int	cur_buf;
+	bool		snapshot;
+	local_t		data_size;
+	local_t		lost;
+	local_t		head;
+	unsigned long	end;
+	void		**data_pages;
+	struct bts_phys	buf[0];
+};
+
+struct pmu bts_pmu;
+
+void intel_pmu_enable_bts(u64 config);
+void intel_pmu_disable_bts(void);
+
+static size_t buf_size(struct page *page)
+{
+	return 1 << (PAGE_SHIFT + page_private(page));
+}
+
+static void *
+bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+{
+	struct bts_buffer *buf;
+	struct page *page;
+	int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+	unsigned long offset;
+	size_t size = nr_pages << PAGE_SHIFT;
+	int pg, nbuf, pad;
+
+	/* count all the high order buffers */
+	for (pg = 0, nbuf = 0; pg < nr_pages;) {
+		page = virt_to_page(pages[pg]);
+		if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+			return NULL;
+		pg += 1 << page_private(page);
+		nbuf++;
+	}
+
+	/*
+	 * to avoid interrupts in overwrite mode, only allow one physical
+	 */
+	if (overwrite && nbuf > 1)
+		return NULL;
+
+	buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->nr_pages = nr_pages;
+	buf->nr_bufs = nbuf;
+	buf->snapshot = overwrite;
+	buf->data_pages = pages;
+	buf->real_size = size - size % BTS_RECORD_SIZE;
+
+	for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+		unsigned int __nr_pages;
+
+		page = virt_to_page(pages[pg]);
+		__nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+		buf->buf[nbuf].page = page;
+		buf->buf[nbuf].offset = offset;
+		buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+		buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
+		pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
+		buf->buf[nbuf].size -= pad;
+
+		pg += __nr_pages;
+		offset += __nr_pages << PAGE_SHIFT;
+	}
+
+	return buf;
+}
+
+static void bts_buffer_free_aux(void *data)
+{
+	kfree(data);
+}
+
+static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+{
+	return buf->buf[idx].offset + buf->buf[idx].displacement;
+}
+
+static void
+bts_config_buffer(struct bts_buffer *buf)
+{
+	int cpu = raw_smp_processor_id();
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct bts_phys *phys = &buf->buf[buf->cur_buf];
+	unsigned long index, thresh = 0, end = phys->size;
+	struct page *page = phys->page;
+
+	index = local_read(&buf->head);
+
+	if (!buf->snapshot) {
+		if (buf->end < phys->offset + buf_size(page))
+			end = buf->end - phys->offset - phys->displacement;
+
+		index -= phys->offset + phys->displacement;
+
+		if (end - index > BTS_SAFETY_MARGIN)
+			thresh = end - BTS_SAFETY_MARGIN;
+		else if (end - index > BTS_RECORD_SIZE)
+			thresh = end - BTS_RECORD_SIZE;
+		else
+			thresh = end;
+	}
+
+	ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
+	ds->bts_index = ds->bts_buffer_base + index;
+	ds->bts_absolute_maximum = ds->bts_buffer_base + end;
+	ds->bts_interrupt_threshold = !buf->snapshot
+		? ds->bts_buffer_base + thresh
+		: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
+}
+
+static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
+{
+	unsigned long index = head - phys->offset;
+
+	memset(page_address(phys->page) + index, 0, phys->size - index);
+}
+
+static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
+{
+	if (buf->snapshot)
+		return false;
+
+	if (local_read(&buf->data_size) >= bts->handle.size ||
+	    bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
+		return true;
+
+	return false;
+}
+
+static void bts_update(struct bts_ctx *bts)
+{
+	int cpu = raw_smp_processor_id();
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
+
+	if (!buf)
+		return;
+
+	head = index + bts_buffer_offset(buf, buf->cur_buf);
+	old = local_xchg(&buf->head, head);
+
+	if (!buf->snapshot) {
+		if (old == head)
+			return;
+
+		if (ds->bts_index >= ds->bts_absolute_maximum)
+			local_inc(&buf->lost);
+
+		/*
+		 * old and head are always in the same physical buffer, so we
+		 * can subtract them to get the data size.
+		 */
+		local_add(head - old, &buf->data_size);
+	} else {
+		local_set(&buf->data_size, head);
+	}
+}
+
+static void __bts_event_start(struct perf_event *event)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	u64 config = 0;
+
+	if (!buf || bts_buffer_is_full(buf, bts))
+		return;
+
+	event->hw.state = 0;
+
+	if (!buf->snapshot)
+		config |= ARCH_PERFMON_EVENTSEL_INT;
+	if (!event->attr.exclude_kernel)
+		config |= ARCH_PERFMON_EVENTSEL_OS;
+	if (!event->attr.exclude_user)
+		config |= ARCH_PERFMON_EVENTSEL_USR;
+
+	bts_config_buffer(buf);
+
+	/*
+	 * local barrier to make sure that ds configuration made it
+	 * before we enable BTS
+	 */
+	wmb();
+
+	intel_pmu_enable_bts(config);
+}
+
+static void bts_event_start(struct perf_event *event, int flags)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	__bts_event_start(event);
+
+	/* PMI handler: this counter is running and likely generating PMIs */
+	ACCESS_ONCE(bts->started) = 1;
+}
+
+static void __bts_event_stop(struct perf_event *event)
+{
+	/*
+	 * No extra synchronization is mandated by the documentation to have
+	 * BTS data stores globally visible.
+	 */
+	intel_pmu_disable_bts();
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
+}
+
+static void bts_event_stop(struct perf_event *event, int flags)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	/* PMI handler: don't restart this counter */
+	ACCESS_ONCE(bts->started) = 0;
+
+	__bts_event_stop(event);
+
+	if (flags & PERF_EF_UPDATE)
+		bts_update(bts);
+}
+
+void intel_bts_enable_local(void)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	if (bts->handle.event && bts->started)
+		__bts_event_start(bts->handle.event);
+}
+
+void intel_bts_disable_local(void)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	if (bts->handle.event)
+		__bts_event_stop(bts->handle.event);
+}
+
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
+{
+	unsigned long head, space, next_space, pad, gap, skip, wakeup;
+	unsigned int next_buf;
+	struct bts_phys *phys, *next_phys;
+	int ret;
+
+	if (buf->snapshot)
+		return 0;
+
+	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+	if (WARN_ON_ONCE(head != local_read(&buf->head)))
+		return -EINVAL;
+
+	phys = &buf->buf[buf->cur_buf];
+	space = phys->offset + phys->displacement + phys->size - head;
+	pad = space;
+	if (space > handle->size) {
+		space = handle->size;
+		space -= space % BTS_RECORD_SIZE;
+	}
+	if (space <= BTS_SAFETY_MARGIN) {
+		/* See if next phys buffer has more space */
+		next_buf = buf->cur_buf + 1;
+		if (next_buf >= buf->nr_bufs)
+			next_buf = 0;
+		next_phys = &buf->buf[next_buf];
+		gap = buf_size(phys->page) - phys->displacement - phys->size +
+		      next_phys->displacement;
+		skip = pad + gap;
+		if (handle->size >= skip) {
+			next_space = next_phys->size;
+			if (next_space + skip > handle->size) {
+				next_space = handle->size - skip;
+				next_space -= next_space % BTS_RECORD_SIZE;
+			}
+			if (next_space > space || !space) {
+				if (pad)
+					bts_buffer_pad_out(phys, head);
+				ret = perf_aux_output_skip(handle, skip);
+				if (ret)
+					return ret;
+				/* Advance to next phys buffer */
+				phys = next_phys;
+				space = next_space;
+				head = phys->offset + phys->displacement;
+				/*
+				 * After this, cur_buf and head won't match ds
+				 * anymore, so we must not be racing with
+				 * bts_update().
+				 */
+				buf->cur_buf = next_buf;
+				local_set(&buf->head, head);
+			}
+		}
+	}
+
+	/* Don't go far beyond wakeup watermark */
+	wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
+		 handle->head;
+	if (space > wakeup) {
+		space = wakeup;
+		space -= space % BTS_RECORD_SIZE;
+	}
+
+	buf->end = head + space;
+
+	/*
+	 * If we have no space, the lost notification would have been sent when
+	 * we hit absolute_maximum - see bts_update()
+	 */
+	if (!space)
+		return -ENOSPC;
+
+	return 0;
+}
+
+int intel_bts_interrupt(void)
+{
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct perf_event *event = bts->handle.event;
+	struct bts_buffer *buf;
+	s64 old_head;
+	int err;
+
+	if (!event || !bts->started)
+		return 0;
+
+	buf = perf_get_aux(&bts->handle);
+	/*
+	 * Skip snapshot counters: they don't use the interrupt, but
+	 * there's no other way of telling, because the pointer will
+	 * keep moving
+	 */
+	if (!buf || buf->snapshot)
+		return 0;
+
+	old_head = local_read(&buf->head);
+	bts_update(bts);
+
+	/* no new data */
+	if (old_head == local_read(&buf->head))
+		return 0;
+
+	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+			    !!local_xchg(&buf->lost, 0));
+
+	buf = perf_aux_output_begin(&bts->handle, event);
+	if (!buf)
+		return 1;
+
+	err = bts_buffer_reset(buf, &bts->handle);
+	if (err)
+		perf_aux_output_end(&bts->handle, 0, false);
+
+	return 1;
+}
+
+static void bts_event_del(struct perf_event *event, int mode)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+
+	bts_event_stop(event, PERF_EF_UPDATE);
+
+	if (buf) {
+		if (buf->snapshot)
+			bts->handle.head =
+				local_xchg(&buf->data_size,
+					   buf->nr_pages << PAGE_SHIFT);
+		perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+				    !!local_xchg(&buf->lost, 0));
+	}
+
+	cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+	cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+	cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+	cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+}
+
+static int bts_event_add(struct perf_event *event, int mode)
+{
+	struct bts_buffer *buf;
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = -EBUSY;
+
+	event->hw.state = PERF_HES_STOPPED;
+
+	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+		return -EBUSY;
+
+	if (bts->handle.event)
+		return -EBUSY;
+
+	buf = perf_aux_output_begin(&bts->handle, event);
+	if (!buf)
+		return -EINVAL;
+
+	ret = bts_buffer_reset(buf, &bts->handle);
+	if (ret) {
+		perf_aux_output_end(&bts->handle, 0, false);
+		return ret;
+	}
+
+	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+	if (mode & PERF_EF_START) {
+		bts_event_start(event, 0);
+		if (hwc->state & PERF_HES_STOPPED) {
+			bts_event_del(event, 0);
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
+static void bts_event_destroy(struct perf_event *event)
+{
+	x86_del_exclusive(x86_lbr_exclusive_bts);
+}
+
+static int bts_event_init(struct perf_event *event)
+{
+	if (event->attr.type != bts_pmu.type)
+		return -ENOENT;
+
+	if (x86_add_exclusive(x86_lbr_exclusive_bts))
+		return -EBUSY;
+
+	event->destroy = bts_event_destroy;
+
+	return 0;
+}
+
+static void bts_event_read(struct perf_event *event)
+{
+}
+
+static __init int bts_init(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+		return -ENODEV;
+
+	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
+	bts_pmu.task_ctx_nr	= perf_sw_context;
+	bts_pmu.event_init	= bts_event_init;
+	bts_pmu.add		= bts_event_add;
+	bts_pmu.del		= bts_event_del;
+	bts_pmu.start		= bts_event_start;
+	bts_pmu.stop		= bts_event_stop;
+	bts_pmu.read		= bts_event_read;
+	bts_pmu.setup_aux	= bts_buffer_setup_aux;
+	bts_pmu.free_aux	= bts_buffer_free_aux;
+
+	return perf_pmu_register(&bts_pmu, "intel_bts", -1);
+}
+
+module_init(bts_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
new file mode 100644
index 0000000..e4d1b8b
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -0,0 +1,1379 @@
+/*
+ * Intel Cache Quality-of-Service Monitoring (CQM) support.
+ *
+ * Based very, very heavily on work by Peter Zijlstra.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "perf_event.h"
+
+#define MSR_IA32_PQR_ASSOC	0x0c8f
+#define MSR_IA32_QM_CTR		0x0c8e
+#define MSR_IA32_QM_EVTSEL	0x0c8d
+
+static unsigned int cqm_max_rmid = -1;
+static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+
+struct intel_cqm_state {
+	raw_spinlock_t		lock;
+	int			rmid;
+	int			cnt;
+};
+
+static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+
+/*
+ * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
+ * Also protects event->hw.cqm_rmid
+ *
+ * Hold either for stability, both for modification of ->hw.cqm_rmid.
+ */
+static DEFINE_MUTEX(cache_mutex);
+static DEFINE_RAW_SPINLOCK(cache_lock);
+
+/*
+ * Groups of events that have the same target(s), one RMID per group.
+ */
+static LIST_HEAD(cache_groups);
+
+/*
+ * Mask of CPUs for reading CQM values. We only need one per-socket.
+ */
+static cpumask_t cqm_cpumask;
+
+#define RMID_VAL_ERROR		(1ULL << 63)
+#define RMID_VAL_UNAVAIL	(1ULL << 62)
+
+#define QOS_L3_OCCUP_EVENT_ID	(1 << 0)
+
+#define QOS_EVENT_MASK	QOS_L3_OCCUP_EVENT_ID
+
+/*
+ * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
+ *
+ * This rmid is always free and is guaranteed to have an associated
+ * near-zero occupancy value, i.e. no cachelines are tagged with this
+ * RMID, once __intel_cqm_rmid_rotate() returns.
+ */
+static unsigned int intel_cqm_rotation_rmid;
+
+#define INVALID_RMID		(-1)
+
+/*
+ * Is @rmid valid for programming the hardware?
+ *
+ * rmid 0 is reserved by the hardware for all non-monitored tasks, which
+ * means that we should never come across an rmid with that value.
+ * Likewise, an rmid value of -1 is used to indicate "no rmid currently
+ * assigned" and is used as part of the rotation code.
+ */
+static inline bool __rmid_valid(unsigned int rmid)
+{
+	if (!rmid || rmid == INVALID_RMID)
+		return false;
+
+	return true;
+}
+
+static u64 __rmid_read(unsigned int rmid)
+{
+	u64 val;
+
+	/*
+	 * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
+	 * it just says that to increase confusion.
+	 */
+	wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
+	rdmsrl(MSR_IA32_QM_CTR, val);
+
+	/*
+	 * Aside from the ERROR and UNAVAIL bits, assume this thing returns
+	 * the number of cachelines tagged with @rmid.
+	 */
+	return val;
+}
+
+enum rmid_recycle_state {
+	RMID_YOUNG = 0,
+	RMID_AVAILABLE,
+	RMID_DIRTY,
+};
+
+struct cqm_rmid_entry {
+	unsigned int rmid;
+	enum rmid_recycle_state state;
+	struct list_head list;
+	unsigned long queue_time;
+};
+
+/*
+ * cqm_rmid_free_lru - A least recently used list of RMIDs.
+ *
+ * Oldest entry at the head, newest (most recently used) entry at the
+ * tail. This list is never traversed, it's only used to keep track of
+ * the lru order. That is, we only pick entries of the head or insert
+ * them on the tail.
+ *
+ * All entries on the list are 'free', and their RMIDs are not currently
+ * in use. To mark an RMID as in use, remove its entry from the lru
+ * list.
+ *
+ *
+ * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
+ *
+ * This list is contains RMIDs that no one is currently using but that
+ * may have a non-zero occupancy value associated with them. The
+ * rotation worker moves RMIDs from the limbo list to the free list once
+ * the occupancy value drops below __intel_cqm_threshold.
+ *
+ * Both lists are protected by cache_mutex.
+ */
+static LIST_HEAD(cqm_rmid_free_lru);
+static LIST_HEAD(cqm_rmid_limbo_lru);
+
+/*
+ * We use a simple array of pointers so that we can lookup a struct
+ * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
+ * and __put_rmid() from having to worry about dealing with struct
+ * cqm_rmid_entry - they just deal with rmids, i.e. integers.
+ *
+ * Once this array is initialized it is read-only. No locks are required
+ * to access it.
+ *
+ * All entries for all RMIDs can be looked up in the this array at all
+ * times.
+ */
+static struct cqm_rmid_entry **cqm_rmid_ptrs;
+
+static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
+{
+	struct cqm_rmid_entry *entry;
+
+	entry = cqm_rmid_ptrs[rmid];
+	WARN_ON(entry->rmid != rmid);
+
+	return entry;
+}
+
+/*
+ * Returns < 0 on fail.
+ *
+ * We expect to be called with cache_mutex held.
+ */
+static int __get_rmid(void)
+{
+	struct cqm_rmid_entry *entry;
+
+	lockdep_assert_held(&cache_mutex);
+
+	if (list_empty(&cqm_rmid_free_lru))
+		return INVALID_RMID;
+
+	entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
+	list_del(&entry->list);
+
+	return entry->rmid;
+}
+
+static void __put_rmid(unsigned int rmid)
+{
+	struct cqm_rmid_entry *entry;
+
+	lockdep_assert_held(&cache_mutex);
+
+	WARN_ON(!__rmid_valid(rmid));
+	entry = __rmid_entry(rmid);
+
+	entry->queue_time = jiffies;
+	entry->state = RMID_YOUNG;
+
+	list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
+}
+
+static int intel_cqm_setup_rmid_cache(void)
+{
+	struct cqm_rmid_entry *entry;
+	unsigned int nr_rmids;
+	int r = 0;
+
+	nr_rmids = cqm_max_rmid + 1;
+	cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+				nr_rmids, GFP_KERNEL);
+	if (!cqm_rmid_ptrs)
+		return -ENOMEM;
+
+	for (; r <= cqm_max_rmid; r++) {
+		struct cqm_rmid_entry *entry;
+
+		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry)
+			goto fail;
+
+		INIT_LIST_HEAD(&entry->list);
+		entry->rmid = r;
+		cqm_rmid_ptrs[r] = entry;
+
+		list_add_tail(&entry->list, &cqm_rmid_free_lru);
+	}
+
+	/*
+	 * RMID 0 is special and is always allocated. It's used for all
+	 * tasks that are not monitored.
+	 */
+	entry = __rmid_entry(0);
+	list_del(&entry->list);
+
+	mutex_lock(&cache_mutex);
+	intel_cqm_rotation_rmid = __get_rmid();
+	mutex_unlock(&cache_mutex);
+
+	return 0;
+fail:
+	while (r--)
+		kfree(cqm_rmid_ptrs[r]);
+
+	kfree(cqm_rmid_ptrs);
+	return -ENOMEM;
+}
+
+/*
+ * Determine if @a and @b measure the same set of tasks.
+ *
+ * If @a and @b measure the same set of tasks then we want to share a
+ * single RMID.
+ */
+static bool __match_event(struct perf_event *a, struct perf_event *b)
+{
+	/* Per-cpu and task events don't mix */
+	if ((a->attach_state & PERF_ATTACH_TASK) !=
+	    (b->attach_state & PERF_ATTACH_TASK))
+		return false;
+
+#ifdef CONFIG_CGROUP_PERF
+	if (a->cgrp != b->cgrp)
+		return false;
+#endif
+
+	/* If not task event, we're machine wide */
+	if (!(b->attach_state & PERF_ATTACH_TASK))
+		return true;
+
+	/*
+	 * Events that target same task are placed into the same cache group.
+	 */
+	if (a->hw.target == b->hw.target)
+		return true;
+
+	/*
+	 * Are we an inherited event?
+	 */
+	if (b->parent == a)
+		return true;
+
+	return false;
+}
+
+#ifdef CONFIG_CGROUP_PERF
+static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
+{
+	if (event->attach_state & PERF_ATTACH_TASK)
+		return perf_cgroup_from_task(event->hw.target);
+
+	return event->cgrp;
+}
+#endif
+
+/*
+ * Determine if @a's tasks intersect with @b's tasks
+ *
+ * There are combinations of events that we explicitly prohibit,
+ *
+ *		   PROHIBITS
+ *     system-wide    -> 	cgroup and task
+ *     cgroup 	      ->	system-wide
+ *     		      ->	task in cgroup
+ *     task 	      -> 	system-wide
+ *     		      ->	task in cgroup
+ *
+ * Call this function before allocating an RMID.
+ */
+static bool __conflict_event(struct perf_event *a, struct perf_event *b)
+{
+#ifdef CONFIG_CGROUP_PERF
+	/*
+	 * We can have any number of cgroups but only one system-wide
+	 * event at a time.
+	 */
+	if (a->cgrp && b->cgrp) {
+		struct perf_cgroup *ac = a->cgrp;
+		struct perf_cgroup *bc = b->cgrp;
+
+		/*
+		 * This condition should have been caught in
+		 * __match_event() and we should be sharing an RMID.
+		 */
+		WARN_ON_ONCE(ac == bc);
+
+		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+			return true;
+
+		return false;
+	}
+
+	if (a->cgrp || b->cgrp) {
+		struct perf_cgroup *ac, *bc;
+
+		/*
+		 * cgroup and system-wide events are mutually exclusive
+		 */
+		if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
+		    (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
+			return true;
+
+		/*
+		 * Ensure neither event is part of the other's cgroup
+		 */
+		ac = event_to_cgroup(a);
+		bc = event_to_cgroup(b);
+		if (ac == bc)
+			return true;
+
+		/*
+		 * Must have cgroup and non-intersecting task events.
+		 */
+		if (!ac || !bc)
+			return false;
+
+		/*
+		 * We have cgroup and task events, and the task belongs
+		 * to a cgroup. Check for for overlap.
+		 */
+		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+			return true;
+
+		return false;
+	}
+#endif
+	/*
+	 * If one of them is not a task, same story as above with cgroups.
+	 */
+	if (!(a->attach_state & PERF_ATTACH_TASK) ||
+	    !(b->attach_state & PERF_ATTACH_TASK))
+		return true;
+
+	/*
+	 * Must be non-overlapping.
+	 */
+	return false;
+}
+
+struct rmid_read {
+	unsigned int rmid;
+	atomic64_t value;
+};
+
+static void __intel_cqm_event_count(void *info);
+
+/*
+ * Exchange the RMID of a group of events.
+ */
+static unsigned int
+intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid)
+{
+	struct perf_event *event;
+	unsigned int old_rmid = group->hw.cqm_rmid;
+	struct list_head *head = &group->hw.cqm_group_entry;
+
+	lockdep_assert_held(&cache_mutex);
+
+	/*
+	 * If our RMID is being deallocated, perform a read now.
+	 */
+	if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
+		struct rmid_read rr = {
+			.value = ATOMIC64_INIT(0),
+			.rmid = old_rmid,
+		};
+
+		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
+				 &rr, 1);
+		local64_set(&group->count, atomic64_read(&rr.value));
+	}
+
+	raw_spin_lock_irq(&cache_lock);
+
+	group->hw.cqm_rmid = rmid;
+	list_for_each_entry(event, head, hw.cqm_group_entry)
+		event->hw.cqm_rmid = rmid;
+
+	raw_spin_unlock_irq(&cache_lock);
+
+	return old_rmid;
+}
+
+/*
+ * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
+ * cachelines are still tagged with RMIDs in limbo, we progressively
+ * increment the threshold until we find an RMID in limbo with <=
+ * __intel_cqm_threshold lines tagged. This is designed to mitigate the
+ * problem where cachelines tagged with an RMID are not steadily being
+ * evicted.
+ *
+ * On successful rotations we decrease the threshold back towards zero.
+ *
+ * __intel_cqm_max_threshold provides an upper bound on the threshold,
+ * and is measured in bytes because it's exposed to userland.
+ */
+static unsigned int __intel_cqm_threshold;
+static unsigned int __intel_cqm_max_threshold;
+
+/*
+ * Test whether an RMID has a zero occupancy value on this cpu.
+ */
+static void intel_cqm_stable(void *arg)
+{
+	struct cqm_rmid_entry *entry;
+
+	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+		if (entry->state != RMID_AVAILABLE)
+			break;
+
+		if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
+			entry->state = RMID_DIRTY;
+	}
+}
+
+/*
+ * If we have group events waiting for an RMID that don't conflict with
+ * events already running, assign @rmid.
+ */
+static bool intel_cqm_sched_in_event(unsigned int rmid)
+{
+	struct perf_event *leader, *event;
+
+	lockdep_assert_held(&cache_mutex);
+
+	leader = list_first_entry(&cache_groups, struct perf_event,
+				  hw.cqm_groups_entry);
+	event = leader;
+
+	list_for_each_entry_continue(event, &cache_groups,
+				     hw.cqm_groups_entry) {
+		if (__rmid_valid(event->hw.cqm_rmid))
+			continue;
+
+		if (__conflict_event(event, leader))
+			continue;
+
+		intel_cqm_xchg_rmid(event, rmid);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Initially use this constant for both the limbo queue time and the
+ * rotation timer interval, pmu::hrtimer_interval_ms.
+ *
+ * They don't need to be the same, but the two are related since if you
+ * rotate faster than you recycle RMIDs, you may run out of available
+ * RMIDs.
+ */
+#define RMID_DEFAULT_QUEUE_TIME 250	/* ms */
+
+static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
+
+/*
+ * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
+ * @nr_available: number of freeable RMIDs on the limbo list
+ *
+ * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
+ * cachelines are tagged with those RMIDs. After this we can reuse them
+ * and know that the current set of active RMIDs is stable.
+ *
+ * Return %true or %false depending on whether stabilization needs to be
+ * reattempted.
+ *
+ * If we return %true then @nr_available is updated to indicate the
+ * number of RMIDs on the limbo list that have been queued for the
+ * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
+ * are above __intel_cqm_threshold.
+ */
+static bool intel_cqm_rmid_stabilize(unsigned int *available)
+{
+	struct cqm_rmid_entry *entry, *tmp;
+
+	lockdep_assert_held(&cache_mutex);
+
+	*available = 0;
+	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+		unsigned long min_queue_time;
+		unsigned long now = jiffies;
+
+		/*
+		 * We hold RMIDs placed into limbo for a minimum queue
+		 * time. Before the minimum queue time has elapsed we do
+		 * not recycle RMIDs.
+		 *
+		 * The reasoning is that until a sufficient time has
+		 * passed since we stopped using an RMID, any RMID
+		 * placed onto the limbo list will likely still have
+		 * data tagged in the cache, which means we'll probably
+		 * fail to recycle it anyway.
+		 *
+		 * We can save ourselves an expensive IPI by skipping
+		 * any RMIDs that have not been queued for the minimum
+		 * time.
+		 */
+		min_queue_time = entry->queue_time +
+			msecs_to_jiffies(__rmid_queue_time_ms);
+
+		if (time_after(min_queue_time, now))
+			break;
+
+		entry->state = RMID_AVAILABLE;
+		(*available)++;
+	}
+
+	/*
+	 * Fast return if none of the RMIDs on the limbo list have been
+	 * sitting on the queue for the minimum queue time.
+	 */
+	if (!*available)
+		return false;
+
+	/*
+	 * Test whether an RMID is free for each package.
+	 */
+	on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
+
+	list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
+		/*
+		 * Exhausted all RMIDs that have waited min queue time.
+		 */
+		if (entry->state == RMID_YOUNG)
+			break;
+
+		if (entry->state == RMID_DIRTY)
+			continue;
+
+		list_del(&entry->list);	/* remove from limbo */
+
+		/*
+		 * The rotation RMID gets priority if it's
+		 * currently invalid. In which case, skip adding
+		 * the RMID to the the free lru.
+		 */
+		if (!__rmid_valid(intel_cqm_rotation_rmid)) {
+			intel_cqm_rotation_rmid = entry->rmid;
+			continue;
+		}
+
+		/*
+		 * If we have groups waiting for RMIDs, hand
+		 * them one now provided they don't conflict.
+		 */
+		if (intel_cqm_sched_in_event(entry->rmid))
+			continue;
+
+		/*
+		 * Otherwise place it onto the free list.
+		 */
+		list_add_tail(&entry->list, &cqm_rmid_free_lru);
+	}
+
+
+	return __rmid_valid(intel_cqm_rotation_rmid);
+}
+
+/*
+ * Pick a victim group and move it to the tail of the group list.
+ * @next: The first group without an RMID
+ */
+static void __intel_cqm_pick_and_rotate(struct perf_event *next)
+{
+	struct perf_event *rotor;
+	unsigned int rmid;
+
+	lockdep_assert_held(&cache_mutex);
+
+	rotor = list_first_entry(&cache_groups, struct perf_event,
+				 hw.cqm_groups_entry);
+
+	/*
+	 * The group at the front of the list should always have a valid
+	 * RMID. If it doesn't then no groups have RMIDs assigned and we
+	 * don't need to rotate the list.
+	 */
+	if (next == rotor)
+		return;
+
+	rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
+	__put_rmid(rmid);
+
+	list_rotate_left(&cache_groups);
+}
+
+/*
+ * Deallocate the RMIDs from any events that conflict with @event, and
+ * place them on the back of the group list.
+ */
+static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
+{
+	struct perf_event *group, *g;
+	unsigned int rmid;
+
+	lockdep_assert_held(&cache_mutex);
+
+	list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
+		if (group == event)
+			continue;
+
+		rmid = group->hw.cqm_rmid;
+
+		/*
+		 * Skip events that don't have a valid RMID.
+		 */
+		if (!__rmid_valid(rmid))
+			continue;
+
+		/*
+		 * No conflict? No problem! Leave the event alone.
+		 */
+		if (!__conflict_event(group, event))
+			continue;
+
+		intel_cqm_xchg_rmid(group, INVALID_RMID);
+		__put_rmid(rmid);
+	}
+}
+
+/*
+ * Attempt to rotate the groups and assign new RMIDs.
+ *
+ * We rotate for two reasons,
+ *   1. To handle the scheduling of conflicting events
+ *   2. To recycle RMIDs
+ *
+ * Rotating RMIDs is complicated because the hardware doesn't give us
+ * any clues.
+ *
+ * There's problems with the hardware interface; when you change the
+ * task:RMID map cachelines retain their 'old' tags, giving a skewed
+ * picture. In order to work around this, we must always keep one free
+ * RMID - intel_cqm_rotation_rmid.
+ *
+ * Rotation works by taking away an RMID from a group (the old RMID),
+ * and assigning the free RMID to another group (the new RMID). We must
+ * then wait for the old RMID to not be used (no cachelines tagged).
+ * This ensure that all cachelines are tagged with 'active' RMIDs. At
+ * this point we can start reading values for the new RMID and treat the
+ * old RMID as the free RMID for the next rotation.
+ *
+ * Return %true or %false depending on whether we did any rotating.
+ */
+static bool __intel_cqm_rmid_rotate(void)
+{
+	struct perf_event *group, *start = NULL;
+	unsigned int threshold_limit;
+	unsigned int nr_needed = 0;
+	unsigned int nr_available;
+	bool rotated = false;
+
+	mutex_lock(&cache_mutex);
+
+again:
+	/*
+	 * Fast path through this function if there are no groups and no
+	 * RMIDs that need cleaning.
+	 */
+	if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
+		goto out;
+
+	list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
+		if (!__rmid_valid(group->hw.cqm_rmid)) {
+			if (!start)
+				start = group;
+			nr_needed++;
+		}
+	}
+
+	/*
+	 * We have some event groups, but they all have RMIDs assigned
+	 * and no RMIDs need cleaning.
+	 */
+	if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
+		goto out;
+
+	if (!nr_needed)
+		goto stabilize;
+
+	/*
+	 * We have more event groups without RMIDs than available RMIDs,
+	 * or we have event groups that conflict with the ones currently
+	 * scheduled.
+	 *
+	 * We force deallocate the rmid of the group at the head of
+	 * cache_groups. The first event group without an RMID then gets
+	 * assigned intel_cqm_rotation_rmid. This ensures we always make
+	 * forward progress.
+	 *
+	 * Rotate the cache_groups list so the previous head is now the
+	 * tail.
+	 */
+	__intel_cqm_pick_and_rotate(start);
+
+	/*
+	 * If the rotation is going to succeed, reduce the threshold so
+	 * that we don't needlessly reuse dirty RMIDs.
+	 */
+	if (__rmid_valid(intel_cqm_rotation_rmid)) {
+		intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
+		intel_cqm_rotation_rmid = __get_rmid();
+
+		intel_cqm_sched_out_conflicting_events(start);
+
+		if (__intel_cqm_threshold)
+			__intel_cqm_threshold--;
+	}
+
+	rotated = true;
+
+stabilize:
+	/*
+	 * We now need to stablize the RMID we freed above (if any) to
+	 * ensure that the next time we rotate we have an RMID with zero
+	 * occupancy value.
+	 *
+	 * Alternatively, if we didn't need to perform any rotation,
+	 * we'll have a bunch of RMIDs in limbo that need stabilizing.
+	 */
+	threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
+
+	while (intel_cqm_rmid_stabilize(&nr_available) &&
+	       __intel_cqm_threshold < threshold_limit) {
+		unsigned int steal_limit;
+
+		/*
+		 * Don't spin if nobody is actively waiting for an RMID,
+		 * the rotation worker will be kicked as soon as an
+		 * event needs an RMID anyway.
+		 */
+		if (!nr_needed)
+			break;
+
+		/* Allow max 25% of RMIDs to be in limbo. */
+		steal_limit = (cqm_max_rmid + 1) / 4;
+
+		/*
+		 * We failed to stabilize any RMIDs so our rotation
+		 * logic is now stuck. In order to make forward progress
+		 * we have a few options:
+		 *
+		 *   1. rotate ("steal") another RMID
+		 *   2. increase the threshold
+		 *   3. do nothing
+		 *
+		 * We do both of 1. and 2. until we hit the steal limit.
+		 *
+		 * The steal limit prevents all RMIDs ending up on the
+		 * limbo list. This can happen if every RMID has a
+		 * non-zero occupancy above threshold_limit, and the
+		 * occupancy values aren't dropping fast enough.
+		 *
+		 * Note that there is prioritisation at work here - we'd
+		 * rather increase the number of RMIDs on the limbo list
+		 * than increase the threshold, because increasing the
+		 * threshold skews the event data (because we reuse
+		 * dirty RMIDs) - threshold bumps are a last resort.
+		 */
+		if (nr_available < steal_limit)
+			goto again;
+
+		__intel_cqm_threshold++;
+	}
+
+out:
+	mutex_unlock(&cache_mutex);
+	return rotated;
+}
+
+static void intel_cqm_rmid_rotate(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
+
+static struct pmu intel_cqm_pmu;
+
+static void intel_cqm_rmid_rotate(struct work_struct *work)
+{
+	unsigned long delay;
+
+	__intel_cqm_rmid_rotate();
+
+	delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
+	schedule_delayed_work(&intel_cqm_rmid_work, delay);
+}
+
+/*
+ * Find a group and setup RMID.
+ *
+ * If we're part of a group, we use the group's RMID.
+ */
+static void intel_cqm_setup_event(struct perf_event *event,
+				  struct perf_event **group)
+{
+	struct perf_event *iter;
+	unsigned int rmid;
+	bool conflict = false;
+
+	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+		rmid = iter->hw.cqm_rmid;
+
+		if (__match_event(iter, event)) {
+			/* All tasks in a group share an RMID */
+			event->hw.cqm_rmid = rmid;
+			*group = iter;
+			return;
+		}
+
+		/*
+		 * We only care about conflicts for events that are
+		 * actually scheduled in (and hence have a valid RMID).
+		 */
+		if (__conflict_event(iter, event) && __rmid_valid(rmid))
+			conflict = true;
+	}
+
+	if (conflict)
+		rmid = INVALID_RMID;
+	else
+		rmid = __get_rmid();
+
+	event->hw.cqm_rmid = rmid;
+}
+
+static void intel_cqm_event_read(struct perf_event *event)
+{
+	unsigned long flags;
+	unsigned int rmid;
+	u64 val;
+
+	/*
+	 * Task events are handled by intel_cqm_event_count().
+	 */
+	if (event->cpu == -1)
+		return;
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+	rmid = event->hw.cqm_rmid;
+
+	if (!__rmid_valid(rmid))
+		goto out;
+
+	val = __rmid_read(rmid);
+
+	/*
+	 * Ignore this reading on error states and do not update the value.
+	 */
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		goto out;
+
+	local64_set(&event->count, val);
+out:
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+}
+
+static void __intel_cqm_event_count(void *info)
+{
+	struct rmid_read *rr = info;
+	u64 val;
+
+	val = __rmid_read(rr->rmid);
+
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+
+	atomic64_add(val, &rr->value);
+}
+
+static inline bool cqm_group_leader(struct perf_event *event)
+{
+	return !list_empty(&event->hw.cqm_groups_entry);
+}
+
+static u64 intel_cqm_event_count(struct perf_event *event)
+{
+	unsigned long flags;
+	struct rmid_read rr = {
+		.value = ATOMIC64_INIT(0),
+	};
+
+	/*
+	 * We only need to worry about task events. System-wide events
+	 * are handled like usual, i.e. entirely with
+	 * intel_cqm_event_read().
+	 */
+	if (event->cpu != -1)
+		return __perf_event_count(event);
+
+	/*
+	 * Only the group leader gets to report values. This stops us
+	 * reporting duplicate values to userspace, and gives us a clear
+	 * rule for which task gets to report the values.
+	 *
+	 * Note that it is impossible to attribute these values to
+	 * specific packages - we forfeit that ability when we create
+	 * task events.
+	 */
+	if (!cqm_group_leader(event))
+		return 0;
+
+	/*
+	 * Notice that we don't perform the reading of an RMID
+	 * atomically, because we can't hold a spin lock across the
+	 * IPIs.
+	 *
+	 * Speculatively perform the read, since @event might be
+	 * assigned a different (possibly invalid) RMID while we're
+	 * busying performing the IPI calls. It's therefore necessary to
+	 * check @event's RMID afterwards, and if it has changed,
+	 * discard the result of the read.
+	 */
+	rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
+
+	if (!__rmid_valid(rr.rmid))
+		goto out;
+
+	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+	if (event->hw.cqm_rmid == rr.rmid)
+		local64_set(&event->count, atomic64_read(&rr.value));
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+out:
+	return __perf_event_count(event);
+}
+
+static void intel_cqm_event_start(struct perf_event *event, int mode)
+{
+	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	unsigned int rmid = event->hw.cqm_rmid;
+	unsigned long flags;
+
+	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
+		return;
+
+	event->hw.cqm_state &= ~PERF_HES_STOPPED;
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+
+	if (state->cnt++)
+		WARN_ON_ONCE(state->rmid != rmid);
+	else
+		WARN_ON_ONCE(state->rmid);
+
+	state->rmid = rmid;
+	wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
+
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+}
+
+static void intel_cqm_event_stop(struct perf_event *event, int mode)
+{
+	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	unsigned long flags;
+
+	if (event->hw.cqm_state & PERF_HES_STOPPED)
+		return;
+
+	event->hw.cqm_state |= PERF_HES_STOPPED;
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+	intel_cqm_event_read(event);
+
+	if (!--state->cnt) {
+		state->rmid = 0;
+		wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+	} else {
+		WARN_ON_ONCE(!state->rmid);
+	}
+
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+}
+
+static int intel_cqm_event_add(struct perf_event *event, int mode)
+{
+	unsigned long flags;
+	unsigned int rmid;
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
+	event->hw.cqm_state = PERF_HES_STOPPED;
+	rmid = event->hw.cqm_rmid;
+
+	if (__rmid_valid(rmid) && (mode & PERF_EF_START))
+		intel_cqm_event_start(event, mode);
+
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	return 0;
+}
+
+static void intel_cqm_event_del(struct perf_event *event, int mode)
+{
+	intel_cqm_event_stop(event, mode);
+}
+
+static void intel_cqm_event_destroy(struct perf_event *event)
+{
+	struct perf_event *group_other = NULL;
+
+	mutex_lock(&cache_mutex);
+
+	/*
+	 * If there's another event in this group...
+	 */
+	if (!list_empty(&event->hw.cqm_group_entry)) {
+		group_other = list_first_entry(&event->hw.cqm_group_entry,
+					       struct perf_event,
+					       hw.cqm_group_entry);
+		list_del(&event->hw.cqm_group_entry);
+	}
+
+	/*
+	 * And we're the group leader..
+	 */
+	if (cqm_group_leader(event)) {
+		/*
+		 * If there was a group_other, make that leader, otherwise
+		 * destroy the group and return the RMID.
+		 */
+		if (group_other) {
+			list_replace(&event->hw.cqm_groups_entry,
+				     &group_other->hw.cqm_groups_entry);
+		} else {
+			unsigned int rmid = event->hw.cqm_rmid;
+
+			if (__rmid_valid(rmid))
+				__put_rmid(rmid);
+			list_del(&event->hw.cqm_groups_entry);
+		}
+	}
+
+	mutex_unlock(&cache_mutex);
+}
+
+static int intel_cqm_event_init(struct perf_event *event)
+{
+	struct perf_event *group = NULL;
+	bool rotate = false;
+
+	if (event->attr.type != intel_cqm_pmu.type)
+		return -ENOENT;
+
+	if (event->attr.config & ~QOS_EVENT_MASK)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&event->hw.cqm_group_entry);
+	INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
+
+	event->destroy = intel_cqm_event_destroy;
+
+	mutex_lock(&cache_mutex);
+
+	/* Will also set rmid */
+	intel_cqm_setup_event(event, &group);
+
+	if (group) {
+		list_add_tail(&event->hw.cqm_group_entry,
+			      &group->hw.cqm_group_entry);
+	} else {
+		list_add_tail(&event->hw.cqm_groups_entry,
+			      &cache_groups);
+
+		/*
+		 * All RMIDs are either in use or have recently been
+		 * used. Kick the rotation worker to clean/free some.
+		 *
+		 * We only do this for the group leader, rather than for
+		 * every event in a group to save on needless work.
+		 */
+		if (!__rmid_valid(event->hw.cqm_rmid))
+			rotate = true;
+	}
+
+	mutex_unlock(&cache_mutex);
+
+	if (rotate)
+		schedule_delayed_work(&intel_cqm_rmid_work, 0);
+
+	return 0;
+}
+
+EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
+EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
+EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
+EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
+EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
+
+static struct attribute *intel_cqm_events_attr[] = {
+	EVENT_PTR(intel_cqm_llc),
+	EVENT_PTR(intel_cqm_llc_pkg),
+	EVENT_PTR(intel_cqm_llc_unit),
+	EVENT_PTR(intel_cqm_llc_scale),
+	EVENT_PTR(intel_cqm_llc_snapshot),
+	NULL,
+};
+
+static struct attribute_group intel_cqm_events_group = {
+	.name = "events",
+	.attrs = intel_cqm_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+static struct attribute *intel_cqm_formats_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group intel_cqm_format_group = {
+	.name = "format",
+	.attrs = intel_cqm_formats_attr,
+};
+
+static ssize_t
+max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
+			   char *page)
+{
+	ssize_t rv;
+
+	mutex_lock(&cache_mutex);
+	rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
+	mutex_unlock(&cache_mutex);
+
+	return rv;
+}
+
+static ssize_t
+max_recycle_threshold_store(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned int bytes, cachelines;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &bytes);
+	if (ret)
+		return ret;
+
+	mutex_lock(&cache_mutex);
+
+	__intel_cqm_max_threshold = bytes;
+	cachelines = bytes / cqm_l3_scale;
+
+	/*
+	 * The new maximum takes effect immediately.
+	 */
+	if (__intel_cqm_threshold > cachelines)
+		__intel_cqm_threshold = cachelines;
+
+	mutex_unlock(&cache_mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(max_recycle_threshold);
+
+static struct attribute *intel_cqm_attrs[] = {
+	&dev_attr_max_recycle_threshold.attr,
+	NULL,
+};
+
+static const struct attribute_group intel_cqm_group = {
+	.attrs = intel_cqm_attrs,
+};
+
+static const struct attribute_group *intel_cqm_attr_groups[] = {
+	&intel_cqm_events_group,
+	&intel_cqm_format_group,
+	&intel_cqm_group,
+	NULL,
+};
+
+static struct pmu intel_cqm_pmu = {
+	.hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
+	.attr_groups	     = intel_cqm_attr_groups,
+	.task_ctx_nr	     = perf_sw_context,
+	.event_init	     = intel_cqm_event_init,
+	.add		     = intel_cqm_event_add,
+	.del		     = intel_cqm_event_del,
+	.start		     = intel_cqm_event_start,
+	.stop		     = intel_cqm_event_stop,
+	.read		     = intel_cqm_event_read,
+	.count		     = intel_cqm_event_count,
+};
+
+static inline void cqm_pick_event_reader(int cpu)
+{
+	int phys_id = topology_physical_package_id(cpu);
+	int i;
+
+	for_each_cpu(i, &cqm_cpumask) {
+		if (phys_id == topology_physical_package_id(i))
+			return;	/* already got reader for this socket */
+	}
+
+	cpumask_set_cpu(cpu, &cqm_cpumask);
+}
+
+static void intel_cqm_cpu_prepare(unsigned int cpu)
+{
+	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+	raw_spin_lock_init(&state->lock);
+	state->rmid = 0;
+	state->cnt  = 0;
+
+	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
+	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
+}
+
+static void intel_cqm_cpu_exit(unsigned int cpu)
+{
+	int phys_id = topology_physical_package_id(cpu);
+	int i;
+
+	/*
+	 * Is @cpu a designated cqm reader?
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
+		return;
+
+	for_each_online_cpu(i) {
+		if (i == cpu)
+			continue;
+
+		if (phys_id == topology_physical_package_id(i)) {
+			cpumask_set_cpu(i, &cqm_cpumask);
+			break;
+		}
+	}
+}
+
+static int intel_cqm_cpu_notifier(struct notifier_block *nb,
+				  unsigned long action, void *hcpu)
+{
+	unsigned int cpu  = (unsigned long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		intel_cqm_cpu_prepare(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		intel_cqm_cpu_exit(cpu);
+		break;
+	case CPU_STARTING:
+		cqm_pick_event_reader(cpu);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static const struct x86_cpu_id intel_cqm_match[] = {
+	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
+	{}
+};
+
+static int __init intel_cqm_init(void)
+{
+	char *str, scale[20];
+	int i, cpu, ret;
+
+	if (!x86_match_cpu(intel_cqm_match))
+		return -ENODEV;
+
+	cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
+
+	/*
+	 * It's possible that not all resources support the same number
+	 * of RMIDs. Instead of making scheduling much more complicated
+	 * (where we have to match a task's RMID to a cpu that supports
+	 * that many RMIDs) just find the minimum RMIDs supported across
+	 * all cpus.
+	 *
+	 * Also, check that the scales match on all cpus.
+	 */
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(cpu) {
+		struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+		if (c->x86_cache_max_rmid < cqm_max_rmid)
+			cqm_max_rmid = c->x86_cache_max_rmid;
+
+		if (c->x86_cache_occ_scale != cqm_l3_scale) {
+			pr_err("Multiple LLC scale values, disabling\n");
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	/*
+	 * A reasonable upper limit on the max threshold is the number
+	 * of lines tagged per RMID if all RMIDs have the same number of
+	 * lines tagged in the LLC.
+	 *
+	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
+	 */
+	__intel_cqm_max_threshold =
+		boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
+
+	snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
+	str = kstrdup(scale, GFP_KERNEL);
+	if (!str) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	event_attr_intel_cqm_llc_scale.event_str = str;
+
+	ret = intel_cqm_setup_rmid_cache();
+	if (ret)
+		goto out;
+
+	for_each_online_cpu(i) {
+		intel_cqm_cpu_prepare(i);
+		cqm_pick_event_reader(i);
+	}
+
+	__perf_cpu_notifier(intel_cqm_cpu_notifier);
+
+	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
+	if (ret)
+		pr_err("Intel CQM perf registration failed: %d\n", ret);
+	else
+		pr_info("Intel CQM monitoring enabled\n");
+
+out:
+	cpu_notifier_register_done();
+
+	return ret;
+}
+device_initcall(intel_cqm_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 0739833..ca69ea5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -461,7 +461,8 @@
 
 	debugctlmsr |= DEBUGCTLMSR_TR;
 	debugctlmsr |= DEBUGCTLMSR_BTS;
-	debugctlmsr |= DEBUGCTLMSR_BTINT;
+	if (config & ARCH_PERFMON_EVENTSEL_INT)
+		debugctlmsr |= DEBUGCTLMSR_BTINT;
 
 	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
 		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
@@ -611,6 +612,10 @@
 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 	/* Allow all events as PEBS with no flags */
 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 	EVENT_CONSTRAINT_END
@@ -622,6 +627,10 @@
 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 	/* Allow all events as PEBS with no flags */
 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
         EVENT_CONSTRAINT_END
@@ -633,16 +642,16 @@
 	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
-	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
-	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
-	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
 	/* Allow all events as PEBS with no flags */
 	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 	EVENT_CONSTRAINT_END
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 58f1a94..94e5b50 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -39,6 +39,7 @@
 #define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
 #define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
 #define LBR_FAR_BIT		8 /* do not capture far branches */
+#define LBR_CALL_STACK_BIT	9 /* enable call stack */
 
 #define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
 #define LBR_USER	(1 << LBR_USER_BIT)
@@ -49,6 +50,7 @@
 #define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
 #define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
 #define LBR_FAR		(1 << LBR_FAR_BIT)
+#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
 
 #define LBR_PLM (LBR_KERNEL | LBR_USER)
 
@@ -69,33 +71,31 @@
 #define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
 #define LBR_FROM_FLAG_ABORT    (1ULL << 61)
 
-#define for_each_branch_sample_type(x) \
-	for ((x) = PERF_SAMPLE_BRANCH_USER; \
-	     (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
-
 /*
  * x86control flow change classification
  * x86control flow changes include branches, interrupts, traps, faults
  */
 enum {
-	X86_BR_NONE     = 0,      /* unknown */
+	X86_BR_NONE		= 0,      /* unknown */
 
-	X86_BR_USER     = 1 << 0, /* branch target is user */
-	X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */
+	X86_BR_USER		= 1 << 0, /* branch target is user */
+	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */
 
-	X86_BR_CALL     = 1 << 2, /* call */
-	X86_BR_RET      = 1 << 3, /* return */
-	X86_BR_SYSCALL  = 1 << 4, /* syscall */
-	X86_BR_SYSRET   = 1 << 5, /* syscall return */
-	X86_BR_INT      = 1 << 6, /* sw interrupt */
-	X86_BR_IRET     = 1 << 7, /* return from interrupt */
-	X86_BR_JCC      = 1 << 8, /* conditional */
-	X86_BR_JMP      = 1 << 9, /* jump */
-	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
-	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
-	X86_BR_ABORT    = 1 << 12,/* transaction abort */
-	X86_BR_IN_TX    = 1 << 13,/* in transaction */
-	X86_BR_NO_TX    = 1 << 14,/* not in transaction */
+	X86_BR_CALL		= 1 << 2, /* call */
+	X86_BR_RET		= 1 << 3, /* return */
+	X86_BR_SYSCALL		= 1 << 4, /* syscall */
+	X86_BR_SYSRET		= 1 << 5, /* syscall return */
+	X86_BR_INT		= 1 << 6, /* sw interrupt */
+	X86_BR_IRET		= 1 << 7, /* return from interrupt */
+	X86_BR_JCC		= 1 << 8, /* conditional */
+	X86_BR_JMP		= 1 << 9, /* jump */
+	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
+	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
+	X86_BR_ABORT		= 1 << 12,/* transaction abort */
+	X86_BR_IN_TX		= 1 << 13,/* in transaction */
+	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
+	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
+	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -112,13 +112,15 @@
 	 X86_BR_JMP	 |\
 	 X86_BR_IRQ	 |\
 	 X86_BR_ABORT	 |\
-	 X86_BR_IND_CALL)
+	 X86_BR_IND_CALL |\
+	 X86_BR_ZERO_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
 
 #define X86_BR_ANY_CALL		 \
 	(X86_BR_CALL		|\
 	 X86_BR_IND_CALL	|\
+	 X86_BR_ZERO_CALL	|\
 	 X86_BR_SYSCALL		|\
 	 X86_BR_IRQ		|\
 	 X86_BR_INT)
@@ -130,17 +132,32 @@
  * otherwise it becomes near impossible to get a reliable stack.
  */
 
-static void __intel_pmu_lbr_enable(void)
+static void __intel_pmu_lbr_enable(bool pmi)
 {
-	u64 debugctl;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 debugctl, lbr_select = 0, orig_debugctl;
 
-	if (cpuc->lbr_sel)
-		wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
+	/*
+	 * No need to reprogram LBR_SELECT in a PMI, as it
+	 * did not change.
+	 */
+	if (cpuc->lbr_sel && !pmi) {
+		lbr_select = cpuc->lbr_sel->config;
+		wrmsrl(MSR_LBR_SELECT, lbr_select);
+	}
 
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	orig_debugctl = debugctl;
+	debugctl |= DEBUGCTLMSR_LBR;
+	/*
+	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
+	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
+	 * may cause superfluous increase/decrease of LBR_TOS.
+	 */
+	if (!(lbr_select & LBR_CALL_STACK))
+		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+	if (orig_debugctl != debugctl)
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 }
 
 static void __intel_pmu_lbr_disable(void)
@@ -181,9 +198,116 @@
 		intel_pmu_lbr_reset_64();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
+static inline u64 intel_pmu_lbr_tos(void)
+{
+	u64 tos;
+
+	rdmsrl(x86_pmu.lbr_tos, tos);
+	return tos;
+}
+
+enum {
+	LBR_NONE,
+	LBR_VALID,
+};
+
+static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+{
+	int i;
+	unsigned lbr_idx, mask;
+	u64 tos;
+
+	if (task_ctx->lbr_callstack_users == 0 ||
+	    task_ctx->lbr_stack_state == LBR_NONE) {
+		intel_pmu_lbr_reset();
+		return;
+	}
+
+	mask = x86_pmu.lbr_nr - 1;
+	tos = intel_pmu_lbr_tos();
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		lbr_idx = (tos - i) & mask;
+		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+	}
+	task_ctx->lbr_stack_state = LBR_NONE;
+}
+
+static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+{
+	int i;
+	unsigned lbr_idx, mask;
+	u64 tos;
+
+	if (task_ctx->lbr_callstack_users == 0) {
+		task_ctx->lbr_stack_state = LBR_NONE;
+		return;
+	}
+
+	mask = x86_pmu.lbr_nr - 1;
+	tos = intel_pmu_lbr_tos();
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		lbr_idx = (tos - i) & mask;
+		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+	}
+	task_ctx->lbr_stack_state = LBR_VALID;
+}
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct x86_perf_task_context *task_ctx;
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	/*
+	 * If LBR callstack feature is enabled and the stack was saved when
+	 * the task was scheduled out, restore the stack. Otherwise flush
+	 * the LBR stack.
+	 */
+	task_ctx = ctx ? ctx->task_ctx_data : NULL;
+	if (task_ctx) {
+		if (sched_in) {
+			__intel_pmu_lbr_restore(task_ctx);
+			cpuc->lbr_context = ctx;
+		} else {
+			__intel_pmu_lbr_save(task_ctx);
+		}
+		return;
+	}
+
+	/*
+	 * When sampling the branck stack in system-wide, it may be
+	 * necessary to flush the stack on context switch. This happens
+	 * when the branch stack does not tag its entries with the pid
+	 * of the current task. Otherwise it becomes impossible to
+	 * associate a branch entry with a task. This ambiguity is more
+	 * likely to appear when the branch stack supports priv level
+	 * filtering and the user sets it to monitor only at the user
+	 * level (which could be a useful measurement in system-wide
+	 * mode). In that case, the risk is high of having a branch
+	 * stack with branch from multiple tasks.
+ 	 */
+	if (sched_in) {
+		intel_pmu_lbr_reset();
+		cpuc->lbr_context = ctx;
+	}
+}
+
+static inline bool branch_user_callstack(unsigned br_sel)
+{
+	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
+}
+
 void intel_pmu_lbr_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct x86_perf_task_context *task_ctx;
 
 	if (!x86_pmu.lbr_nr)
 		return;
@@ -198,18 +322,33 @@
 	}
 	cpuc->br_sel = event->hw.branch_reg.reg;
 
+	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+					event->ctx->task_ctx_data) {
+		task_ctx = event->ctx->task_ctx_data;
+		task_ctx->lbr_callstack_users++;
+	}
+
 	cpuc->lbr_users++;
+	perf_sched_cb_inc(event->ctx->pmu);
 }
 
 void intel_pmu_lbr_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct x86_perf_task_context *task_ctx;
 
 	if (!x86_pmu.lbr_nr)
 		return;
 
+	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+					event->ctx->task_ctx_data) {
+		task_ctx = event->ctx->task_ctx_data;
+		task_ctx->lbr_callstack_users--;
+	}
+
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
+	perf_sched_cb_dec(event->ctx->pmu);
 
 	if (cpuc->enabled && !cpuc->lbr_users) {
 		__intel_pmu_lbr_disable();
@@ -218,12 +357,12 @@
 	}
 }
 
-void intel_pmu_lbr_enable_all(void)
+void intel_pmu_lbr_enable_all(bool pmi)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	if (cpuc->lbr_users)
-		__intel_pmu_lbr_enable();
+		__intel_pmu_lbr_enable(pmi);
 }
 
 void intel_pmu_lbr_disable_all(void)
@@ -234,18 +373,6 @@
 		__intel_pmu_lbr_disable();
 }
 
-/*
- * TOS = most recently recorded branch
- */
-static inline u64 intel_pmu_lbr_tos(void)
-{
-	u64 tos;
-
-	rdmsrl(x86_pmu.lbr_tos, tos);
-
-	return tos;
-}
-
 static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 {
 	unsigned long mask = x86_pmu.lbr_nr - 1;
@@ -350,7 +477,7 @@
  * - in case there is no HW filter
  * - in case the HW filter has errata or limitations
  */
-static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 {
 	u64 br_type = event->attr.branch_sample_type;
 	int mask = 0;
@@ -387,11 +514,21 @@
 	if (br_type & PERF_SAMPLE_BRANCH_COND)
 		mask |= X86_BR_JCC;
 
+	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+		if (!x86_pmu_has_lbr_callstack())
+			return -EOPNOTSUPP;
+		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
+			return -EINVAL;
+		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
+			X86_BR_CALL_STACK;
+	}
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
 	 */
 	event->hw.branch_reg.reg = mask;
+	return 0;
 }
 
 /*
@@ -403,14 +540,14 @@
 {
 	struct hw_perf_event_extra *reg;
 	u64 br_type = event->attr.branch_sample_type;
-	u64 mask = 0, m;
-	u64 v;
+	u64 mask = 0, v;
+	int i;
 
-	for_each_branch_sample_type(m) {
-		if (!(br_type & m))
+	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+		if (!(br_type & (1ULL << i)))
 			continue;
 
-		v = x86_pmu.lbr_sel_map[m];
+		v = x86_pmu.lbr_sel_map[i];
 		if (v == LBR_NOT_SUPP)
 			return -EOPNOTSUPP;
 
@@ -420,8 +557,12 @@
 	reg = &event->hw.branch_reg;
 	reg->idx = EXTRA_REG_LBR;
 
-	/* LBR_SELECT operates in suppress mode so invert mask */
-	reg->config = ~mask & x86_pmu.lbr_sel_mask;
+	/*
+	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
+	 * in suppress mode. So LBR_SELECT should be set to
+	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+	 */
+	reg->config = mask ^ x86_pmu.lbr_sel_mask;
 
 	return 0;
 }
@@ -439,7 +580,9 @@
 	/*
 	 * setup SW LBR filter
 	 */
-	intel_pmu_setup_sw_lbr_filter(event);
+	ret = intel_pmu_setup_sw_lbr_filter(event);
+	if (ret)
+		return ret;
 
 	/*
 	 * setup HW LBR filter, if any
@@ -568,6 +711,12 @@
 		ret = X86_BR_INT;
 		break;
 	case 0xe8: /* call near rel */
+		insn_get_immediate(&insn);
+		if (insn.immediate1.value == 0) {
+			/* zero length call */
+			ret = X86_BR_ZERO_CALL;
+			break;
+		}
 	case 0x9a: /* call far absolute */
 		ret = X86_BR_CALL;
 		break;
@@ -678,35 +827,49 @@
 /*
  * Map interface branch filters onto LBR filters
  */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
-	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
-	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
-	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
-	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
-					| LBR_IND_JMP | LBR_FAR,
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
+						| LBR_IND_JMP | LBR_FAR,
 	/*
 	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
 	 */
-	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
 	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
 	/*
 	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
 	 */
-	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
-	[PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
 };
 
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
-	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
-	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
-	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
-	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
-	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
-					| LBR_FAR,
-	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
-	[PERF_SAMPLE_BRANCH_COND]       = LBR_JCC,
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
+						| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
+};
+
+static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
+						| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
+	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
+						| LBR_RETURN | LBR_CALL_STACK,
 };
 
 /* core */
@@ -765,6 +928,20 @@
 	pr_cont("16-deep LBR, ");
 }
 
+/* haswell */
+void intel_pmu_lbr_init_hsw(void)
+{
+	x86_pmu.lbr_nr	 = 16;
+	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+	pr_cont("16-deep LBR, ");
+}
+
 /* atom */
 void __init intel_pmu_lbr_init_atom(void)
 {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
new file mode 100644
index 0000000..f277064
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -0,0 +1,1103 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include <asm/perf_event.h>
+#include <asm/insn.h>
+#include <asm/io.h>
+
+#include "perf_event.h"
+#include "intel_pt.h"
+
+static DEFINE_PER_CPU(struct pt, pt_ctx);
+
+static struct pt_pmu pt_pmu;
+
+enum cpuid_regs {
+	CR_EAX = 0,
+	CR_ECX,
+	CR_EDX,
+	CR_EBX
+};
+
+/*
+ * Capabilities of Intel PT hardware, such as number of address bits or
+ * supported output schemes, are cached and exported to userspace as "caps"
+ * attribute group of pt pmu device
+ * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
+ * relevant bits together with intel_pt traces.
+ *
+ * These are necessary for both trace decoding (payloads_lip, contains address
+ * width encoded in IP-related packets), and event configuration (bitmasks with
+ * permitted values for certain bit fields).
+ */
+#define PT_CAP(_n, _l, _r, _m)						\
+	[PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l,	\
+			    .reg = _r, .mask = _m }
+
+static struct pt_cap_desc {
+	const char	*name;
+	u32		leaf;
+	u8		reg;
+	u32		mask;
+} pt_caps[] = {
+	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
+	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
+	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
+	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
+	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
+};
+
+static u32 pt_cap_get(enum pt_capabilities cap)
+{
+	struct pt_cap_desc *cd = &pt_caps[cap];
+	u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg];
+	unsigned int shift = __ffs(cd->mask);
+
+	return (c & cd->mask) >> shift;
+}
+
+static ssize_t pt_cap_show(struct device *cdev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct dev_ext_attribute *ea =
+		container_of(attr, struct dev_ext_attribute, attr);
+	enum pt_capabilities cap = (long)ea->var;
+
+	return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
+}
+
+static struct attribute_group pt_cap_group = {
+	.name	= "caps",
+};
+
+PMU_FORMAT_ATTR(tsc,		"config:10"	);
+PMU_FORMAT_ATTR(noretcomp,	"config:11"	);
+
+static struct attribute *pt_formats_attr[] = {
+	&format_attr_tsc.attr,
+	&format_attr_noretcomp.attr,
+	NULL,
+};
+
+static struct attribute_group pt_format_group = {
+	.name	= "format",
+	.attrs	= pt_formats_attr,
+};
+
+static const struct attribute_group *pt_attr_groups[] = {
+	&pt_cap_group,
+	&pt_format_group,
+	NULL,
+};
+
+static int __init pt_pmu_hw_init(void)
+{
+	struct dev_ext_attribute *de_attrs;
+	struct attribute **attrs;
+	size_t size;
+	int ret;
+	long i;
+
+	attrs = NULL;
+	ret = -ENODEV;
+	if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+		goto fail;
+
+	for (i = 0; i < PT_CPUID_LEAVES; i++) {
+		cpuid_count(20, i,
+			    &pt_pmu.caps[CR_EAX + i*4],
+			    &pt_pmu.caps[CR_EBX + i*4],
+			    &pt_pmu.caps[CR_ECX + i*4],
+			    &pt_pmu.caps[CR_EDX + i*4]);
+	}
+
+	ret = -ENOMEM;
+	size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
+	attrs = kzalloc(size, GFP_KERNEL);
+	if (!attrs)
+		goto fail;
+
+	size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
+	de_attrs = kzalloc(size, GFP_KERNEL);
+	if (!de_attrs)
+		goto fail;
+
+	for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
+		struct dev_ext_attribute *de_attr = de_attrs + i;
+
+		de_attr->attr.attr.name = pt_caps[i].name;
+
+		sysfs_attr_init(&de_attrs->attr.attr);
+
+		de_attr->attr.attr.mode		= S_IRUGO;
+		de_attr->attr.show		= pt_cap_show;
+		de_attr->var			= (void *)i;
+
+		attrs[i] = &de_attr->attr.attr;
+	}
+
+	pt_cap_group.attrs = attrs;
+
+	return 0;
+
+fail:
+	kfree(attrs);
+
+	return ret;
+}
+
+#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC)
+
+static bool pt_event_valid(struct perf_event *event)
+{
+	u64 config = event->attr.config;
+
+	if ((config & PT_CONFIG_MASK) != config)
+		return false;
+
+	return true;
+}
+
+/*
+ * PT configuration helpers
+ * These all are cpu affine and operate on a local PT
+ */
+
+static bool pt_is_running(void)
+{
+	u64 ctl;
+
+	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+	return !!(ctl & RTIT_CTL_TRACEEN);
+}
+
+static void pt_config(struct perf_event *event)
+{
+	u64 reg;
+
+	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+
+	if (!event->attr.exclude_kernel)
+		reg |= RTIT_CTL_OS;
+	if (!event->attr.exclude_user)
+		reg |= RTIT_CTL_USR;
+
+	reg |= (event->attr.config & PT_CONFIG_MASK);
+
+	wrmsrl(MSR_IA32_RTIT_CTL, reg);
+}
+
+static void pt_config_start(bool start)
+{
+	u64 ctl;
+
+	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
+	if (start)
+		ctl |= RTIT_CTL_TRACEEN;
+	else
+		ctl &= ~RTIT_CTL_TRACEEN;
+	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+	/*
+	 * A wrmsr that disables trace generation serializes other PT
+	 * registers and causes all data packets to be written to memory,
+	 * but a fence is required for the data to become globally visible.
+	 *
+	 * The below WMB, separating data store and aux_head store matches
+	 * the consumer's RMB that separates aux_head load and data load.
+	 */
+	if (!start)
+		wmb();
+}
+
+static void pt_config_buffer(void *buf, unsigned int topa_idx,
+			     unsigned int output_off)
+{
+	u64 reg;
+
+	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
+
+	reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
+
+	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+}
+
+/*
+ * Keep ToPA table-related metadata on the same page as the actual table,
+ * taking up a few words from the top
+ */
+
+#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
+
+/**
+ * struct topa - page-sized ToPA table with metadata at the top
+ * @table:	actual ToPA table entries, as understood by PT hardware
+ * @list:	linkage to struct pt_buffer's list of tables
+ * @phys:	physical address of this page
+ * @offset:	offset of the first entry in this table in the buffer
+ * @size:	total size of all entries in this table
+ * @last:	index of the last initialized entry in this table
+ */
+struct topa {
+	struct topa_entry	table[TENTS_PER_PAGE];
+	struct list_head	list;
+	u64			phys;
+	u64			offset;
+	size_t			size;
+	int			last;
+};
+
+/* make -1 stand for the last table entry */
+#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
+
+/**
+ * topa_alloc() - allocate page-sized ToPA table
+ * @cpu:	CPU on which to allocate.
+ * @gfp:	Allocation flags.
+ *
+ * Return:	On success, return the pointer to ToPA table page.
+ */
+static struct topa *topa_alloc(int cpu, gfp_t gfp)
+{
+	int node = cpu_to_node(cpu);
+	struct topa *topa;
+	struct page *p;
+
+	p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
+	if (!p)
+		return NULL;
+
+	topa = page_address(p);
+	topa->last = 0;
+	topa->phys = page_to_phys(p);
+
+	/*
+	 * In case of singe-entry ToPA, always put the self-referencing END
+	 * link as the 2nd entry in the table
+	 */
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
+		TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
+		TOPA_ENTRY(topa, 1)->end = 1;
+	}
+
+	return topa;
+}
+
+/**
+ * topa_free() - free a page-sized ToPA table
+ * @topa:	Table to deallocate.
+ */
+static void topa_free(struct topa *topa)
+{
+	free_page((unsigned long)topa);
+}
+
+/**
+ * topa_insert_table() - insert a ToPA table into a buffer
+ * @buf:	 PT buffer that's being extended.
+ * @topa:	 New topa table to be inserted.
+ *
+ * If it's the first table in this buffer, set up buffer's pointers
+ * accordingly; otherwise, add a END=1 link entry to @topa to the current
+ * "last" table and adjust the last table pointer to @topa.
+ */
+static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
+{
+	struct topa *last = buf->last;
+
+	list_add_tail(&topa->list, &buf->tables);
+
+	if (!buf->first) {
+		buf->first = buf->last = buf->cur = topa;
+		return;
+	}
+
+	topa->offset = last->offset + last->size;
+	buf->last = topa;
+
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+		return;
+
+	BUG_ON(last->last != TENTS_PER_PAGE - 1);
+
+	TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
+	TOPA_ENTRY(last, -1)->end = 1;
+}
+
+/**
+ * topa_table_full() - check if a ToPA table is filled up
+ * @topa:	ToPA table.
+ */
+static bool topa_table_full(struct topa *topa)
+{
+	/* single-entry ToPA is a special case */
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+		return !!topa->last;
+
+	return topa->last == TENTS_PER_PAGE - 1;
+}
+
+/**
+ * topa_insert_pages() - create a list of ToPA tables
+ * @buf:	PT buffer being initialized.
+ * @gfp:	Allocation flags.
+ *
+ * This initializes a list of ToPA tables with entries from
+ * the data_pages provided by rb_alloc_aux().
+ *
+ * Return:	0 on success or error code.
+ */
+static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
+{
+	struct topa *topa = buf->last;
+	int order = 0;
+	struct page *p;
+
+	p = virt_to_page(buf->data_pages[buf->nr_pages]);
+	if (PagePrivate(p))
+		order = page_private(p);
+
+	if (topa_table_full(topa)) {
+		topa = topa_alloc(buf->cpu, gfp);
+		if (!topa)
+			return -ENOMEM;
+
+		topa_insert_table(buf, topa);
+	}
+
+	TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
+	TOPA_ENTRY(topa, -1)->size = order;
+	if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
+		TOPA_ENTRY(topa, -1)->intr = 1;
+		TOPA_ENTRY(topa, -1)->stop = 1;
+	}
+
+	topa->last++;
+	topa->size += sizes(order);
+
+	buf->nr_pages += 1ul << order;
+
+	return 0;
+}
+
+/**
+ * pt_topa_dump() - print ToPA tables and their entries
+ * @buf:	PT buffer.
+ */
+static void pt_topa_dump(struct pt_buffer *buf)
+{
+	struct topa *topa;
+
+	list_for_each_entry(topa, &buf->tables, list) {
+		int i;
+
+		pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
+			 topa->phys, topa->offset, topa->size);
+		for (i = 0; i < TENTS_PER_PAGE; i++) {
+			pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
+				 &topa->table[i],
+				 (unsigned long)topa->table[i].base << TOPA_SHIFT,
+				 sizes(topa->table[i].size),
+				 topa->table[i].end ?  'E' : ' ',
+				 topa->table[i].intr ? 'I' : ' ',
+				 topa->table[i].stop ? 'S' : ' ',
+				 *(u64 *)&topa->table[i]);
+			if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
+			     topa->table[i].stop) ||
+			    topa->table[i].end)
+				break;
+		}
+	}
+}
+
+/**
+ * pt_buffer_advance() - advance to the next output region
+ * @buf:	PT buffer.
+ *
+ * Advance the current pointers in the buffer to the next ToPA entry.
+ */
+static void pt_buffer_advance(struct pt_buffer *buf)
+{
+	buf->output_off = 0;
+	buf->cur_idx++;
+
+	if (buf->cur_idx == buf->cur->last) {
+		if (buf->cur == buf->last)
+			buf->cur = buf->first;
+		else
+			buf->cur = list_entry(buf->cur->list.next, struct topa,
+					      list);
+		buf->cur_idx = 0;
+	}
+}
+
+/**
+ * pt_update_head() - calculate current offsets and sizes
+ * @pt:		Per-cpu pt context.
+ *
+ * Update buffer's current write pointer position and data size.
+ */
+static void pt_update_head(struct pt *pt)
+{
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	u64 topa_idx, base, old;
+
+	/* offset of the first region in this table from the beginning of buf */
+	base = buf->cur->offset + buf->output_off;
+
+	/* offset of the current output region within this table */
+	for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
+		base += sizes(buf->cur->table[topa_idx].size);
+
+	if (buf->snapshot) {
+		local_set(&buf->data_size, base);
+	} else {
+		old = (local64_xchg(&buf->head, base) &
+		       ((buf->nr_pages << PAGE_SHIFT) - 1));
+		if (base < old)
+			base += buf->nr_pages << PAGE_SHIFT;
+
+		local_add(base - old, &buf->data_size);
+	}
+}
+
+/**
+ * pt_buffer_region() - obtain current output region's address
+ * @buf:	PT buffer.
+ */
+static void *pt_buffer_region(struct pt_buffer *buf)
+{
+	return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
+}
+
+/**
+ * pt_buffer_region_size() - obtain current output region's size
+ * @buf:	PT buffer.
+ */
+static size_t pt_buffer_region_size(struct pt_buffer *buf)
+{
+	return sizes(buf->cur->table[buf->cur_idx].size);
+}
+
+/**
+ * pt_handle_status() - take care of possible status conditions
+ * @pt:		Per-cpu pt context.
+ */
+static void pt_handle_status(struct pt *pt)
+{
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	int advance = 0;
+	u64 status;
+
+	rdmsrl(MSR_IA32_RTIT_STATUS, status);
+
+	if (status & RTIT_STATUS_ERROR) {
+		pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
+		pt_topa_dump(buf);
+		status &= ~RTIT_STATUS_ERROR;
+	}
+
+	if (status & RTIT_STATUS_STOPPED) {
+		status &= ~RTIT_STATUS_STOPPED;
+
+		/*
+		 * On systems that only do single-entry ToPA, hitting STOP
+		 * means we are already losing data; need to let the decoder
+		 * know.
+		 */
+		if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
+		    buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+			local_inc(&buf->lost);
+			advance++;
+		}
+	}
+
+	/*
+	 * Also on single-entry ToPA implementations, interrupt will come
+	 * before the output reaches its output region's boundary.
+	 */
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
+	    pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
+		void *head = pt_buffer_region(buf);
+
+		/* everything within this margin needs to be zeroed out */
+		memset(head + buf->output_off, 0,
+		       pt_buffer_region_size(buf) -
+		       buf->output_off);
+		advance++;
+	}
+
+	if (advance)
+		pt_buffer_advance(buf);
+
+	wrmsrl(MSR_IA32_RTIT_STATUS, status);
+}
+
+/**
+ * pt_read_offset() - translate registers into buffer pointers
+ * @buf:	PT buffer.
+ *
+ * Set buffer's output pointers from MSR values.
+ */
+static void pt_read_offset(struct pt_buffer *buf)
+{
+	u64 offset, base_topa;
+
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
+	buf->cur = phys_to_virt(base_topa);
+
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+	/* offset within current output region */
+	buf->output_off = offset >> 32;
+	/* index of current output region within this table */
+	buf->cur_idx = (offset & 0xffffff80) >> 7;
+}
+
+/**
+ * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
+ * @buf:	PT buffer.
+ * @pg:		Page offset in the buffer.
+ *
+ * When advancing to the next output region (ToPA entry), given a page offset
+ * into the buffer, we need to find the offset of the first page in the next
+ * region.
+ */
+static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
+{
+	struct topa_entry *te = buf->topa_index[pg];
+
+	/* one region */
+	if (buf->first == buf->last && buf->first->last == 1)
+		return pg;
+
+	do {
+		pg++;
+		pg &= buf->nr_pages - 1;
+	} while (buf->topa_index[pg] == te);
+
+	return pg;
+}
+
+/**
+ * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
+ * @buf:	PT buffer.
+ * @handle:	Current output handle.
+ *
+ * Place INT and STOP marks to prevent overwriting old data that the consumer
+ * hasn't yet collected.
+ */
+static int pt_buffer_reset_markers(struct pt_buffer *buf,
+				   struct perf_output_handle *handle)
+
+{
+	unsigned long idx, npages, end;
+
+	if (buf->snapshot)
+		return 0;
+
+	/* can't stop in the middle of an output region */
+	if (buf->output_off + handle->size + 1 <
+	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
+		return -EINVAL;
+
+
+	/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+		return 0;
+
+	/* clear STOP and INT from current entry */
+	buf->topa_index[buf->stop_pos]->stop = 0;
+	buf->topa_index[buf->intr_pos]->intr = 0;
+
+	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
+		npages = (handle->size + 1) >> PAGE_SHIFT;
+		end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages;
+		/*if (end > handle->wakeup >> PAGE_SHIFT)
+		  end = handle->wakeup >> PAGE_SHIFT;*/
+		idx = end & (buf->nr_pages - 1);
+		buf->stop_pos = idx;
+		idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1;
+		idx &= buf->nr_pages - 1;
+		buf->intr_pos = idx;
+	}
+
+	buf->topa_index[buf->stop_pos]->stop = 1;
+	buf->topa_index[buf->intr_pos]->intr = 1;
+
+	return 0;
+}
+
+/**
+ * pt_buffer_setup_topa_index() - build topa_index[] table of regions
+ * @buf:	PT buffer.
+ *
+ * topa_index[] references output regions indexed by offset into the
+ * buffer for purposes of quick reverse lookup.
+ */
+static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
+{
+	struct topa *cur = buf->first, *prev = buf->last;
+	struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
+		*te_prev = TOPA_ENTRY(prev, prev->last - 1);
+	int pg = 0, idx = 0, ntopa = 0;
+
+	while (pg < buf->nr_pages) {
+		int tidx;
+
+		/* pages within one topa entry */
+		for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
+			buf->topa_index[pg] = te_prev;
+
+		te_prev = te_cur;
+
+		if (idx == cur->last - 1) {
+			/* advance to next topa table */
+			idx = 0;
+			cur = list_entry(cur->list.next, struct topa, list);
+			ntopa++;
+		} else
+			idx++;
+		te_cur = TOPA_ENTRY(cur, idx);
+	}
+
+}
+
+/**
+ * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
+ * @buf:	PT buffer.
+ * @head:	Write pointer (aux_head) from AUX buffer.
+ *
+ * Find the ToPA table and entry corresponding to given @head and set buffer's
+ * "current" pointers accordingly.
+ */
+static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
+{
+	int pg;
+
+	if (buf->snapshot)
+		head &= (buf->nr_pages << PAGE_SHIFT) - 1;
+
+	pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+	pg = pt_topa_next_entry(buf, pg);
+
+	buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
+	buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
+			(unsigned long)buf->cur) / sizeof(struct topa_entry);
+	buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
+
+	local64_set(&buf->head, head);
+	local_set(&buf->data_size, 0);
+}
+
+/**
+ * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
+ * @buf:	PT buffer.
+ */
+static void pt_buffer_fini_topa(struct pt_buffer *buf)
+{
+	struct topa *topa, *iter;
+
+	list_for_each_entry_safe(topa, iter, &buf->tables, list) {
+		/*
+		 * right now, this is in free_aux() path only, so
+		 * no need to unlink this table from the list
+		 */
+		topa_free(topa);
+	}
+}
+
+/**
+ * pt_buffer_init_topa() - initialize ToPA table for pt buffer
+ * @buf:	PT buffer.
+ * @size:	Total size of all regions within this ToPA.
+ * @gfp:	Allocation flags.
+ */
+static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
+			       gfp_t gfp)
+{
+	struct topa *topa;
+	int err;
+
+	topa = topa_alloc(buf->cpu, gfp);
+	if (!topa)
+		return -ENOMEM;
+
+	topa_insert_table(buf, topa);
+
+	while (buf->nr_pages < nr_pages) {
+		err = topa_insert_pages(buf, gfp);
+		if (err) {
+			pt_buffer_fini_topa(buf);
+			return -ENOMEM;
+		}
+	}
+
+	pt_buffer_setup_topa_index(buf);
+
+	/* link last table to the first one, unless we're double buffering */
+	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
+		TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
+		TOPA_ENTRY(buf->last, -1)->end = 1;
+	}
+
+	pt_topa_dump(buf);
+	return 0;
+}
+
+/**
+ * pt_buffer_setup_aux() - set up topa tables for a PT buffer
+ * @cpu:	Cpu on which to allocate, -1 means current.
+ * @pages:	Array of pointers to buffer pages passed from perf core.
+ * @nr_pages:	Number of pages in the buffer.
+ * @snapshot:	If this is a snapshot/overwrite counter.
+ *
+ * This is a pmu::setup_aux callback that sets up ToPA tables and all the
+ * bookkeeping for an AUX buffer.
+ *
+ * Return:	Our private PT buffer structure.
+ */
+static void *
+pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
+{
+	struct pt_buffer *buf;
+	int node, ret;
+
+	if (!nr_pages)
+		return NULL;
+
+	if (cpu == -1)
+		cpu = raw_smp_processor_id();
+	node = cpu_to_node(cpu);
+
+	buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
+			   GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->cpu = cpu;
+	buf->snapshot = snapshot;
+	buf->data_pages = pages;
+
+	INIT_LIST_HEAD(&buf->tables);
+
+	ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
+	if (ret) {
+		kfree(buf);
+		return NULL;
+	}
+
+	return buf;
+}
+
+/**
+ * pt_buffer_free_aux() - perf AUX deallocation path callback
+ * @data:	PT buffer.
+ */
+static void pt_buffer_free_aux(void *data)
+{
+	struct pt_buffer *buf = data;
+
+	pt_buffer_fini_topa(buf);
+	kfree(buf);
+}
+
+/**
+ * pt_buffer_is_full() - check if the buffer is full
+ * @buf:	PT buffer.
+ * @pt:		Per-cpu pt handle.
+ *
+ * If the user hasn't read data from the output region that aux_head
+ * points to, the buffer is considered full: the user needs to read at
+ * least this region and update aux_tail to point past it.
+ */
+static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+{
+	if (buf->snapshot)
+		return false;
+
+	if (local_read(&buf->data_size) >= pt->handle.size)
+		return true;
+
+	return false;
+}
+
+/**
+ * intel_pt_interrupt() - PT PMI handler
+ */
+void intel_pt_interrupt(void)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf;
+	struct perf_event *event = pt->handle.event;
+
+	/*
+	 * There may be a dangling PT bit in the interrupt status register
+	 * after PT has been disabled by pt_event_stop(). Make sure we don't
+	 * do anything (particularly, re-enable) for this event here.
+	 */
+	if (!ACCESS_ONCE(pt->handle_nmi))
+		return;
+
+	pt_config_start(false);
+
+	if (!event)
+		return;
+
+	buf = perf_get_aux(&pt->handle);
+	if (!buf)
+		return;
+
+	pt_read_offset(buf);
+
+	pt_handle_status(pt);
+
+	pt_update_head(pt);
+
+	perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+			    local_xchg(&buf->lost, 0));
+
+	if (!event->hw.state) {
+		int ret;
+
+		buf = perf_aux_output_begin(&pt->handle, event);
+		if (!buf) {
+			event->hw.state = PERF_HES_STOPPED;
+			return;
+		}
+
+		pt_buffer_reset_offsets(buf, pt->handle.head);
+		ret = pt_buffer_reset_markers(buf, &pt->handle);
+		if (ret) {
+			perf_aux_output_end(&pt->handle, 0, true);
+			return;
+		}
+
+		pt_config_buffer(buf->cur->table, buf->cur_idx,
+				 buf->output_off);
+		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+		pt_config(event);
+	}
+}
+
+/*
+ * PMU callbacks
+ */
+
+static void pt_event_start(struct perf_event *event, int mode)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+	if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) {
+		event->hw.state = PERF_HES_STOPPED;
+		return;
+	}
+
+	ACCESS_ONCE(pt->handle_nmi) = 1;
+	event->hw.state = 0;
+
+	pt_config_buffer(buf->cur->table, buf->cur_idx,
+			 buf->output_off);
+	wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+	pt_config(event);
+}
+
+static void pt_event_stop(struct perf_event *event, int mode)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+	/*
+	 * Protect against the PMI racing with disabling wrmsr,
+	 * see comment in intel_pt_interrupt().
+	 */
+	ACCESS_ONCE(pt->handle_nmi) = 0;
+	pt_config_start(false);
+
+	if (event->hw.state == PERF_HES_STOPPED)
+		return;
+
+	event->hw.state = PERF_HES_STOPPED;
+
+	if (mode & PERF_EF_UPDATE) {
+		struct pt *pt = this_cpu_ptr(&pt_ctx);
+		struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+		if (!buf)
+			return;
+
+		if (WARN_ON_ONCE(pt->handle.event != event))
+			return;
+
+		pt_read_offset(buf);
+
+		pt_handle_status(pt);
+
+		pt_update_head(pt);
+	}
+}
+
+static void pt_event_del(struct perf_event *event, int mode)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf;
+
+	pt_event_stop(event, PERF_EF_UPDATE);
+
+	buf = perf_get_aux(&pt->handle);
+
+	if (buf) {
+		if (buf->snapshot)
+			pt->handle.head =
+				local_xchg(&buf->data_size,
+					   buf->nr_pages << PAGE_SHIFT);
+		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+				    local_xchg(&buf->lost, 0));
+	}
+}
+
+static int pt_event_add(struct perf_event *event, int mode)
+{
+	struct pt_buffer *buf;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = -EBUSY;
+
+	if (pt->handle.event)
+		goto out;
+
+	buf = perf_aux_output_begin(&pt->handle, event);
+	if (!buf) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pt_buffer_reset_offsets(buf, pt->handle.head);
+	if (!buf->snapshot) {
+		ret = pt_buffer_reset_markers(buf, &pt->handle);
+		if (ret) {
+			perf_aux_output_end(&pt->handle, 0, true);
+			goto out;
+		}
+	}
+
+	if (mode & PERF_EF_START) {
+		pt_event_start(event, 0);
+		if (hwc->state == PERF_HES_STOPPED) {
+			pt_event_del(event, 0);
+			ret = -EBUSY;
+		}
+	} else {
+		hwc->state = PERF_HES_STOPPED;
+	}
+
+	ret = 0;
+out:
+
+	if (ret)
+		hwc->state = PERF_HES_STOPPED;
+
+	return ret;
+}
+
+static void pt_event_read(struct perf_event *event)
+{
+}
+
+static void pt_event_destroy(struct perf_event *event)
+{
+	x86_del_exclusive(x86_lbr_exclusive_pt);
+}
+
+static int pt_event_init(struct perf_event *event)
+{
+	if (event->attr.type != pt_pmu.pmu.type)
+		return -ENOENT;
+
+	if (!pt_event_valid(event))
+		return -EINVAL;
+
+	if (x86_add_exclusive(x86_lbr_exclusive_pt))
+		return -EBUSY;
+
+	event->destroy = pt_event_destroy;
+
+	return 0;
+}
+
+static __init int pt_init(void)
+{
+	int ret, cpu, prior_warn = 0;
+
+	BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		u64 ctl;
+
+		ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
+		if (!ret && (ctl & RTIT_CTL_TRACEEN))
+			prior_warn++;
+	}
+	put_online_cpus();
+
+	if (prior_warn) {
+		x86_add_exclusive(x86_lbr_exclusive_pt);
+		pr_warn("PT is enabled at boot time, doing nothing\n");
+
+		return -EBUSY;
+	}
+
+	ret = pt_pmu_hw_init();
+	if (ret)
+		return ret;
+
+	if (!pt_cap_get(PT_CAP_topa_output)) {
+		pr_warn("ToPA output is not supported on this CPU\n");
+		return -ENODEV;
+	}
+
+	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+		pt_pmu.pmu.capabilities =
+			PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+
+	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+	pt_pmu.pmu.attr_groups	= pt_attr_groups;
+	pt_pmu.pmu.task_ctx_nr	= perf_sw_context;
+	pt_pmu.pmu.event_init	= pt_event_init;
+	pt_pmu.pmu.add		= pt_event_add;
+	pt_pmu.pmu.del		= pt_event_del;
+	pt_pmu.pmu.start	= pt_event_start;
+	pt_pmu.pmu.stop		= pt_event_stop;
+	pt_pmu.pmu.read		= pt_event_read;
+	pt_pmu.pmu.setup_aux	= pt_buffer_setup_aux;
+	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
+	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
+
+	return ret;
+}
+
+module_init(pt_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 21af6149e..12d9548 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -1132,8 +1132,7 @@
 		}
 	}
 
-	if (ubox_dev)
-		pci_dev_put(ubox_dev);
+	pci_dev_put(ubox_dev);
 
 	return err ? pcibios_err_to_errno(err) : 0;
 }
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 6063909..3d423a1 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -41,6 +41,7 @@
 		{ X86_FEATURE_HWP_ACT_WINDOW,	CR_EAX, 9, 0x00000006, 0 },
 		{ X86_FEATURE_HWP_EPP,		CR_EAX,10, 0x00000006, 0 },
 		{ X86_FEATURE_HWP_PKG_REQ,	CR_EAX,11, 0x00000006, 0 },
+		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 24d0796..1deffe6 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -354,6 +354,7 @@
 {
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
+	int length;
 	unsigned long recovered_insn =
 		recover_probed_instruction(buf, (unsigned long)src);
 
@@ -361,16 +362,18 @@
 		return 0;
 	kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 	insn_get_length(&insn);
+	length = insn.length;
+
 	/* Another subsystem puts a breakpoint, failed to recover */
 	if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
 		return 0;
-	memcpy(dest, insn.kaddr, insn.length);
+	memcpy(dest, insn.kaddr, length);
 
 #ifdef CONFIG_X86_64
 	if (insn_rip_relative(&insn)) {
 		s64 newdisp;
 		u8 *disp;
-		kernel_insn_init(&insn, dest, insn.length);
+		kernel_insn_init(&insn, dest, length);
 		insn_get_displacement(&insn);
 		/*
 		 * The copied instruction uses the %rip-relative addressing
@@ -394,7 +397,7 @@
 		*(s32 *) disp = (s32) newdisp;
 	}
 #endif
-	return insn.length;
+	return length;
 }
 
 static int arch_copy_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7035f6b..50e547e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -77,9 +77,6 @@
 #include <asm/realmode.h>
 #include <asm/misc.h>
 
-/* State of each CPU */
-DEFINE_PER_CPU(int, cpu_state) = { 0 };
-
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 EXPORT_SYMBOL(smp_num_siblings);
@@ -257,7 +254,7 @@
 	lock_vector_lock();
 	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
-	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+	cpu_set_state_online(smp_processor_id());
 	x86_platform.nmi_init();
 
 	/* enable local interrupts */
@@ -954,7 +951,10 @@
 	 */
 	mtrr_save_state();
 
-	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	/* x86 CPUs take themselves offline, so delayed offline is OK. */
+	err = cpu_check_up_prepare(cpu);
+	if (err && err != -EBUSY)
+		return err;
 
 	/* the FPU context is blank, nobody can own it */
 	__cpu_disable_lazy_restore(cpu);
@@ -1197,7 +1197,7 @@
 	switch_to_new_gdt(me);
 	/* already set me in cpu_online_mask in boot_cpu_init() */
 	cpumask_set_cpu(me, cpu_callout_mask);
-	per_cpu(cpu_state, me) = CPU_ONLINE;
+	cpu_set_state_online(me);
 }
 
 void __init native_smp_cpus_done(unsigned int max_cpus)
@@ -1324,14 +1324,10 @@
 	numa_remove_cpu(cpu);
 }
 
-static DEFINE_PER_CPU(struct completion, die_complete);
-
 void cpu_disable_common(void)
 {
 	int cpu = smp_processor_id();
 
-	init_completion(&per_cpu(die_complete, smp_processor_id()));
-
 	remove_siblinginfo(cpu);
 
 	/* It's now safe to remove this processor from the online map */
@@ -1355,24 +1351,27 @@
 	return 0;
 }
 
-void cpu_die_common(unsigned int cpu)
+int common_cpu_die(unsigned int cpu)
 {
-	wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
-}
+	int ret = 0;
 
-void native_cpu_die(unsigned int cpu)
-{
 	/* We don't do anything here: idle task is faking death itself. */
 
-	cpu_die_common(cpu);
-
 	/* They ack this in play_dead() by setting CPU_DEAD */
-	if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+	if (cpu_wait_death(cpu, 5)) {
 		if (system_state == SYSTEM_RUNNING)
 			pr_info("CPU %u is now offline\n", cpu);
 	} else {
 		pr_err("CPU %u didn't die...\n", cpu);
+		ret = -1;
 	}
+
+	return ret;
+}
+
+void native_cpu_die(unsigned int cpu)
+{
+	common_cpu_die(cpu);
 }
 
 void play_dead_common(void)
@@ -1381,10 +1380,8 @@
 	reset_lazy_tlbstate();
 	amd_e400_remove_cpu(raw_smp_processor_id());
 
-	mb();
 	/* Ack it */
-	__this_cpu_write(cpu_state, CPU_DEAD);
-	complete(&per_cpu(die_complete, smp_processor_id()));
+	(void)cpu_report_death();
 
 	/*
 	 * With physical CPU hotplug, we should halt the cpu
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index b79133a..5ecbfe5 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -57,7 +57,7 @@
 	/* test 3: check the value hasn't changed */
 	/* If this test fails, we managed to overwrite the data */
 	if (!rodata_test_data) {
-		printk(KERN_ERR "rodata_test: Test 3 failes (end data)\n");
+		printk(KERN_ERR "rodata_test: Test 3 fails (end data)\n");
 		return -ENODEV;
 	}
 	/* test 4: check if the rodata section is 4Kb aligned */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f4fa991..324ab52 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -123,7 +123,7 @@
 		 * but we need to notify RCU.
 		 */
 		rcu_nmi_enter();
-		prev_state = IN_KERNEL;  /* the value is irrelevant. */
+		prev_state = CONTEXT_KERNEL;  /* the value is irrelevant. */
 	}
 
 	/*
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7413ee3..8648438 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -90,14 +90,10 @@
 
 	set_cpu_online(cpu, true);
 
-	this_cpu_write(cpu_state, CPU_ONLINE);
-
-	wmb();
+	cpu_set_state_online(cpu);  /* Implies full memory barrier. */
 
 	/* We can take interrupts now: we're officially "up". */
 	local_irq_enable();
-
-	wmb();			/* make sure everything is out */
 }
 
 /*
@@ -451,7 +447,13 @@
 	xen_setup_timer(cpu);
 	xen_init_lock_cpu(cpu);
 
-	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	/*
+	 * PV VCPUs are always successfully taken down (see 'while' loop
+	 * in xen_cpu_die()), so -EBUSY is an error.
+	 */
+	rc = cpu_check_up_prepare(cpu);
+	if (rc)
+		return rc;
 
 	/* make sure interrupts start blocked */
 	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
@@ -467,10 +469,8 @@
 	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
 	BUG_ON(rc);
 
-	while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
+	while (cpu_report_state(cpu) != CPU_ONLINE)
 		HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
-		barrier();
-	}
 
 	return 0;
 }
@@ -499,11 +499,11 @@
 		schedule_timeout(HZ/10);
 	}
 
-	cpu_die_common(cpu);
-
-	xen_smp_intr_free(cpu);
-	xen_uninit_lock_cpu(cpu);
-	xen_teardown_timer(cpu);
+	if (common_cpu_die(cpu) == 0) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+		xen_teardown_timer(cpu);
+	}
 }
 
 static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
@@ -735,6 +735,16 @@
 static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int rc;
+
+	/*
+	 * This can happen if CPU was offlined earlier and
+	 * offlining timed out in common_cpu_die().
+	 */
+	if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+	}
+
 	/*
 	 * xen_smp_intr_init() needs to run before native_cpu_up()
 	 * so that IPI vectors are set up on the booting CPU before
@@ -756,12 +766,6 @@
 	return rc;
 }
 
-static void xen_hvm_cpu_die(unsigned int cpu)
-{
-	xen_cpu_die(cpu);
-	native_cpu_die(cpu);
-}
-
 void __init xen_hvm_smp_init(void)
 {
 	if (!xen_have_vector_callback)
@@ -769,7 +773,7 @@
 	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 	smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
 	smp_ops.cpu_up = xen_hvm_cpu_up;
-	smp_ops.cpu_die = xen_hvm_cpu_die;
+	smp_ops.cpu_die = xen_cpu_die;
 	smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
 	smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
 	smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu;
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 3d733ba..6b37904 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -405,11 +405,6 @@
 	regs->areg[8] = (unsigned long) &frame->uc;
 	regs->threadptr = tp;
 
-	/* Set access mode to USER_DS.  Nomenclature is outdated, but
-	 * functionality is used in uaccess.h
-	 */
-	set_fs(USER_DS);
-
 #if DEBUG_SIG
 	printk("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08x\n",
 		current->comm, current->pid, signal, frame, regs->pc);
diff --git a/block/blk-map.c b/block/blk-map.c
index b8d2725..da310a1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -124,10 +124,10 @@
 {
 	struct iovec iov;
 	struct iov_iter i;
+	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);
 
-	iov.iov_base = ubuf;
-	iov.iov_len = len;
-	iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len);
+	if (unlikely(ret < 0))
+		return ret;
 
 	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
 }
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index e1f71c3..55b6f15 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -335,16 +335,14 @@
 		struct iov_iter i;
 		struct iovec *iov = NULL;
 
-		ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
-					    0, NULL, &iov);
-		if (ret < 0) {
-			kfree(iov);
+		ret = import_iovec(rq_data_dir(rq),
+				   hdr->dxferp, hdr->iovec_count,
+				   0, &iov, &i);
+		if (ret < 0)
 			goto out_free_cdb;
-		}
 
 		/* SG_IO howto says that the shorter of the two wins */
-		iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count,
-			      min_t(unsigned, ret, hdr->dxfer_len));
+		iov_iter_truncate(&i, hdr->dxfer_len);
 
 		ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
 		kfree(iov);
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 297110c..9c4fd7a 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -26,7 +26,7 @@
 #include <linux/pfn.h>
 #include <linux/export.h>
 #include <linux/io.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include <linux/uaccess.h>
 
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
index 02e76ac..69f6b4a 100644
--- a/drivers/char/tile-srom.c
+++ b/drivers/char/tile-srom.c
@@ -27,7 +27,6 @@
 #include <linux/types.h>	/* size_t */
 #include <linux/proc_fs.h>
 #include <linux/fcntl.h>	/* O_ACCMODE */
-#include <linux/aio.h>
 #include <linux/pagemap.h>
 #include <linux/hugetlb.h>
 #include <linux/uaccess.h>
diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h
index 27cc95f..ce3c428 100644
--- a/drivers/gpu/drm/drm_trace.h
+++ b/drivers/gpu/drm/drm_trace.h
@@ -7,7 +7,6 @@
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM drm
-#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
 #define TRACE_INCLUDE_FILE drm_trace
 
 TRACE_EVENT(drm_vblank_event,
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 6058a01..d776621 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -12,7 +12,6 @@
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM i915
-#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
 #define TRACE_INCLUDE_FILE i915_trace
 
 /* pipe updates */
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h
index ce075cb..fdce406 100644
--- a/drivers/gpu/drm/radeon/radeon_trace.h
+++ b/drivers/gpu/drm/radeon/radeon_trace.h
@@ -9,7 +9,6 @@
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM radeon
-#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
 #define TRACE_INCLUDE_FILE radeon_trace
 
 TRACE_EVENT(radeon_bo_create,
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 152b006..15338af 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -92,7 +92,7 @@
 	depends on HID
 
 config HID_A4TECH
-	tristate "A4 tech mice" if EXPERT
+	tristate "A4 tech mice"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -113,7 +113,7 @@
 	game controllers.
 
 config HID_APPLE
-	tristate "Apple {i,Power,Mac}Books" if EXPERT
+	tristate "Apple {i,Power,Mac}Books"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -141,7 +141,7 @@
 	Support for Aureal Cy se W-01RN Remote Controller and other Aureal derived remotes.
 
 config HID_BELKIN
-	tristate "Belkin Flip KVM and Wireless keyboard" if EXPERT
+	tristate "Belkin Flip KVM and Wireless keyboard"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -158,14 +158,14 @@
 	 - BETOP 2185 PC & BFM MODE
 
 config HID_CHERRY
-	tristate "Cherry Cymotion keyboard" if EXPERT
+	tristate "Cherry Cymotion keyboard"
 	depends on HID
 	default !EXPERT
 	---help---
 	Support for Cherry Cymotion keyboard.
 
 config HID_CHICONY
-	tristate "Chicony Tactical pad" if EXPERT
+	tristate "Chicony Tactical pad"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -196,7 +196,7 @@
 	customizable USB descriptor fields are exposed as sysfs attributes.
 
 config HID_CYPRESS
-	tristate "Cypress mouse and barcode readers" if EXPERT
+	tristate "Cypress mouse and barcode readers"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -245,7 +245,7 @@
 	different devices than those handled by CONFIG_TOUCHSCREEN_USB_ELO.
 
 config HID_EZKEY
-	tristate "Ezkey BTC 8193 keyboard" if EXPERT
+	tristate "Ezkey BTC 8193 keyboard"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -286,12 +286,6 @@
 	Currently the following devices are know to be supported:
 	  - MSI GT683R
 
-config HID_HUION
-	tristate "Huion tablets"
-	depends on USB_HID
-	---help---
-	Support for Huion 580 tablet.
-
 config HID_KEYTOUCH
 	tristate "Keytouch HID devices"
 	depends on HID
@@ -312,9 +306,9 @@
 
 config HID_UCLOGIC
 	tristate "UC-Logic"
-	depends on HID
+	depends on USB_HID
 	---help---
-	Support for UC-Logic tablets.
+	Support for UC-Logic and Huion tablets.
 
 config HID_WALTOP
 	tristate "Waltop"
@@ -344,7 +338,7 @@
 	Support for Twinhan IR remote control.
 
 config HID_KENSINGTON
-	tristate "Kensington Slimblade Trackball" if EXPERT
+	tristate "Kensington Slimblade Trackball"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -372,7 +366,7 @@
 	- ThinkPad Compact USB Keyboard with TrackPoint (supports Fn keys)
 
 config HID_LOGITECH
-	tristate "Logitech devices" if EXPERT
+	tristate "Logitech devices"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -461,14 +455,14 @@
 	Apple Wireless "Magic" Mouse and the Apple Wireless "Magic" Trackpad.
 
 config HID_MICROSOFT
-	tristate "Microsoft non-fully HID-compliant devices" if EXPERT
+	tristate "Microsoft non-fully HID-compliant devices"
 	depends on HID
 	default !EXPERT
 	---help---
 	Support for Microsoft devices that are not fully compliant with HID standard.
 
 config HID_MONTEREY
-	tristate "Monterey Genius KB29E keyboard" if EXPERT
+	tristate "Monterey Genius KB29E keyboard"
 	depends on HID
 	default !EXPERT
 	---help---
@@ -638,7 +632,6 @@
 
 config HID_PLANTRONICS
 	tristate "Plantronics USB HID Driver"
-	default !EXPERT
 	depends on HID
 	---help---
 	Provides HID support for Plantronics telephony devices.
@@ -885,6 +878,21 @@
 	  for events and handle data streams. Each sensor driver can format
 	  data and present to user mode using input or IIO interface.
 
+config HID_SENSOR_CUSTOM_SENSOR
+	tristate "HID Sensors hub custom sensor support"
+	depends on HID_SENSOR_HUB
+	default n
+	---help---
+	  HID Sensor hub specification allows definition of some custom and
+	  generic sensors. Unlike other HID sensors, they can't be exported
+	  via Linux IIO because of custom fields. This is up to the manufacturer
+	  to decide how to interpret these special sensor ids and process in
+	  the user space. Currently some manufacturers are using these ids for
+	  sensor calibration and debugging other sensors. Manufacturers
+	  should't use these special custom sensor ids to export any of the
+	  standard sensors.
+	  Select this config option for custom/generic sensor support.
+
 endmenu
 
 endif # HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 6f19958..e4a21df 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -41,7 +41,6 @@
 obj-$(CONFIG_HID_HOLTEK)	+= hid-holtek-kbd.o
 obj-$(CONFIG_HID_HOLTEK)	+= hid-holtek-mouse.o
 obj-$(CONFIG_HID_HOLTEK)	+= hid-holtekff.o
-obj-$(CONFIG_HID_HUION)		+= hid-huion.o
 obj-$(CONFIG_HID_HYPERV_MOUSE)	+= hid-hyperv.o
 obj-$(CONFIG_HID_ICADE)		+= hid-icade.o
 obj-$(CONFIG_HID_KENSINGTON)	+= hid-kensington.o
@@ -101,6 +100,7 @@
 obj-$(CONFIG_HID_WALTOP)	+= hid-waltop.o
 obj-$(CONFIG_HID_WIIMOTE)	+= hid-wiimote.o
 obj-$(CONFIG_HID_SENSOR_HUB)	+= hid-sensor-hub.o
+obj-$(CONFIG_HID_SENSOR_CUSTOM_SENSOR)	+= hid-sensor-custom.o
 
 obj-$(CONFIG_USB_HID)		+= usbhid/
 obj-$(CONFIG_USB_MOUSE)		+= usbhid/
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 56ce8c2..722a925 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1562,12 +1562,26 @@
 	return count;
 }
 
+static ssize_t
+show_country(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+
+	return sprintf(buf, "%02x\n", hdev->country & 0xff);
+}
+
 static struct bin_attribute dev_bin_attr_report_desc = {
 	.attr = { .name = "report_descriptor", .mode = 0444 },
 	.read = read_report_descriptor,
 	.size = HID_MAX_DESCRIPTOR_SIZE,
 };
 
+static struct device_attribute dev_attr_country = {
+	.attr = { .name = "country", .mode = 0444 },
+	.show = show_country,
+};
+
 int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
 {
 	static const char *types[] = { "Device", "Pointer", "Mouse", "Device",
@@ -1646,6 +1660,11 @@
 		bus = "<UNKNOWN>";
 	}
 
+	ret = device_create_file(&hdev->dev, &dev_attr_country);
+	if (ret)
+		hid_warn(hdev,
+			 "can't create sysfs country code attribute err: %d\n", ret);
+
 	ret = device_create_bin_file(&hdev->dev, &dev_bin_attr_report_desc);
 	if (ret)
 		hid_warn(hdev,
@@ -1661,6 +1680,7 @@
 
 void hid_disconnect(struct hid_device *hdev)
 {
+	device_remove_file(&hdev->dev, &dev_attr_country);
 	device_remove_bin_file(&hdev->dev, &dev_bin_attr_report_desc);
 	if (hdev->claimed & HID_CLAIMED_INPUT)
 		hidinput_disconnect(hdev);
@@ -1824,6 +1844,7 @@
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) },
 #if IS_ENABLED(CONFIG_HID_LENOVO)
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 8bf61d2..c785095 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -165,6 +165,7 @@
     {0, 0x53, "DeviceIndex"},
     {0, 0x54, "ContactCount"},
     {0, 0x55, "ContactMaximumNumber"},
+    {0, 0x59, "ButtonType"},
     {0, 0x5A, "SecondaryBarrelSwitch"},
     {0, 0x5B, "TransducerSerialNumber"},
   { 15, 0, "PhysicalInterfaceDevice" },
@@ -1127,7 +1128,8 @@
 
 				if (!list->hdev || !list->hdev->debug) {
 					ret = -EIO;
-					break;
+					set_current_state(TASK_RUNNING);
+					goto out;
 				}
 
 				/* allow O_NONBLOCK from other threads */
diff --git a/drivers/hid/hid-huion.c b/drivers/hid/hid-huion.c
deleted file mode 100644
index 61b68ca..0000000
--- a/drivers/hid/hid-huion.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- *  HID driver for Huion devices not fully compliant with HID standard
- *
- *  Copyright (c) 2013 Martin Rusko
- *  Copyright (c) 2014 Nikolai Kondrashov
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <linux/device.h>
-#include <linux/hid.h>
-#include <linux/module.h>
-#include <linux/usb.h>
-#include <asm/unaligned.h>
-#include "usbhid/usbhid.h"
-
-#include "hid-ids.h"
-
-/* Report descriptor template placeholder head */
-#define HUION_PH_HEAD	0xFE, 0xED, 0x1D
-
-/* Report descriptor template placeholder IDs */
-enum huion_ph_id {
-	HUION_PH_ID_X_LM,
-	HUION_PH_ID_X_PM,
-	HUION_PH_ID_Y_LM,
-	HUION_PH_ID_Y_PM,
-	HUION_PH_ID_PRESSURE_LM,
-	HUION_PH_ID_NUM
-};
-
-/* Report descriptor template placeholder */
-#define HUION_PH(_ID) HUION_PH_HEAD, HUION_PH_ID_##_ID
-
-/* Fixed report descriptor template */
-static const __u8 huion_tablet_rdesc_template[] = {
-	0x05, 0x0D,             /*  Usage Page (Digitizer),                 */
-	0x09, 0x02,             /*  Usage (Pen),                            */
-	0xA1, 0x01,             /*  Collection (Application),               */
-	0x85, 0x07,             /*      Report ID (7),                      */
-	0x09, 0x20,             /*      Usage (Stylus),                     */
-	0xA0,                   /*      Collection (Physical),              */
-	0x14,                   /*          Logical Minimum (0),            */
-	0x25, 0x01,             /*          Logical Maximum (1),            */
-	0x75, 0x01,             /*          Report Size (1),                */
-	0x09, 0x42,             /*          Usage (Tip Switch),             */
-	0x09, 0x44,             /*          Usage (Barrel Switch),          */
-	0x09, 0x46,             /*          Usage (Tablet Pick),            */
-	0x95, 0x03,             /*          Report Count (3),               */
-	0x81, 0x02,             /*          Input (Variable),               */
-	0x95, 0x03,             /*          Report Count (3),               */
-	0x81, 0x03,             /*          Input (Constant, Variable),     */
-	0x09, 0x32,             /*          Usage (In Range),               */
-	0x95, 0x01,             /*          Report Count (1),               */
-	0x81, 0x02,             /*          Input (Variable),               */
-	0x95, 0x01,             /*          Report Count (1),               */
-	0x81, 0x03,             /*          Input (Constant, Variable),     */
-	0x75, 0x10,             /*          Report Size (16),               */
-	0x95, 0x01,             /*          Report Count (1),               */
-	0xA4,                   /*          Push,                           */
-	0x05, 0x01,             /*          Usage Page (Desktop),           */
-	0x65, 0x13,             /*          Unit (Inch),                    */
-	0x55, 0xFD,             /*          Unit Exponent (-3),             */
-	0x34,                   /*          Physical Minimum (0),           */
-	0x09, 0x30,             /*          Usage (X),                      */
-	0x27, HUION_PH(X_LM),   /*          Logical Maximum (PLACEHOLDER),  */
-	0x47, HUION_PH(X_PM),   /*          Physical Maximum (PLACEHOLDER), */
-	0x81, 0x02,             /*          Input (Variable),               */
-	0x09, 0x31,             /*          Usage (Y),                      */
-	0x27, HUION_PH(Y_LM),   /*          Logical Maximum (PLACEHOLDER),  */
-	0x47, HUION_PH(Y_PM),   /*          Physical Maximum (PLACEHOLDER), */
-	0x81, 0x02,             /*          Input (Variable),               */
-	0xB4,                   /*          Pop,                            */
-	0x09, 0x30,             /*          Usage (Tip Pressure),           */
-	0x27,
-	HUION_PH(PRESSURE_LM),  /*          Logical Maximum (PLACEHOLDER),  */
-	0x81, 0x02,             /*          Input (Variable),               */
-	0xC0,                   /*      End Collection,                     */
-	0xC0                    /*  End Collection                          */
-};
-
-/* Parameter indices */
-enum huion_prm {
-	HUION_PRM_X_LM		= 1,
-	HUION_PRM_Y_LM		= 2,
-	HUION_PRM_PRESSURE_LM	= 4,
-	HUION_PRM_RESOLUTION	= 5,
-	HUION_PRM_NUM
-};
-
-/* Driver data */
-struct huion_drvdata {
-	__u8 *rdesc;
-	unsigned int rsize;
-};
-
-static __u8 *huion_report_fixup(struct hid_device *hdev, __u8 *rdesc,
-		unsigned int *rsize)
-{
-	struct huion_drvdata *drvdata = hid_get_drvdata(hdev);
-	switch (hdev->product) {
-	case USB_DEVICE_ID_HUION_TABLET:
-		if (drvdata->rdesc != NULL) {
-			rdesc = drvdata->rdesc;
-			*rsize = drvdata->rsize;
-		}
-		break;
-	}
-	return rdesc;
-}
-
-/**
- * Enable fully-functional tablet mode and determine device parameters.
- *
- * @hdev:	HID device
- */
-static int huion_tablet_enable(struct hid_device *hdev)
-{
-	int rc;
-	struct usb_device *usb_dev = hid_to_usb_dev(hdev);
-	struct huion_drvdata *drvdata = hid_get_drvdata(hdev);
-	__le16 *buf = NULL;
-	size_t len;
-	s32 params[HUION_PH_ID_NUM];
-	s32 resolution;
-	__u8 *p;
-	s32 v;
-
-	/*
-	 * Read string descriptor containing tablet parameters. The specific
-	 * string descriptor and data were discovered by sniffing the Windows
-	 * driver traffic.
-	 * NOTE: This enables fully-functional tablet mode.
-	 */
-	len = HUION_PRM_NUM * sizeof(*buf);
-	buf = kmalloc(len, GFP_KERNEL);
-	if (buf == NULL) {
-		hid_err(hdev, "failed to allocate parameter buffer\n");
-		rc = -ENOMEM;
-		goto cleanup;
-	}
-	rc = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
-				USB_REQ_GET_DESCRIPTOR, USB_DIR_IN,
-				(USB_DT_STRING << 8) + 0x64,
-				0x0409, buf, len,
-				USB_CTRL_GET_TIMEOUT);
-	if (rc == -EPIPE) {
-		hid_err(hdev, "device parameters not found\n");
-		rc = -ENODEV;
-		goto cleanup;
-	} else if (rc < 0) {
-		hid_err(hdev, "failed to get device parameters: %d\n", rc);
-		rc = -ENODEV;
-		goto cleanup;
-	} else if (rc != len) {
-		hid_err(hdev, "invalid device parameters\n");
-		rc = -ENODEV;
-		goto cleanup;
-	}
-
-	/* Extract device parameters */
-	params[HUION_PH_ID_X_LM] = le16_to_cpu(buf[HUION_PRM_X_LM]);
-	params[HUION_PH_ID_Y_LM] = le16_to_cpu(buf[HUION_PRM_Y_LM]);
-	params[HUION_PH_ID_PRESSURE_LM] =
-		le16_to_cpu(buf[HUION_PRM_PRESSURE_LM]);
-	resolution = le16_to_cpu(buf[HUION_PRM_RESOLUTION]);
-	if (resolution == 0) {
-		params[HUION_PH_ID_X_PM] = 0;
-		params[HUION_PH_ID_Y_PM] = 0;
-	} else {
-		params[HUION_PH_ID_X_PM] = params[HUION_PH_ID_X_LM] *
-						1000 / resolution;
-		params[HUION_PH_ID_Y_PM] = params[HUION_PH_ID_Y_LM] *
-						1000 / resolution;
-	}
-
-	/* Allocate fixed report descriptor */
-	drvdata->rdesc = devm_kmalloc(&hdev->dev,
-				sizeof(huion_tablet_rdesc_template),
-				GFP_KERNEL);
-	if (drvdata->rdesc == NULL) {
-		hid_err(hdev, "failed to allocate fixed rdesc\n");
-		rc = -ENOMEM;
-		goto cleanup;
-	}
-	drvdata->rsize = sizeof(huion_tablet_rdesc_template);
-
-	/* Format fixed report descriptor */
-	memcpy(drvdata->rdesc, huion_tablet_rdesc_template,
-		drvdata->rsize);
-	for (p = drvdata->rdesc;
-	     p <= drvdata->rdesc + drvdata->rsize - 4;) {
-		if (p[0] == 0xFE && p[1] == 0xED && p[2] == 0x1D &&
-		    p[3] < sizeof(params)) {
-			v = params[p[3]];
-			put_unaligned(cpu_to_le32(v), (s32 *)p);
-			p += 4;
-		} else {
-			p++;
-		}
-	}
-
-	rc = 0;
-
-cleanup:
-	kfree(buf);
-	return rc;
-}
-
-static int huion_probe(struct hid_device *hdev, const struct hid_device_id *id)
-{
-	int rc;
-	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
-	struct huion_drvdata *drvdata;
-
-	/* Allocate and assign driver data */
-	drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL);
-	if (drvdata == NULL) {
-		hid_err(hdev, "failed to allocate driver data\n");
-		return -ENOMEM;
-	}
-	hid_set_drvdata(hdev, drvdata);
-
-	switch (id->product) {
-	case USB_DEVICE_ID_HUION_TABLET:
-		/* If this is the pen interface */
-		if (intf->cur_altsetting->desc.bInterfaceNumber == 0) {
-			rc = huion_tablet_enable(hdev);
-			if (rc) {
-				hid_err(hdev, "tablet enabling failed\n");
-				return rc;
-			}
-		}
-		break;
-	}
-
-	rc = hid_parse(hdev);
-	if (rc) {
-		hid_err(hdev, "parse failed\n");
-		return rc;
-	}
-
-	rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
-	if (rc) {
-		hid_err(hdev, "hw start failed\n");
-		return rc;
-	}
-
-	return 0;
-}
-
-static int huion_raw_event(struct hid_device *hdev, struct hid_report *report,
-			u8 *data, int size)
-{
-	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
-
-	/* If this is a pen input report */
-	if (intf->cur_altsetting->desc.bInterfaceNumber == 0 &&
-	    report->type == HID_INPUT_REPORT &&
-	    report->id == 0x07 && size >= 2)
-		/* Invert the in-range bit */
-		data[1] ^= 0x40;
-
-	return 0;
-}
-
-static const struct hid_device_id huion_devices[] = {
-	{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) },
-	{ }
-};
-MODULE_DEVICE_TABLE(hid, huion_devices);
-
-static struct hid_driver huion_driver = {
-	.name = "huion",
-	.id_table = huion_devices,
-	.probe = huion_probe,
-	.report_fixup = huion_report_fixup,
-	.raw_event = huion_raw_event,
-};
-module_hid_driver(huion_driver);
-
-MODULE_AUTHOR("Martin Rusko");
-MODULE_DESCRIPTION("Huion HID driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 9c47867..41f167e 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -459,6 +459,11 @@
 #define USB_DEVICE_ID_UGCI_FLYING	0x0020
 #define USB_DEVICE_ID_UGCI_FIGHTING	0x0030
 
+#define USB_VENDOR_ID_HP		0x03f0
+#define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A	0x0a4a
+#define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A	0x0b4a
+#define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE		0x134a
+
 #define USB_VENDOR_ID_HUION		0x256c
 #define USB_DEVICE_ID_HUION_TABLET	0x006e
 
@@ -533,6 +538,7 @@
 #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X	0x5011
 #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2	0x501a
 #define USB_DEVICE_ID_KYE_EASYPEN_M610X	0x5013
+#define USB_DEVICE_ID_KYE_PENSKETCH_M912	0x5015
 
 #define USB_VENDOR_ID_LABTEC		0x1020
 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD	0x0006
@@ -591,6 +597,9 @@
 #define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST  0xc110
 #define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f
 #define USB_DEVICE_ID_LOGITECH_HARMONY_PS3 0x0306
+#define USB_DEVICE_ID_LOGITECH_MOUSE_C01A	0xc01a
+#define USB_DEVICE_ID_LOGITECH_MOUSE_C05A	0xc05a
+#define USB_DEVICE_ID_LOGITECH_MOUSE_C06A	0xc06a
 #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD	0xc20a
 #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD	0xc211
 #define USB_DEVICE_ID_LOGITECH_EXTREME_3D	0xc215
@@ -1022,6 +1031,7 @@
 #define USB_DEVICE_ID_ZYTRONIC_ZXY100	0x0005
 
 #define USB_VENDOR_ID_PRIMAX	0x0461
+#define USB_DEVICE_ID_PRIMAX_MOUSE_4D22	0x4d22
 #define USB_DEVICE_ID_PRIMAX_KEYBOARD	0x4e05
 
 
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 32c2da4..008e89b 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -720,6 +720,29 @@
 		}
 		break;
 
+	case HID_UP_TELEPHONY:
+		switch (usage->hid & HID_USAGE) {
+		case 0x2f: map_key_clear(KEY_MICMUTE);		break;
+		case 0xb0: map_key_clear(KEY_NUMERIC_0);	break;
+		case 0xb1: map_key_clear(KEY_NUMERIC_1);	break;
+		case 0xb2: map_key_clear(KEY_NUMERIC_2);	break;
+		case 0xb3: map_key_clear(KEY_NUMERIC_3);	break;
+		case 0xb4: map_key_clear(KEY_NUMERIC_4);	break;
+		case 0xb5: map_key_clear(KEY_NUMERIC_5);	break;
+		case 0xb6: map_key_clear(KEY_NUMERIC_6);	break;
+		case 0xb7: map_key_clear(KEY_NUMERIC_7);	break;
+		case 0xb8: map_key_clear(KEY_NUMERIC_8);	break;
+		case 0xb9: map_key_clear(KEY_NUMERIC_9);	break;
+		case 0xba: map_key_clear(KEY_NUMERIC_STAR);	break;
+		case 0xbb: map_key_clear(KEY_NUMERIC_POUND);	break;
+		case 0xbc: map_key_clear(KEY_NUMERIC_A);	break;
+		case 0xbd: map_key_clear(KEY_NUMERIC_B);	break;
+		case 0xbe: map_key_clear(KEY_NUMERIC_C);	break;
+		case 0xbf: map_key_clear(KEY_NUMERIC_D);	break;
+		default: goto ignore;
+		}
+		break;
+
 	case HID_UP_CONSUMER:	/* USB HUT v1.12, pages 75-84 */
 		switch (usage->hid & HID_USAGE) {
 		case 0x000: goto ignore;
diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c
index 158fcf5..32e6d8d 100644
--- a/drivers/hid/hid-kye.c
+++ b/drivers/hid/hid-kye.c
@@ -268,6 +268,137 @@
 	0xC0                          /*  End Collection                  */
 };
 
+
+/* Original PenSketch M912 report descriptor size */
+#define PENSKETCH_M912_RDESC_ORIG_SIZE	482
+
+/* Fixed PenSketch M912 report descriptor */
+static __u8 pensketch_m912_rdesc_fixed[] = {
+	0x05, 0x01,                   /*  Usage Page (Desktop),           */
+	0x08,                         /*  Usage (00h),                    */
+	0xA1, 0x01,                   /*  Collection (Application),       */
+	0x85, 0x05,                   /*    Report ID (5),                */
+	0x06, 0x00, 0xFF,             /*    Usage Page (FF00h),           */
+	0x09, 0x01,                   /*    Usage (01h),                  */
+	0x15, 0x81,                   /*    Logical Minimum (-127),       */
+	0x25, 0x7F,                   /*    Logical Maximum (127),        */
+	0x75, 0x08,                   /*    Report Size (8),              */
+	0x95, 0x07,                   /*    Report Count (7),             */
+	0xB1, 0x02,                   /*    Feature (Variable),           */
+	0xC0,                         /*  End Collection,                 */
+	0x05, 0x0D,                   /*  Usage Page (Digitizer),         */
+	0x09, 0x02,                   /*  Usage (Pen),                    */
+	0xA1, 0x01,                   /*  Collection (Application),       */
+	0x85, 0x10,                   /*    Report ID (16),               */
+	0x09, 0x20,                   /*    Usage (Stylus),               */
+	0xA0,                         /*    Collection (Physical),        */
+	0x09, 0x42,                   /*      Usage (Tip Switch),         */
+	0x09, 0x44,                   /*      Usage (Barrel Switch),      */
+	0x09, 0x46,                   /*      Usage (Tablet Pick),        */
+	0x14,                         /*      Logical Minimum (0),        */
+	0x25, 0x01,                   /*      Logical Maximum (1),        */
+	0x75, 0x01,                   /*      Report Size (1),            */
+	0x95, 0x03,                   /*      Report Count (3),           */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0x95, 0x04,                   /*      Report Count (4),           */
+	0x81, 0x03,                   /*      Input (Constant, Variable), */
+	0x09, 0x32,                   /*      Usage (In Range),           */
+	0x95, 0x01,                   /*      Report Count (1),           */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0x75, 0x10,                   /*      Report Size (16),           */
+	0x95, 0x01,                   /*      Report Count (1),           */
+	0xA4,                         /*      Push,                       */
+	0x05, 0x01,                   /*      Usage Page (Desktop),       */
+	0x55, 0xFD,                   /*      Unit Exponent (-3),         */
+	0x65, 0x13,                   /*      Unit (Inch),                */
+	0x14,                         /*      Logical Minimum (0),        */
+	0x34,                         /*      Physical Minimum (0),       */
+	0x09, 0x30,                   /*      Usage (X),                  */
+	0x27, 0x00, 0xF0, 0x00, 0x00, /*      Logical Maximum (61440),    */
+	0x46, 0xE0, 0x2E,             /*      Physical Maximum (12000),   */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0x09, 0x31,                   /*      Usage (Y),                  */
+	0x27, 0x00, 0xB4, 0x00, 0x00, /*      Logical Maximum (46080),    */
+	0x46, 0x28, 0x23,             /*      Physical Maximum (9000),    */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0xB4,                         /*      Pop,                        */
+	0x09, 0x30,                   /*      Usage (Tip Pressure),       */
+	0x14,                         /*      Logical Minimum (0),        */
+	0x26, 0xFF, 0x07,             /*      Logical Maximum (2047),     */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0xC0,                         /*    End Collection,               */
+	0xC0,                         /*  End Collection,                 */
+	0x05, 0x0D,                   /*  Usage Page (Digitizer),         */
+	0x09, 0x21,                   /*  Usage (Puck),                   */
+	0xA1, 0x01,                   /*  Collection (Application),       */
+	0x85, 0x11,                   /*    Report ID (17),               */
+	0x09, 0x21,                   /*    Usage (Puck),                 */
+	0xA0,                         /*    Collection (Physical),        */
+	0x05, 0x09,                   /*      Usage Page (Button),        */
+	0x75, 0x01,                   /*      Report Size (1),            */
+	0x19, 0x01,                   /*      Usage Minimum (01h),        */
+	0x29, 0x03,                   /*      Usage Maximum (03h),        */
+	0x14,                         /*      Logical Minimum (0),        */
+	0x25, 0x01,                   /*      Logical Maximum (1),        */
+	0x95, 0x03,                   /*      Report Count (3),           */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0x95, 0x04,                   /*      Report Count (4),           */
+	0x81, 0x01,                   /*      Input (Constant),           */
+	0x95, 0x01,                   /*      Report Count (1),           */
+	0x0B, 0x32, 0x00, 0x0D, 0x00, /*      Usage (Digitizer In Range), */
+	0x14,                         /*      Logical Minimum (0),        */
+	0x25, 0x01,                   /*      Logical Maximum (1),        */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0xA4,                         /*      Push,                       */
+	0x05, 0x01,                   /*      Usage Page (Desktop),       */
+	0x75, 0x10,                   /*      Report Size (16),           */
+	0x95, 0x01,                   /*      Report Count (1),           */
+	0x55, 0xFD,                   /*      Unit Exponent (-3),         */
+	0x65, 0x13,                   /*      Unit (Inch),                */
+	0x14,                         /*      Logical Minimum (0),        */
+	0x34,                         /*      Physical Minimum (0),       */
+	0x09, 0x30,                   /*      Usage (X),                  */
+	0x27, 0x00, 0xF0, 0x00, 0x00, /*      Logical Maximum (61440),    */
+	0x46, 0xE0, 0x2E,             /*      Physical Maximum (12000),   */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0x09, 0x31,                   /*      Usage (Y),                  */
+	0x27, 0x00, 0xB4, 0x00, 0x00, /*      Logical Maximum (46080),    */
+	0x46, 0x28, 0x23,             /*      Physical Maximum (9000),    */
+	0x81, 0x02,                   /*      Input (Variable),           */
+	0x09, 0x38,                   /*      Usage (Wheel),              */
+	0x75, 0x08,                   /*      Report Size (8),            */
+	0x95, 0x01,                   /*      Report Count (1),           */
+	0x15, 0xFF,                   /*      Logical Minimum (-1),       */
+	0x25, 0x01,                   /*      Logical Maximum (1),        */
+	0x34,                         /*      Physical Minimum (0),       */
+	0x44,                         /*      Physical Maximum (0),       */
+	0x81, 0x06,                   /*      Input (Variable, Relative), */
+	0xB4,                         /*      Pop,                        */
+	0xC0,                         /*    End Collection,               */
+	0xC0,                         /*  End Collection,                 */
+	0x05, 0x0C,                   /*  Usage Page (Consumer),          */
+	0x09, 0x01,                   /*  Usage (Consumer Control),       */
+	0xA1, 0x01,                   /*  Collection (Application),       */
+	0x85, 0x12,                   /*    Report ID (18),               */
+	0x14,                         /*    Logical Minimum (0),          */
+	0x25, 0x01,                   /*    Logical Maximum (1),          */
+	0x75, 0x01,                   /*    Report Size (1),              */
+	0x95, 0x08,                   /*    Report Count (8),             */
+	0x05, 0x0C,                   /*    Usage Page (Consumer),        */
+	0x0A, 0x6A, 0x02,             /*    Usage (AC Delete),            */
+	0x0A, 0x1A, 0x02,             /*    Usage (AC Undo),              */
+	0x0A, 0x01, 0x02,             /*    Usage (AC New),               */
+	0x0A, 0x2F, 0x02,             /*    Usage (AC Zoom),              */
+	0x0A, 0x25, 0x02,             /*    Usage (AC Forward),           */
+	0x0A, 0x24, 0x02,             /*    Usage (AC Back),              */
+	0x0A, 0x2D, 0x02,             /*    Usage (AC Zoom In),           */
+	0x0A, 0x2E, 0x02,             /*    Usage (AC Zoom Out),          */
+	0x81, 0x02,                   /*    Input (Variable),             */
+	0x95, 0x30,                   /*    Report Count (48),            */
+	0x81, 0x03,                   /*    Input (Constant, Variable),   */
+	0xC0                          /*  End Collection                  */
+};
+
 static __u8 *kye_consumer_control_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize, int offset, const char *device_name) {
 	/*
@@ -335,6 +466,12 @@
 			*rsize = sizeof(easypen_m610x_rdesc_fixed);
 		}
 		break;
+	case USB_DEVICE_ID_KYE_PENSKETCH_M912:
+		if (*rsize == PENSKETCH_M912_RDESC_ORIG_SIZE) {
+			rdesc = pensketch_m912_rdesc_fixed;
+			*rsize = sizeof(pensketch_m912_rdesc_fixed);
+		}
+		break;
 	case USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE:
 		rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104,
 					"Genius Gila Gaming Mouse");
@@ -418,6 +555,7 @@
 	case USB_DEVICE_ID_KYE_MOUSEPEN_I608X:
 	case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2:
 	case USB_DEVICE_ID_KYE_EASYPEN_M610X:
+	case USB_DEVICE_ID_KYE_PENSKETCH_M912:
 		ret = kye_tablet_enable(hdev);
 		if (ret) {
 			hid_err(hdev, "tablet enabling failed\n");
@@ -457,6 +595,8 @@
 				USB_DEVICE_ID_GENIUS_GX_IMPERATOR) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE,
 				USB_DEVICE_ID_GENIUS_MANTICORE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE,
+				USB_DEVICE_ID_KYE_PENSKETCH_M912) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, kye_devices);
diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
index f91ff14..b86c18e 100644
--- a/drivers/hid/hid-lg.c
+++ b/drivers/hid/hid-lg.c
@@ -27,6 +27,7 @@
 #include "usbhid/usbhid.h"
 #include "hid-ids.h"
 #include "hid-lg.h"
+#include "hid-lg4ff.h"
 
 #define LG_RDESC		0x001
 #define LG_BAD_RELATIVE_KEYS	0x002
@@ -818,4 +819,10 @@
 };
 module_hid_driver(lg_driver);
 
+#ifdef CONFIG_LOGIWHEELS_FF
+int lg4ff_no_autoswitch = 0;
+module_param_named(lg4ff_no_autoswitch, lg4ff_no_autoswitch, int, S_IRUGO);
+MODULE_PARM_DESC(lg4ff_no_autoswitch, "Do not switch multimode wheels to their native mode automatically");
+#endif
+
 MODULE_LICENSE("GPL");
diff --git a/drivers/hid/hid-lg.h b/drivers/hid/hid-lg.h
index 142ce3f..10dd8f0 100644
--- a/drivers/hid/hid-lg.h
+++ b/drivers/hid/hid-lg.h
@@ -24,16 +24,4 @@
 static inline int lg3ff_init(struct hid_device *hdev) { return -1; }
 #endif
 
-#ifdef CONFIG_LOGIWHEELS_FF
-int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field,
-			     struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data);
-int lg4ff_init(struct hid_device *hdev);
-int lg4ff_deinit(struct hid_device *hdev);
-#else
-static inline int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field,
-					   struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data) { return 0; }
-static inline int lg4ff_init(struct hid_device *hdev) { return -1; }
-static inline int lg4ff_deinit(struct hid_device *hdev) { return -1; }
-#endif
-
 #endif
diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
index db0dd9b..1232210 100644
--- a/drivers/hid/hid-lg4ff.c
+++ b/drivers/hid/hid-lg4ff.c
@@ -30,23 +30,44 @@
 
 #include "usbhid/usbhid.h"
 #include "hid-lg.h"
+#include "hid-lg4ff.h"
 #include "hid-ids.h"
 
-#define DFGT_REV_MAJ 0x13
-#define DFGT_REV_MIN 0x22
-#define DFGT2_REV_MIN 0x26
-#define DFP_REV_MAJ 0x11
-#define DFP_REV_MIN 0x06
-#define FFEX_REV_MAJ 0x21
-#define FFEX_REV_MIN 0x00
-#define G25_REV_MAJ 0x12
-#define G25_REV_MIN 0x22
-#define G27_REV_MAJ 0x12
-#define G27_REV_MIN 0x38
-#define G27_2_REV_MIN 0x39
-
 #define to_hid_device(pdev) container_of(pdev, struct hid_device, dev)
 
+#define LG4FF_MMODE_IS_MULTIMODE 0
+#define LG4FF_MMODE_SWITCHED 1
+#define LG4FF_MMODE_NOT_MULTIMODE 2
+
+#define LG4FF_MODE_NATIVE_IDX 0
+#define LG4FF_MODE_DFEX_IDX 1
+#define LG4FF_MODE_DFP_IDX 2
+#define LG4FF_MODE_G25_IDX 3
+#define LG4FF_MODE_DFGT_IDX 4
+#define LG4FF_MODE_G27_IDX 5
+#define LG4FF_MODE_MAX_IDX 6
+
+#define LG4FF_MODE_NATIVE BIT(LG4FF_MODE_NATIVE_IDX)
+#define LG4FF_MODE_DFEX BIT(LG4FF_MODE_DFEX_IDX)
+#define LG4FF_MODE_DFP BIT(LG4FF_MODE_DFP_IDX)
+#define LG4FF_MODE_G25 BIT(LG4FF_MODE_G25_IDX)
+#define LG4FF_MODE_DFGT BIT(LG4FF_MODE_DFGT_IDX)
+#define LG4FF_MODE_G27 BIT(LG4FF_MODE_G27_IDX)
+
+#define LG4FF_DFEX_TAG "DF-EX"
+#define LG4FF_DFEX_NAME "Driving Force / Formula EX"
+#define LG4FF_DFP_TAG "DFP"
+#define LG4FF_DFP_NAME "Driving Force Pro"
+#define LG4FF_G25_TAG "G25"
+#define LG4FF_G25_NAME "G25 Racing Wheel"
+#define LG4FF_G27_TAG "G27"
+#define LG4FF_G27_NAME "G27 Racing Wheel"
+#define LG4FF_DFGT_TAG "DFGT"
+#define LG4FF_DFGT_NAME "Driving Force GT"
+
+#define LG4FF_FFEX_REV_MAJ 0x21
+#define LG4FF_FFEX_REV_MIN 0x00
+
 static void hid_lg4ff_set_range_dfp(struct hid_device *hid, u16 range);
 static void hid_lg4ff_set_range_g25(struct hid_device *hid, u16 range);
 
@@ -59,6 +80,10 @@
 	__u8  led_state;
 	struct led_classdev *led[5];
 #endif
+	u32 alternate_modes;
+	const char *real_tag;
+	const char *real_name;
+	u16 real_product_id;
 	struct list_head list;
 	void (*set_range)(struct hid_device *hid, u16 range);
 };
@@ -77,6 +102,35 @@
 	void (*set_range)(struct hid_device *hid, u16 range);
 };
 
+struct lg4ff_compat_mode_switch {
+	const __u8 cmd_count;	/* Number of commands to send */
+	const __u8 cmd[];
+};
+
+struct lg4ff_wheel_ident_info {
+	const u16 mask;
+	const u16 result;
+	const u16 real_product_id;
+};
+
+struct lg4ff_wheel_ident_checklist {
+	const u32 count;
+	const struct lg4ff_wheel_ident_info *models[];
+};
+
+struct lg4ff_multimode_wheel {
+	const u16 product_id;
+	const u32 alternate_modes;
+	const char *real_tag;
+	const char *real_name;
+};
+
+struct lg4ff_alternate_mode {
+	const u16 product_id;
+	const char *tag;
+	const char *name;
+};
+
 static const struct lg4ff_wheel lg4ff_devices[] = {
 	{USB_DEVICE_ID_LOGITECH_WHEEL,       lg4ff_wheel_effects, 40, 270, NULL},
 	{USB_DEVICE_ID_LOGITECH_MOMO_WHEEL,  lg4ff_wheel_effects, 40, 270, NULL},
@@ -88,48 +142,108 @@
 	{USB_DEVICE_ID_LOGITECH_WII_WHEEL,   lg4ff_wheel_effects, 40, 270, NULL}
 };
 
-struct lg4ff_native_cmd {
-	const __u8 cmd_num;	/* Number of commands to send */
-	const __u8 cmd[];
+static const struct lg4ff_multimode_wheel lg4ff_multimode_wheels[] = {
+	{USB_DEVICE_ID_LOGITECH_DFP_WHEEL,
+	 LG4FF_MODE_NATIVE | LG4FF_MODE_DFP | LG4FF_MODE_DFEX,
+	 LG4FF_DFP_TAG, LG4FF_DFP_NAME},
+	{USB_DEVICE_ID_LOGITECH_G25_WHEEL,
+	 LG4FF_MODE_NATIVE | LG4FF_MODE_G25 | LG4FF_MODE_DFP | LG4FF_MODE_DFEX,
+	 LG4FF_G25_TAG, LG4FF_G25_NAME},
+	{USB_DEVICE_ID_LOGITECH_DFGT_WHEEL,
+	 LG4FF_MODE_NATIVE | LG4FF_MODE_DFGT | LG4FF_MODE_DFP | LG4FF_MODE_DFEX,
+	 LG4FF_DFGT_TAG, LG4FF_DFGT_NAME},
+	{USB_DEVICE_ID_LOGITECH_G27_WHEEL,
+	 LG4FF_MODE_NATIVE | LG4FF_MODE_G27 | LG4FF_MODE_G25 | LG4FF_MODE_DFP | LG4FF_MODE_DFEX,
+	 LG4FF_G27_TAG, LG4FF_G27_NAME},
 };
 
-struct lg4ff_usb_revision {
-	const __u16 rev_maj;
-	const __u16 rev_min;
-	const struct lg4ff_native_cmd *command;
+static const struct lg4ff_alternate_mode lg4ff_alternate_modes[] = {
+	[LG4FF_MODE_NATIVE_IDX] = {0, "native", ""},
+	[LG4FF_MODE_DFEX_IDX] = {USB_DEVICE_ID_LOGITECH_WHEEL, LG4FF_DFEX_TAG, LG4FF_DFEX_NAME},
+	[LG4FF_MODE_DFP_IDX] = {USB_DEVICE_ID_LOGITECH_DFP_WHEEL, LG4FF_DFP_TAG, LG4FF_DFP_NAME},
+	[LG4FF_MODE_G25_IDX] = {USB_DEVICE_ID_LOGITECH_G25_WHEEL, LG4FF_G25_TAG, LG4FF_G25_NAME},
+	[LG4FF_MODE_DFGT_IDX] = {USB_DEVICE_ID_LOGITECH_DFGT_WHEEL, LG4FF_DFGT_TAG, LG4FF_DFGT_NAME},
+	[LG4FF_MODE_G27_IDX] = {USB_DEVICE_ID_LOGITECH_G27_WHEEL, LG4FF_G27_TAG, LG4FF_G27_NAME}
 };
 
-static const struct lg4ff_native_cmd native_dfp = {
+/* Multimode wheel identificators */
+static const struct lg4ff_wheel_ident_info lg4ff_dfp_ident_info = {
+	0xf000,
+	0x1000,
+	USB_DEVICE_ID_LOGITECH_DFP_WHEEL
+};
+
+static const struct lg4ff_wheel_ident_info lg4ff_g25_ident_info = {
+	0xff00,
+	0x1200,
+	USB_DEVICE_ID_LOGITECH_G25_WHEEL
+};
+
+static const struct lg4ff_wheel_ident_info lg4ff_g27_ident_info = {
+	0xfff0,
+	0x1230,
+	USB_DEVICE_ID_LOGITECH_G27_WHEEL
+};
+
+static const struct lg4ff_wheel_ident_info lg4ff_dfgt_ident_info = {
+	0xff00,
+	0x1300,
+	USB_DEVICE_ID_LOGITECH_DFGT_WHEEL
+};
+
+/* Multimode wheel identification checklists */
+static const struct lg4ff_wheel_ident_checklist lg4ff_main_checklist = {
+	4,
+	{&lg4ff_dfgt_ident_info,
+	 &lg4ff_g27_ident_info,
+	 &lg4ff_g25_ident_info,
+	 &lg4ff_dfp_ident_info}
+};
+
+/* Compatibility mode switching commands */
+/* EXT_CMD9 - Understood by G27 and DFGT */
+static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_dfex = {
+	2,
+	{0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,	/* Revert mode upon USB reset */
+	 0xf8, 0x09, 0x00, 0x01, 0x00, 0x00, 0x00}	/* Switch mode to DF-EX with detach */
+};
+
+static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_dfp = {
+	2,
+	{0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,	/* Revert mode upon USB reset */
+	 0xf8, 0x09, 0x01, 0x01, 0x00, 0x00, 0x00}	/* Switch mode to DFP with detach */
+};
+
+static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_g25 = {
+	2,
+	{0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,	/* Revert mode upon USB reset */
+	 0xf8, 0x09, 0x02, 0x01, 0x00, 0x00, 0x00}	/* Switch mode to G25 with detach */
+};
+
+static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_dfgt = {
+	2,
+	{0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,	/* Revert mode upon USB reset */
+	 0xf8, 0x09, 0x03, 0x01, 0x00, 0x00, 0x00}	/* Switch mode to DFGT with detach */
+};
+
+static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_g27 = {
+	2,
+	{0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,	/* Revert mode upon USB reset */
+	 0xf8, 0x09, 0x04, 0x01, 0x00, 0x00, 0x00}	/* Switch mode to G27 with detach */
+};
+
+/* EXT_CMD1 - Understood by DFP, G25, G27 and DFGT */
+static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext01_dfp = {
 	1,
 	{0xf8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00}
 };
 
-static const struct lg4ff_native_cmd native_dfgt = {
-	2,
-	{0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 1st command */
-	 0xf8, 0x09, 0x03, 0x01, 0x00, 0x00, 0x00}	/* 2nd command */
-};
-
-static const struct lg4ff_native_cmd native_g25 = {
+/* EXT_CMD16 - Understood by G25 and G27 */
+static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext16_g25 = {
 	1,
 	{0xf8, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00}
 };
 
-static const struct lg4ff_native_cmd native_g27 = {
-	2,
-	{0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 1st command */
-	 0xf8, 0x09, 0x04, 0x01, 0x00, 0x00, 0x00}	/* 2nd command */
-};
-
-static const struct lg4ff_usb_revision lg4ff_revs[] = {
-	{DFGT_REV_MAJ, DFGT_REV_MIN, &native_dfgt},	/* Driving Force GT */
-	{DFGT_REV_MAJ, DFGT2_REV_MIN, &native_dfgt},	/* Driving Force GT v2 */
-	{DFP_REV_MAJ,  DFP_REV_MIN,  &native_dfp},	/* Driving Force Pro */
-	{G25_REV_MAJ,  G25_REV_MIN,  &native_g25},	/* G25 */
-	{G27_REV_MAJ,  G27_REV_MIN,  &native_g27},	/* G27 */
-	{G27_REV_MAJ,  G27_2_REV_MIN,  &native_g27},	/* G27 v2 */
-};
-
 /* Recalculates X axis value accordingly to currently selected range */
 static __s32 lg4ff_adjust_dfp_x_axis(__s32 value, __u16 range)
 {
@@ -396,21 +510,217 @@
 	hid_hw_request(hid, report, HID_REQ_SET_REPORT);
 }
 
-static void hid_lg4ff_switch_native(struct hid_device *hid, const struct lg4ff_native_cmd *cmd)
+static const struct lg4ff_compat_mode_switch *lg4ff_get_mode_switch_command(const u16 real_product_id, const u16 target_product_id)
+{
+	switch (real_product_id) {
+	case USB_DEVICE_ID_LOGITECH_DFP_WHEEL:
+		switch (target_product_id) {
+		case USB_DEVICE_ID_LOGITECH_DFP_WHEEL:
+			return &lg4ff_mode_switch_ext01_dfp;
+		/* DFP can only be switched to its native mode */
+		default:
+			return NULL;
+		}
+		break;
+	case USB_DEVICE_ID_LOGITECH_G25_WHEEL:
+		switch (target_product_id) {
+		case USB_DEVICE_ID_LOGITECH_DFP_WHEEL:
+			return &lg4ff_mode_switch_ext01_dfp;
+		case USB_DEVICE_ID_LOGITECH_G25_WHEEL:
+			return &lg4ff_mode_switch_ext16_g25;
+		/* G25 can only be switched to DFP mode or its native mode */
+		default:
+			return NULL;
+		}
+		break;
+	case USB_DEVICE_ID_LOGITECH_G27_WHEEL:
+		switch (target_product_id) {
+		case USB_DEVICE_ID_LOGITECH_WHEEL:
+			return &lg4ff_mode_switch_ext09_dfex;
+		case USB_DEVICE_ID_LOGITECH_DFP_WHEEL:
+			return &lg4ff_mode_switch_ext09_dfp;
+		case USB_DEVICE_ID_LOGITECH_G25_WHEEL:
+			return &lg4ff_mode_switch_ext09_g25;
+		case USB_DEVICE_ID_LOGITECH_G27_WHEEL:
+			return &lg4ff_mode_switch_ext09_g27;
+		/* G27 can only be switched to DF-EX, DFP, G25 or its native mode */
+		default:
+			return NULL;
+		}
+		break;
+	case USB_DEVICE_ID_LOGITECH_DFGT_WHEEL:
+		switch (target_product_id) {
+		case USB_DEVICE_ID_LOGITECH_WHEEL:
+			return &lg4ff_mode_switch_ext09_dfex;
+		case USB_DEVICE_ID_LOGITECH_DFP_WHEEL:
+			return &lg4ff_mode_switch_ext09_dfp;
+		case USB_DEVICE_ID_LOGITECH_DFGT_WHEEL:
+			return &lg4ff_mode_switch_ext09_dfgt;
+		/* DFGT can only be switched to DF-EX, DFP or its native mode */
+		default:
+			return NULL;
+		}
+		break;
+	/* No other wheels have multiple modes */
+	default:
+		return NULL;
+	}
+}
+
+static int lg4ff_switch_compatibility_mode(struct hid_device *hid, const struct lg4ff_compat_mode_switch *s)
 {
 	struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct hid_report *report = list_entry(report_list->next, struct hid_report, list);
-	__u8 i, j;
+	__s32 *value = report->field[0]->value;
+	u8 i;
 
-	j = 0;
-	while (j < 7*cmd->cmd_num) {
-		for (i = 0; i < 7; i++)
-			report->field[0]->value[i] = cmd->cmd[j++];
+	for (i = 0; i < s->cmd_count; i++) {
+		u8 j;
+
+		for (j = 0; j < 7; j++)
+			value[j] = s->cmd[j + (7*i)];
 
 		hid_hw_request(hid, report, HID_REQ_SET_REPORT);
 	}
+	hid_hw_wait(hid);
+	return 0;
 }
 
+static ssize_t lg4ff_alternate_modes_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hid_device *hid = to_hid_device(dev);
+	struct lg4ff_device_entry *entry;
+	struct lg_drv_data *drv_data;
+	ssize_t count = 0;
+	int i;
+
+	drv_data = hid_get_drvdata(hid);
+	if (!drv_data) {
+		hid_err(hid, "Private driver data not found!\n");
+		return 0;
+	}
+
+	entry = drv_data->device_props;
+	if (!entry) {
+		hid_err(hid, "Device properties not found!\n");
+		return 0;
+	}
+
+	if (!entry->real_name) {
+		hid_err(hid, "NULL pointer to string\n");
+		return 0;
+	}
+
+	for (i = 0; i < LG4FF_MODE_MAX_IDX; i++) {
+		if (entry->alternate_modes & BIT(i)) {
+			/* Print tag and full name */
+			count += scnprintf(buf + count, PAGE_SIZE - count, "%s: %s",
+					   lg4ff_alternate_modes[i].tag,
+					   !lg4ff_alternate_modes[i].product_id ? entry->real_name : lg4ff_alternate_modes[i].name);
+			if (count >= PAGE_SIZE - 1)
+				return count;
+
+			/* Mark the currently active mode with an asterisk */
+			if (lg4ff_alternate_modes[i].product_id == entry->product_id ||
+			    (lg4ff_alternate_modes[i].product_id == 0 && entry->product_id == entry->real_product_id))
+				count += scnprintf(buf + count, PAGE_SIZE - count, " *\n");
+			else
+				count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
+
+			if (count >= PAGE_SIZE - 1)
+				return count;
+		}
+	}
+
+	return count;
+}
+
+static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct hid_device *hid = to_hid_device(dev);
+	struct lg4ff_device_entry *entry;
+	struct lg_drv_data *drv_data;
+	const struct lg4ff_compat_mode_switch *s;
+	u16 target_product_id = 0;
+	int i, ret;
+	char *lbuf;
+
+	drv_data = hid_get_drvdata(hid);
+	if (!drv_data) {
+		hid_err(hid, "Private driver data not found!\n");
+		return -EINVAL;
+	}
+
+	entry = drv_data->device_props;
+	if (!entry) {
+		hid_err(hid, "Device properties not found!\n");
+		return -EINVAL;
+	}
+
+	/* Allow \n at the end of the input parameter */
+	lbuf = kasprintf(GFP_KERNEL, "%s", buf);
+	if (!lbuf)
+		return -ENOMEM;
+
+	i = strlen(lbuf);
+	if (lbuf[i-1] == '\n') {
+		if (i == 1) {
+			kfree(lbuf);
+			return -EINVAL;
+		}
+		lbuf[i-1] = '\0';
+	}
+
+	for (i = 0; i < LG4FF_MODE_MAX_IDX; i++) {
+		const u16 mode_product_id = lg4ff_alternate_modes[i].product_id;
+		const char *tag = lg4ff_alternate_modes[i].tag;
+
+		if (entry->alternate_modes & BIT(i)) {
+			if (!strcmp(tag, lbuf)) {
+				if (!mode_product_id)
+					target_product_id = entry->real_product_id;
+				else
+					target_product_id = mode_product_id;
+				break;
+			}
+		}
+	}
+
+	if (i == LG4FF_MODE_MAX_IDX) {
+		hid_info(hid, "Requested mode \"%s\" is not supported by the device\n", lbuf);
+		kfree(lbuf);
+		return -EINVAL;
+	}
+	kfree(lbuf); /* Not needed anymore */
+
+	if (target_product_id == entry->product_id) /* Nothing to do */
+		return count;
+
+	/* Automatic switching has to be disabled for the switch to DF-EX mode to work correctly */
+	if (target_product_id == USB_DEVICE_ID_LOGITECH_WHEEL && !lg4ff_no_autoswitch) {
+		hid_info(hid, "\"%s\" cannot be switched to \"DF-EX\" mode. Load the \"hid_logitech\" module with \"lg4ff_no_autoswitch=1\" parameter set and try again\n",
+			 entry->real_name);
+		return -EINVAL;
+	}
+
+	/* Take care of hardware limitations */
+	if ((entry->real_product_id == USB_DEVICE_ID_LOGITECH_DFP_WHEEL || entry->real_product_id == USB_DEVICE_ID_LOGITECH_G25_WHEEL) &&
+	    entry->product_id > target_product_id) {
+		hid_info(hid, "\"%s\" cannot be switched back into \"%s\" mode\n", entry->real_name, lg4ff_alternate_modes[i].name);
+		return -EINVAL;
+	}
+
+	s = lg4ff_get_mode_switch_command(entry->real_product_id, target_product_id);
+	if (!s) {
+		hid_err(hid, "Invalid target product ID %X\n", target_product_id);
+		return -EINVAL;
+	}
+
+	ret = lg4ff_switch_compatibility_mode(hid, s);
+	return (ret == 0 ? count : ret);
+}
+static DEVICE_ATTR(alternate_modes, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, lg4ff_alternate_modes_show, lg4ff_alternate_modes_store);
+
 /* Read current range and display it in terminal */
 static ssize_t range_show(struct device *dev, struct device_attribute *attr,
 			  char *buf)
@@ -472,6 +782,41 @@
 }
 static DEVICE_ATTR_RW(range);
 
+static ssize_t lg4ff_real_id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hid_device *hid = to_hid_device(dev);
+	struct lg4ff_device_entry *entry;
+	struct lg_drv_data *drv_data;
+	size_t count;
+
+	drv_data = hid_get_drvdata(hid);
+	if (!drv_data) {
+		hid_err(hid, "Private driver data not found!\n");
+		return 0;
+	}
+
+	entry = drv_data->device_props;
+	if (!entry) {
+		hid_err(hid, "Device properties not found!\n");
+		return 0;
+	}
+
+	if (!entry->real_tag || !entry->real_name) {
+		hid_err(hid, "NULL pointer to string\n");
+		return 0;
+	}
+
+	count = scnprintf(buf, PAGE_SIZE, "%s: %s\n", entry->real_tag, entry->real_name);
+	return count;
+}
+
+static ssize_t lg4ff_real_id_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	/* Real ID is a read-only value */
+	return -EPERM;
+}
+static DEVICE_ATTR(real_id, S_IRUGO, lg4ff_real_id_show, lg4ff_real_id_store);
+
 #ifdef CONFIG_LEDS_CLASS
 static void lg4ff_set_leds(struct hid_device *hid, __u8 leds)
 {
@@ -555,20 +900,119 @@
 }
 #endif
 
+static u16 lg4ff_identify_multimode_wheel(struct hid_device *hid, const u16 reported_product_id, const u16 bcdDevice)
+{
+	const struct lg4ff_wheel_ident_checklist *checklist;
+	int i, from_idx, to_idx;
+
+	switch (reported_product_id) {
+	case USB_DEVICE_ID_LOGITECH_WHEEL:
+	case USB_DEVICE_ID_LOGITECH_DFP_WHEEL:
+		checklist = &lg4ff_main_checklist;
+		from_idx = 0;
+		to_idx = checklist->count - 1;
+		break;
+	case USB_DEVICE_ID_LOGITECH_G25_WHEEL:
+		checklist = &lg4ff_main_checklist;
+		from_idx = 0;
+		to_idx = checklist->count - 2; /* End identity check at G25 */
+		break;
+	case USB_DEVICE_ID_LOGITECH_G27_WHEEL:
+		checklist = &lg4ff_main_checklist;
+		from_idx = 1; /* Start identity check at G27 */
+		to_idx = checklist->count - 3; /* End identity check at G27 */
+		break;
+	case USB_DEVICE_ID_LOGITECH_DFGT_WHEEL:
+		checklist = &lg4ff_main_checklist;
+		from_idx = 0;
+		to_idx = checklist->count - 4; /* End identity check at DFGT */
+		break;
+	default:
+		return 0;
+	}
+
+	for (i = from_idx; i <= to_idx; i++) {
+		const u16 mask = checklist->models[i]->mask;
+		const u16 result = checklist->models[i]->result;
+		const u16 real_product_id = checklist->models[i]->real_product_id;
+
+		if ((bcdDevice & mask) == result) {
+			dbg_hid("Found wheel with real PID %X whose reported PID is %X\n", real_product_id, reported_product_id);
+			return real_product_id;
+		}
+	}
+
+	/* No match found. This is either Driving Force or an unknown
+	 * wheel model, do not touch it */
+	dbg_hid("Wheel with bcdDevice %X was not recognized as multimode wheel, leaving in its current mode\n", bcdDevice);
+	return 0;
+}
+
+static int lg4ff_handle_multimode_wheel(struct hid_device *hid, u16 *real_product_id, const u16 bcdDevice)
+{
+	const u16 reported_product_id = hid->product;
+	int ret;
+
+	*real_product_id = lg4ff_identify_multimode_wheel(hid, reported_product_id, bcdDevice);
+	/* Probed wheel is not a multimode wheel */
+	if (!*real_product_id) {
+		*real_product_id = reported_product_id;
+		dbg_hid("Wheel is not a multimode wheel\n");
+		return LG4FF_MMODE_NOT_MULTIMODE;
+	}
+
+	/* Switch from "Driving Force" mode to native mode automatically.
+	 * Otherwise keep the wheel in its current mode */
+	if (reported_product_id == USB_DEVICE_ID_LOGITECH_WHEEL &&
+	    reported_product_id != *real_product_id &&
+	    !lg4ff_no_autoswitch) {
+		const struct lg4ff_compat_mode_switch *s = lg4ff_get_mode_switch_command(*real_product_id, *real_product_id);
+
+		if (!s) {
+			hid_err(hid, "Invalid product id %X\n", *real_product_id);
+			return LG4FF_MMODE_NOT_MULTIMODE;
+		}
+
+		ret = lg4ff_switch_compatibility_mode(hid, s);
+		if (ret) {
+			/* Wheel could not have been switched to native mode,
+			 * leave it in "Driving Force" mode and continue */
+			hid_err(hid, "Unable to switch wheel mode, errno %d\n", ret);
+			return LG4FF_MMODE_IS_MULTIMODE;
+		}
+		return LG4FF_MMODE_SWITCHED;
+	}
+
+	return LG4FF_MMODE_IS_MULTIMODE;
+}
+
+
 int lg4ff_init(struct hid_device *hid)
 {
 	struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
 	struct input_dev *dev = hidinput->input;
+	const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor);
+	const u16 bcdDevice = le16_to_cpu(udesc->bcdDevice);
 	struct lg4ff_device_entry *entry;
 	struct lg_drv_data *drv_data;
-	struct usb_device_descriptor *udesc;
 	int error, i, j;
-	__u16 bcdDevice, rev_maj, rev_min;
+	int mmode_ret, mmode_idx = -1;
+	u16 real_product_id;
 
 	/* Check that the report looks ok */
 	if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
 		return -1;
 
+	/* Check if a multimode wheel has been connected and
+	 * handle it appropriately */
+	mmode_ret = lg4ff_handle_multimode_wheel(hid, &real_product_id, bcdDevice);
+
+	/* Wheel has been told to switch to native mode. There is no point in going on
+	 * with the initialization as the wheel will do a USB reset when it switches mode
+	 */
+	if (mmode_ret == LG4FF_MMODE_SWITCHED)
+		return 0;
+
 	/* Check what wheel has been connected */
 	for (i = 0; i < ARRAY_SIZE(lg4ff_devices); i++) {
 		if (hid->product == lg4ff_devices[i].product_id) {
@@ -583,25 +1027,15 @@
 		return -1;
 	}
 
-	/* Attempt to switch wheel to native mode when applicable */
-	udesc = &(hid_to_usb_dev(hid)->descriptor);
-	if (!udesc) {
-		hid_err(hid, "NULL USB device descriptor\n");
-		return -1;
-	}
-	bcdDevice = le16_to_cpu(udesc->bcdDevice);
-	rev_maj = bcdDevice >> 8;
-	rev_min = bcdDevice & 0xff;
+	if (mmode_ret == LG4FF_MMODE_IS_MULTIMODE) {
+		for (mmode_idx = 0; mmode_idx < ARRAY_SIZE(lg4ff_multimode_wheels); mmode_idx++) {
+			if (real_product_id == lg4ff_multimode_wheels[mmode_idx].product_id)
+				break;
+		}
 
-	if (lg4ff_devices[i].product_id == USB_DEVICE_ID_LOGITECH_WHEEL) {
-		dbg_hid("Generic wheel detected, can it do native?\n");
-		dbg_hid("USB revision: %2x.%02x\n", rev_maj, rev_min);
-
-		for (j = 0; j < ARRAY_SIZE(lg4ff_revs); j++) {
-			if (lg4ff_revs[j].rev_maj == rev_maj && lg4ff_revs[j].rev_min == rev_min) {
-				hid_lg4ff_switch_native(hid, lg4ff_revs[j].command);
-				hid_info(hid, "Switched to native mode\n");
-			}
+		if (mmode_idx == ARRAY_SIZE(lg4ff_multimode_wheels)) {
+			hid_err(hid, "Device product ID %X is not listed as a multimode wheel", real_product_id);
+			return -1;
 		}
 	}
 
@@ -630,14 +1064,23 @@
 	drv_data->device_props = entry;
 
 	entry->product_id = lg4ff_devices[i].product_id;
+	entry->real_product_id = real_product_id;
 	entry->min_range = lg4ff_devices[i].min_range;
 	entry->max_range = lg4ff_devices[i].max_range;
 	entry->set_range = lg4ff_devices[i].set_range;
+	if (mmode_ret == LG4FF_MMODE_IS_MULTIMODE) {
+		BUG_ON(mmode_idx == -1);
+		entry->alternate_modes = lg4ff_multimode_wheels[mmode_idx].alternate_modes;
+		entry->real_tag = lg4ff_multimode_wheels[mmode_idx].real_tag;
+		entry->real_name = lg4ff_multimode_wheels[mmode_idx].real_name;
+	}
 
 	/* Check if autocentering is available and
 	 * set the centering force to zero by default */
 	if (test_bit(FF_AUTOCENTER, dev->ffbit)) {
-		if (rev_maj == FFEX_REV_MAJ && rev_min == FFEX_REV_MIN)	/* Formula Force EX expects different autocentering command */
+		/* Formula Force EX expects different autocentering command */
+		if ((bcdDevice >> 8) == LG4FF_FFEX_REV_MAJ &&
+		    (bcdDevice & 0xff) == LG4FF_FFEX_REV_MIN)
 			dev->ff->set_autocenter = hid_lg4ff_set_autocenter_ffex;
 		else
 			dev->ff->set_autocenter = hid_lg4ff_set_autocenter_default;
@@ -649,6 +1092,14 @@
 	error = device_create_file(&hid->dev, &dev_attr_range);
 	if (error)
 		return error;
+	if (mmode_ret == LG4FF_MMODE_IS_MULTIMODE) {
+		error = device_create_file(&hid->dev, &dev_attr_real_id);
+		if (error)
+			return error;
+		error = device_create_file(&hid->dev, &dev_attr_alternate_modes);
+		if (error)
+			return error;
+	}
 	dbg_hid("sysfs interface created\n");
 
 	/* Set the maximum range to start with */
@@ -711,24 +1162,26 @@
 	return 0;
 }
 
-
-
 int lg4ff_deinit(struct hid_device *hid)
 {
 	struct lg4ff_device_entry *entry;
 	struct lg_drv_data *drv_data;
 
-	device_remove_file(&hid->dev, &dev_attr_range);
-
 	drv_data = hid_get_drvdata(hid);
 	if (!drv_data) {
 		hid_err(hid, "Error while deinitializing device, no private driver data.\n");
 		return -1;
 	}
 	entry = drv_data->device_props;
-	if (!entry) {
-		hid_err(hid, "Error while deinitializing device, no device properties data.\n");
-		return -1;
+	if (!entry)
+		goto out; /* Nothing more to do */
+
+	device_remove_file(&hid->dev, &dev_attr_range);
+
+	/* Multimode devices will have at least the "MODE_NATIVE" bit set */
+	if (entry->alternate_modes) {
+		device_remove_file(&hid->dev, &dev_attr_real_id);
+		device_remove_file(&hid->dev, &dev_attr_alternate_modes);
 	}
 
 #ifdef CONFIG_LEDS_CLASS
@@ -752,6 +1205,7 @@
 	/* Deallocate memory */
 	kfree(entry);
 
+out:
 	dbg_hid("Device successfully unregistered\n");
 	return 0;
 }
diff --git a/drivers/hid/hid-lg4ff.h b/drivers/hid/hid-lg4ff.h
new file mode 100644
index 0000000..5b6a508
--- /dev/null
+++ b/drivers/hid/hid-lg4ff.h
@@ -0,0 +1,18 @@
+#ifndef __HID_LG4FF_H
+#define __HID_LG4FF_H
+
+#ifdef CONFIG_LOGIWHEELS_FF
+extern int lg4ff_no_autoswitch; /* From hid-lg.c */
+
+int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field,
+			     struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data);
+int lg4ff_init(struct hid_device *hdev);
+int lg4ff_deinit(struct hid_device *hdev);
+#else
+static inline int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field,
+					   struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data) { return 0; }
+static inline int lg4ff_init(struct hid_device *hdev) { return -1; }
+static inline int lg4ff_deinit(struct hid_device *hdev) { return -1; }
+#endif
+
+#endif
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index e77658c..b3cf6fd 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -28,6 +28,11 @@
 MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>");
 MODULE_AUTHOR("Nestor Lopez Casado <nlopezcasad@logitech.com>");
 
+static bool disable_raw_mode;
+module_param(disable_raw_mode, bool, 0644);
+MODULE_PARM_DESC(disable_raw_mode,
+	"Disable Raw mode reporting for touchpads and keep firmware gestures.");
+
 #define REPORT_ID_HIDPP_SHORT			0x10
 #define REPORT_ID_HIDPP_LONG			0x11
 
@@ -1188,6 +1193,11 @@
 
 	hidpp->quirks = id->driver_data;
 
+	if (disable_raw_mode) {
+		hidpp->quirks &= ~HIDPP_QUIRK_CLASS_WTP;
+		hidpp->quirks &= ~HIDPP_QUIRK_DELAYED_INIT;
+	}
+
 	if (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP) {
 		ret = wtp_allocate(hdev, id);
 		if (ret)
@@ -1210,6 +1220,7 @@
 	connected = hidpp_is_connected(hidpp);
 	if (id->group != HID_GROUP_LOGITECH_DJ_DEVICE) {
 		if (!connected) {
+			ret = -ENODEV;
 			hid_err(hdev, "Device not connected");
 			hid_device_io_stop(hdev);
 			goto hid_parse_fail;
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index f65e78b..6a9b05b 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -42,7 +42,6 @@
 #include <linux/hid.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/usb.h>
 #include <linux/input/mt.h>
 #include <linux/string.h>
 
@@ -72,6 +71,8 @@
 #define MT_INPUTMODE_TOUCHSCREEN	0x02
 #define MT_INPUTMODE_TOUCHPAD		0x03
 
+#define MT_BUTTONTYPE_CLICKPAD		0
+
 struct mt_slot {
 	__s32 x, y, cx, cy, p, w, h;
 	__s32 contactid;	/* the device ContactID assigned to this slot */
@@ -116,6 +117,8 @@
 	__u8 touches_by_report;	/* how many touches are present in one report:
 				* 1 means we should use a serial protocol
 				* > 1 means hybrid (multitouch) protocol */
+	__u8 buttons_count;	/* number of physical buttons per touchpad */
+	bool is_buttonpad;	/* is this device a button pad? */
 	bool serial_maybe;	/* need to check for serial protocol */
 	bool curvalid;		/* is the current contact valid? */
 	unsigned mt_flags;	/* flags to pass to input-mt */
@@ -334,6 +337,16 @@
 			td->maxcontacts = td->mtclass.maxcontacts;
 
 		break;
+	case HID_DG_BUTTONTYPE:
+		if (usage->usage_index >= field->report_count) {
+			dev_err(&hdev->dev, "HID_DG_BUTTONTYPE out of range\n");
+			break;
+		}
+
+		if (field->value[usage->usage_index] == MT_BUTTONTYPE_CLICKPAD)
+			td->is_buttonpad = true;
+
+		break;
 	}
 }
 
@@ -379,6 +392,10 @@
 		td->inputmode_value = MT_INPUTMODE_TOUCHPAD;
 	}
 
+	/* count the buttons on touchpads */
+	if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON)
+		td->buttons_count++;
+
 	if (usage->usage_index)
 		prev_usage = &field->usage[usage->usage_index - 1];
 
@@ -728,6 +745,13 @@
 	if (cls->quirks & MT_QUIRK_NOT_SEEN_MEANS_UP)
 		td->mt_flags |= INPUT_MT_DROP_UNUSED;
 
+	/* check for clickpads */
+	if ((td->mt_flags & INPUT_MT_POINTER) && (td->buttons_count == 1))
+		td->is_buttonpad = true;
+
+	if (td->is_buttonpad)
+		__set_bit(INPUT_PROP_BUTTONPAD, input->propbit);
+
 	input_mt_init_slots(input, td->maxcontacts, td->mt_flags);
 
 	td->mt_flags = 0;
diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c
index 49d4fe4..368ffdf 100644
--- a/drivers/hid/hid-rmi.c
+++ b/drivers/hid/hid-rmi.c
@@ -104,6 +104,7 @@
 
 	unsigned long flags;
 
+	struct rmi_function f01;
 	struct rmi_function f11;
 	struct rmi_function f30;
 
@@ -124,6 +125,7 @@
 	struct hid_device *hdev;
 
 	unsigned long device_flags;
+	unsigned long firmware_id;
 };
 
 #define RMI_PAGE(addr) (((addr) >> 8) & 0xff)
@@ -272,6 +274,46 @@
 	return rmi_read_block(hdev, addr, buf, 1);
 }
 
+static int rmi_write_block(struct hid_device *hdev, u16 addr, void *buf,
+		const int len)
+{
+	struct rmi_data *data = hid_get_drvdata(hdev);
+	int ret;
+
+	mutex_lock(&data->page_mutex);
+
+	if (RMI_PAGE(addr) != data->page) {
+		ret = rmi_set_page(hdev, RMI_PAGE(addr));
+		if (ret < 0)
+			goto exit;
+	}
+
+	data->writeReport[0] = RMI_WRITE_REPORT_ID;
+	data->writeReport[1] = len;
+	data->writeReport[2] = addr & 0xFF;
+	data->writeReport[3] = (addr >> 8) & 0xFF;
+	memcpy(&data->writeReport[4], buf, len);
+
+	ret = rmi_write_report(hdev, data->writeReport,
+					data->output_report_size);
+	if (ret < 0) {
+		dev_err(&hdev->dev,
+			"failed to write request output report (%d)\n",
+			ret);
+		goto exit;
+	}
+	ret = 0;
+
+exit:
+	mutex_unlock(&data->page_mutex);
+	return ret;
+}
+
+static inline int rmi_write(struct hid_device *hdev, u16 addr, void *buf)
+{
+	return rmi_write_block(hdev, addr, buf, 1);
+}
+
 static void rmi_f11_process_touch(struct rmi_data *hdata, int slot,
 		u8 finger_state, u8 *touch_data)
 {
@@ -532,6 +574,9 @@
 	u16 page_base = page << 8;
 
 	switch (pdt_entry->function_number) {
+	case 0x01:
+		f = &data->f01;
+		break;
 	case 0x11:
 		f = &data->f11;
 		break;
@@ -604,6 +649,92 @@
 	return retval;
 }
 
+#define RMI_DEVICE_F01_BASIC_QUERY_LEN	11
+
+static int rmi_populate_f01(struct hid_device *hdev)
+{
+	struct rmi_data *data = hid_get_drvdata(hdev);
+	u8 basic_queries[RMI_DEVICE_F01_BASIC_QUERY_LEN];
+	u8 info[3];
+	int ret;
+	bool has_query42;
+	bool has_lts;
+	bool has_sensor_id;
+	bool has_ds4_queries = false;
+	bool has_build_id_query = false;
+	bool has_package_id_query = false;
+	u16 query_offset = data->f01.query_base_addr;
+	u16 prod_info_addr;
+	u8 ds4_query_len;
+
+	ret = rmi_read_block(hdev, query_offset, basic_queries,
+				RMI_DEVICE_F01_BASIC_QUERY_LEN);
+	if (ret) {
+		hid_err(hdev, "Can not read basic queries from Function 0x1.\n");
+		return ret;
+	}
+
+	has_lts = !!(basic_queries[0] & BIT(2));
+	has_sensor_id = !!(basic_queries[1] & BIT(3));
+	has_query42 = !!(basic_queries[1] & BIT(7));
+
+	query_offset += 11;
+	prod_info_addr = query_offset + 6;
+	query_offset += 10;
+
+	if (has_lts)
+		query_offset += 20;
+
+	if (has_sensor_id)
+		query_offset++;
+
+	if (has_query42) {
+		ret = rmi_read(hdev, query_offset, info);
+		if (ret) {
+			hid_err(hdev, "Can not read query42.\n");
+			return ret;
+		}
+		has_ds4_queries = !!(info[0] & BIT(0));
+		query_offset++;
+	}
+
+	if (has_ds4_queries) {
+		ret = rmi_read(hdev, query_offset, &ds4_query_len);
+		if (ret) {
+			hid_err(hdev, "Can not read DS4 Query length.\n");
+			return ret;
+		}
+		query_offset++;
+
+		if (ds4_query_len > 0) {
+			ret = rmi_read(hdev, query_offset, info);
+			if (ret) {
+				hid_err(hdev, "Can not read DS4 query.\n");
+				return ret;
+			}
+
+			has_package_id_query = !!(info[0] & BIT(0));
+			has_build_id_query = !!(info[0] & BIT(1));
+		}
+	}
+
+	if (has_package_id_query)
+		prod_info_addr++;
+
+	if (has_build_id_query) {
+		ret = rmi_read_block(hdev, prod_info_addr, info, 3);
+		if (ret) {
+			hid_err(hdev, "Can not read product info.\n");
+			return ret;
+		}
+
+		data->firmware_id = info[1] << 8 | info[0];
+		data->firmware_id += info[2] * 65536;
+	}
+
+	return 0;
+}
+
 static int rmi_populate_f11(struct hid_device *hdev)
 {
 	struct rmi_data *data = hid_get_drvdata(hdev);
@@ -620,6 +751,8 @@
 	bool has_gestures;
 	bool has_rel;
 	bool has_data40 = false;
+	bool has_dribble = false;
+	bool has_palm_detect = false;
 	unsigned x_size, y_size;
 	u16 query_offset;
 
@@ -661,6 +794,14 @@
 	has_rel = !!(buf[0] & BIT(3));
 	has_gestures = !!(buf[0] & BIT(5));
 
+	ret = rmi_read(hdev, data->f11.query_base_addr + 5, buf);
+	if (ret) {
+		hid_err(hdev, "can not get absolute data sources: %d.\n", ret);
+		return ret;
+	}
+
+	has_dribble = !!(buf[0] & BIT(4));
+
 	/*
 	 * At least 4 queries are guaranteed to be present in F11
 	 * +1 for query 5 which is present since absolute events are
@@ -680,6 +821,7 @@
 				ret);
 			return ret;
 		}
+		has_palm_detect = !!(buf[0] & BIT(0));
 		has_query10 = !!(buf[0] & BIT(2));
 
 		query_offset += 2; /* query 7 and 8 are present */
@@ -766,17 +908,38 @@
 	 * retrieve the ctrl registers
 	 * the ctrl register has a size of 20 but a fw bug split it into 16 + 4,
 	 * and there is no way to know if the first 20 bytes are here or not.
-	 * We use only the first 10 bytes, so get only them.
+	 * We use only the first 12 bytes, so get only them.
 	 */
-	ret = rmi_read_block(hdev, data->f11.control_base_addr, buf, 10);
+	ret = rmi_read_block(hdev, data->f11.control_base_addr, buf, 12);
 	if (ret) {
-		hid_err(hdev, "can not read ctrl block of size 10: %d.\n", ret);
+		hid_err(hdev, "can not read ctrl block of size 11: %d.\n", ret);
 		return ret;
 	}
 
 	data->max_x = buf[6] | (buf[7] << 8);
 	data->max_y = buf[8] | (buf[9] << 8);
 
+	if (has_dribble) {
+		buf[0] = buf[0] & ~BIT(6);
+		ret = rmi_write(hdev, data->f11.control_base_addr, buf);
+		if (ret) {
+			hid_err(hdev, "can not write to control reg 0: %d.\n",
+				ret);
+			return ret;
+		}
+	}
+
+	if (has_palm_detect) {
+		buf[11] = buf[11] & ~BIT(0);
+		ret = rmi_write(hdev, data->f11.control_base_addr + 11,
+				&buf[11]);
+		if (ret) {
+			hid_err(hdev, "can not write to control reg 11: %d.\n",
+				ret);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
@@ -858,6 +1021,12 @@
 		return ret;
 	}
 
+	ret = rmi_populate_f01(hdev);
+	if (ret) {
+		hid_err(hdev, "Error while initializing F01 (%d).\n", ret);
+		return ret;
+	}
+
 	ret = rmi_populate_f11(hdev);
 	if (ret) {
 		hid_err(hdev, "Error while initializing F11 (%d).\n", ret);
@@ -907,6 +1076,8 @@
 	if (ret)
 		goto exit;
 
+	hid_info(hdev, "firmware id: %ld\n", data->firmware_id);
+
 	__set_bit(EV_ABS, input->evbit);
 	input_set_abs_params(input, ABS_MT_POSITION_X, 1, data->max_x, 0, 0);
 	input_set_abs_params(input, ABS_MT_POSITION_Y, 1, data->max_y, 0, 0);
diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
new file mode 100644
index 0000000..5614fee
--- /dev/null
+++ b/drivers/hid/hid-sensor-custom.c
@@ -0,0 +1,849 @@
+/*
+ * hid-sensor-custom.c
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/kfifo.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/bsearch.h>
+#include <linux/platform_device.h>
+#include <linux/hid-sensor-hub.h>
+
+#define HID_CUSTOM_NAME_LENGTH		64
+#define HID_CUSTOM_MAX_CORE_ATTRS	10
+#define HID_CUSTOM_TOTAL_ATTRS		(HID_CUSTOM_MAX_CORE_ATTRS + 1)
+#define HID_CUSTOM_FIFO_SIZE		4096
+#define HID_CUSTOM_MAX_FEATURE_BYTES	64
+
+struct hid_sensor_custom_field {
+	int report_id;
+	char group_name[HID_CUSTOM_NAME_LENGTH];
+	struct hid_sensor_hub_attribute_info attribute;
+	struct device_attribute sd_attrs[HID_CUSTOM_MAX_CORE_ATTRS];
+	char attr_name[HID_CUSTOM_TOTAL_ATTRS][HID_CUSTOM_NAME_LENGTH];
+	struct attribute *attrs[HID_CUSTOM_TOTAL_ATTRS];
+	struct attribute_group hid_custom_attribute_group;
+};
+
+struct hid_sensor_custom {
+	struct mutex mutex;
+	struct platform_device *pdev;
+	struct hid_sensor_hub_device *hsdev;
+	struct hid_sensor_hub_callbacks callbacks;
+	int sensor_field_count;
+	struct hid_sensor_custom_field *fields;
+	int input_field_count;
+	int input_report_size;
+	int input_report_recd_size;
+	bool input_skip_sample;
+	bool enable;
+	struct hid_sensor_custom_field *power_state;
+	struct hid_sensor_custom_field *report_state;
+	struct miscdevice custom_dev;
+	struct kfifo data_fifo;
+	unsigned long misc_opened;
+	wait_queue_head_t wait;
+};
+
+/* Header for each sample to user space via dev interface */
+struct hid_sensor_sample {
+	u32 usage_id;
+	u64 timestamp;
+	u32 raw_len;
+} __packed;
+
+static struct attribute hid_custom_attrs[] = {
+	{.name = "name", .mode = S_IRUGO},
+	{.name = "units", .mode = S_IRUGO},
+	{.name = "unit-expo", .mode = S_IRUGO},
+	{.name = "minimum", .mode = S_IRUGO},
+	{.name = "maximum", .mode = S_IRUGO},
+	{.name = "size", .mode = S_IRUGO},
+	{.name = "value", .mode = S_IWUSR | S_IRUGO},
+	{.name = NULL}
+};
+
+static const struct hid_custom_usage_desc {
+	int usage_id;
+	char *desc;
+} hid_custom_usage_desc_table[] = {
+	{0x200201,	"event-sensor-state"},
+	{0x200202,	"event-sensor-event"},
+	{0x200301,	"property-friendly-name"},
+	{0x200302,	"property-persistent-unique-id"},
+	{0x200303,	"property-sensor-status"},
+	{0x200304,	"property-min-report-interval"},
+	{0x200305,	"property-sensor-manufacturer"},
+	{0x200306,	"property-sensor-model"},
+	{0x200307,	"property-sensor-serial-number"},
+	{0x200308,	"property-sensor-description"},
+	{0x200309,	"property-sensor-connection-type"},
+	{0x20030A,	"property-sensor-device-path"},
+	{0x20030B,	"property-hardware-revision"},
+	{0x20030C,	"property-firmware-version"},
+	{0x20030D,	"property-release-date"},
+	{0x20030E,	"property-report-interval"},
+	{0x20030F,	"property-change-sensitivity-absolute"},
+	{0x200310,	"property-change-sensitivity-percent-range"},
+	{0x200311,	"property-change-sensitivity-percent-relative"},
+	{0x200312,	"property-accuracy"},
+	{0x200313,	"property-resolution"},
+	{0x200314,	"property-maximum"},
+	{0x200315,	"property-minimum"},
+	{0x200316,	"property-reporting-state"},
+	{0x200317,	"property-sampling-rate"},
+	{0x200318,	"property-response-curve"},
+	{0x200319,	"property-power-state"},
+	{0x200540,	"data-field-custom"},
+	{0x200541,	"data-field-custom-usage"},
+	{0x200542,	"data-field-custom-boolean-array"},
+	{0x200543,	"data-field-custom-value"},
+	{0x200544,	"data-field-custom-value_1"},
+	{0x200545,	"data-field-custom-value_2"},
+	{0x200546,	"data-field-custom-value_3"},
+	{0x200547,	"data-field-custom-value_4"},
+	{0x200548,	"data-field-custom-value_5"},
+	{0x200549,	"data-field-custom-value_6"},
+	{0x20054A,	"data-field-custom-value_7"},
+	{0x20054B,	"data-field-custom-value_8"},
+	{0x20054C,	"data-field-custom-value_9"},
+	{0x20054D,	"data-field-custom-value_10"},
+	{0x20054E,	"data-field-custom-value_11"},
+	{0x20054F,	"data-field-custom-value_12"},
+	{0x200550,	"data-field-custom-value_13"},
+	{0x200551,	"data-field-custom-value_14"},
+	{0x200552,	"data-field-custom-value_15"},
+	{0x200553,	"data-field-custom-value_16"},
+	{0x200554,	"data-field-custom-value_17"},
+	{0x200555,	"data-field-custom-value_18"},
+	{0x200556,	"data-field-custom-value_19"},
+	{0x200557,	"data-field-custom-value_20"},
+	{0x200558,	"data-field-custom-value_21"},
+	{0x200559,	"data-field-custom-value_22"},
+	{0x20055A,	"data-field-custom-value_23"},
+	{0x20055B,	"data-field-custom-value_24"},
+	{0x20055C,	"data-field-custom-value_25"},
+	{0x20055D,	"data-field-custom-value_26"},
+	{0x20055E,	"data-field-custom-value_27"},
+	{0x20055F,	"data-field-custom-value_28"},
+};
+
+static int usage_id_cmp(const void *p1, const void *p2)
+{
+	if (*(int *)p1 < *(int *)p2)
+		return -1;
+
+	if (*(int *)p1 > *(int *)p2)
+		return 1;
+
+	return 0;
+}
+
+static ssize_t enable_sensor_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev);
+
+	return sprintf(buf, "%d\n", sensor_inst->enable);
+}
+
+static int set_power_report_state(struct hid_sensor_custom *sensor_inst,
+				  bool state)
+{
+	int power_val = -1;
+	int report_val = -1;
+	u32 power_state_usage_id;
+	u32 report_state_usage_id;
+	int ret;
+
+	/*
+	 * It is possible that the power/report state ids are not present.
+	 * In this case this function will return success. But if the
+	 * ids are present, then it will return error if set fails.
+	 */
+	if (state) {
+		power_state_usage_id =
+			HID_USAGE_SENSOR_PROP_POWER_STATE_D0_FULL_POWER_ENUM;
+		report_state_usage_id =
+			HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM;
+	} else {
+		power_state_usage_id =
+			HID_USAGE_SENSOR_PROP_POWER_STATE_D4_POWER_OFF_ENUM;
+		report_state_usage_id =
+			HID_USAGE_SENSOR_PROP_REPORTING_STATE_NO_EVENTS_ENUM;
+	}
+
+	if (sensor_inst->power_state)
+		power_val = hid_sensor_get_usage_index(sensor_inst->hsdev,
+				sensor_inst->power_state->attribute.report_id,
+				sensor_inst->power_state->attribute.index,
+				power_state_usage_id);
+	if (sensor_inst->report_state)
+		report_val = hid_sensor_get_usage_index(sensor_inst->hsdev,
+				sensor_inst->report_state->attribute.report_id,
+				sensor_inst->report_state->attribute.index,
+				report_state_usage_id);
+
+	if (power_val >= 0) {
+		power_val +=
+			sensor_inst->power_state->attribute.logical_minimum;
+		ret = sensor_hub_set_feature(sensor_inst->hsdev,
+				sensor_inst->power_state->attribute.report_id,
+				sensor_inst->power_state->attribute.index,
+				sizeof(power_val),
+				&power_val);
+		if (ret) {
+			hid_err(sensor_inst->hsdev->hdev,
+				"Set power state failed\n");
+			return ret;
+		}
+	}
+
+	if (report_val >= 0) {
+		report_val +=
+			sensor_inst->report_state->attribute.logical_minimum;
+		ret = sensor_hub_set_feature(sensor_inst->hsdev,
+				sensor_inst->report_state->attribute.report_id,
+				sensor_inst->report_state->attribute.index,
+				sizeof(report_val),
+				&report_val);
+		if (ret) {
+			hid_err(sensor_inst->hsdev->hdev,
+				"Set report state failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t enable_sensor_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev);
+	int value;
+	int ret = -EINVAL;
+
+	if (kstrtoint(buf, 0, &value) != 0)
+		return -EINVAL;
+
+	mutex_lock(&sensor_inst->mutex);
+	if (value && !sensor_inst->enable) {
+		ret = sensor_hub_device_open(sensor_inst->hsdev);
+		if (ret)
+			goto unlock_state;
+
+		ret = set_power_report_state(sensor_inst, true);
+		if (ret) {
+			sensor_hub_device_close(sensor_inst->hsdev);
+			goto unlock_state;
+		}
+		sensor_inst->enable = true;
+	} else if (!value && sensor_inst->enable) {
+		ret = set_power_report_state(sensor_inst, false);
+		sensor_hub_device_close(sensor_inst->hsdev);
+		sensor_inst->enable = false;
+	}
+unlock_state:
+	mutex_unlock(&sensor_inst->mutex);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_RW(enable_sensor);
+
+static struct attribute *enable_sensor_attrs[] = {
+	&dev_attr_enable_sensor.attr,
+	NULL,
+};
+
+static struct attribute_group enable_sensor_attr_group = {
+	.attrs = enable_sensor_attrs,
+};
+
+static ssize_t show_value(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev);
+	struct hid_sensor_hub_attribute_info *attribute;
+	int index, usage, field_index;
+	char name[HID_CUSTOM_NAME_LENGTH];
+	bool feature = false;
+	bool input = false;
+	int value = 0;
+
+	if (sscanf(attr->attr.name, "feature-%d-%x-%s", &index, &usage,
+		   name) == 3) {
+		feature = true;
+		field_index = index + sensor_inst->input_field_count;
+	} else if (sscanf(attr->attr.name, "input-%d-%x-%s", &index, &usage,
+		   name) == 3) {
+		input = true;
+		field_index = index;
+	} else
+		return -EINVAL;
+
+	if (!strncmp(name, "value", strlen("value"))) {
+		u32 report_id;
+		int ret;
+
+		attribute = &sensor_inst->fields[field_index].attribute;
+		report_id = attribute->report_id;
+		if (feature) {
+			u8 values[HID_CUSTOM_MAX_FEATURE_BYTES];
+			int len = 0;
+			u64 value = 0;
+			int i = 0;
+
+			ret = sensor_hub_get_feature(sensor_inst->hsdev,
+						     report_id,
+						     index,
+						     sizeof(values), values);
+			if (ret < 0)
+				return ret;
+
+			while (i < ret) {
+				if (i + attribute->size > ret) {
+					len += snprintf(&buf[len],
+							PAGE_SIZE - len,
+							"%d ", values[i]);
+					break;
+				}
+				switch (attribute->size) {
+				case 2:
+					value = (u64) *(u16 *)&values[i];
+					i += attribute->size;
+					break;
+				case 4:
+					value = (u64) *(u32 *)&values[i];
+					i += attribute->size;
+					break;
+				case 8:
+					value = *(u64 *)&values[i];
+					i += attribute->size;
+					break;
+				default:
+					value = (u64) values[i];
+					++i;
+					break;
+				}
+				len += snprintf(&buf[len], PAGE_SIZE - len,
+						"%lld ", value);
+			}
+			len += snprintf(&buf[len], PAGE_SIZE - len, "\n");
+
+			return len;
+		} else if (input)
+			value = sensor_hub_input_attr_get_raw_value(
+						sensor_inst->hsdev,
+						sensor_inst->hsdev->usage,
+						usage, report_id,
+						SENSOR_HUB_SYNC);
+	} else if (!strncmp(name, "units", strlen("units")))
+		value = sensor_inst->fields[field_index].attribute.units;
+	else if (!strncmp(name, "unit-expo", strlen("unit-expo")))
+		value = sensor_inst->fields[field_index].attribute.unit_expo;
+	else if (!strncmp(name, "size", strlen("size")))
+		value = sensor_inst->fields[field_index].attribute.size;
+	else if (!strncmp(name, "minimum", strlen("minimum")))
+		value = sensor_inst->fields[field_index].attribute.
+							logical_minimum;
+	else if (!strncmp(name, "maximum", strlen("maximum")))
+		value = sensor_inst->fields[field_index].attribute.
+							logical_maximum;
+	else if (!strncmp(name, "name", strlen("name"))) {
+		struct hid_custom_usage_desc *usage_desc;
+
+		usage_desc = bsearch(&usage, hid_custom_usage_desc_table,
+				     ARRAY_SIZE(hid_custom_usage_desc_table),
+				     sizeof(struct hid_custom_usage_desc),
+				     usage_id_cmp);
+		if (usage_desc)
+			return snprintf(buf, PAGE_SIZE, "%s\n",
+					usage_desc->desc);
+		else
+			return sprintf(buf, "not-specified\n");
+	 } else
+		return -EINVAL;
+
+	return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t store_value(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev);
+	int index, field_index, usage;
+	char name[HID_CUSTOM_NAME_LENGTH];
+	int value;
+
+	if (sscanf(attr->attr.name, "feature-%d-%x-%s", &index, &usage,
+		   name) == 3) {
+		field_index = index + sensor_inst->input_field_count;
+	} else
+		return -EINVAL;
+
+	if (!strncmp(name, "value", strlen("value"))) {
+		u32 report_id;
+		int ret;
+
+		if (kstrtoint(buf, 0, &value) != 0)
+			return -EINVAL;
+
+		report_id = sensor_inst->fields[field_index].attribute.
+								report_id;
+		ret = sensor_hub_set_feature(sensor_inst->hsdev, report_id,
+					     index, sizeof(value), &value);
+	} else
+		return -EINVAL;
+
+	return count;
+}
+
+static int hid_sensor_capture_sample(struct hid_sensor_hub_device *hsdev,
+				  unsigned usage_id, size_t raw_len,
+				  char *raw_data, void *priv)
+{
+	struct hid_sensor_custom *sensor_inst = platform_get_drvdata(priv);
+	struct hid_sensor_sample header;
+
+	/* If any error occurs in a sample, rest of the fields are ignored */
+	if (sensor_inst->input_skip_sample) {
+		hid_err(sensor_inst->hsdev->hdev, "Skipped remaining data\n");
+		return 0;
+	}
+
+	hid_dbg(sensor_inst->hsdev->hdev, "%s received %d of %d\n", __func__,
+		(int) (sensor_inst->input_report_recd_size + raw_len),
+		sensor_inst->input_report_size);
+
+	if (!test_bit(0, &sensor_inst->misc_opened))
+		return 0;
+
+	if (!sensor_inst->input_report_recd_size) {
+		int required_size = sizeof(struct hid_sensor_sample) +
+						sensor_inst->input_report_size;
+		header.usage_id = hsdev->usage;
+		header.raw_len = sensor_inst->input_report_size;
+		header.timestamp = ktime_get_real_ns();
+		if (kfifo_avail(&sensor_inst->data_fifo) >= required_size) {
+			kfifo_in(&sensor_inst->data_fifo,
+				 (unsigned char *)&header,
+				 sizeof(header));
+		} else
+			sensor_inst->input_skip_sample = true;
+	}
+	if (kfifo_avail(&sensor_inst->data_fifo) >= raw_len)
+		kfifo_in(&sensor_inst->data_fifo, (unsigned char *)raw_data,
+			 raw_len);
+
+	sensor_inst->input_report_recd_size += raw_len;
+
+	return 0;
+}
+
+static int hid_sensor_send_event(struct hid_sensor_hub_device *hsdev,
+				 unsigned usage_id, void *priv)
+{
+	struct hid_sensor_custom *sensor_inst = platform_get_drvdata(priv);
+
+	if (!test_bit(0, &sensor_inst->misc_opened))
+		return 0;
+
+	sensor_inst->input_report_recd_size = 0;
+	sensor_inst->input_skip_sample = false;
+
+	wake_up(&sensor_inst->wait);
+
+	return 0;
+}
+
+static int hid_sensor_custom_add_field(struct hid_sensor_custom *sensor_inst,
+				       int index, int report_type,
+				       struct hid_report *report,
+				       struct hid_field *field)
+{
+	struct hid_sensor_custom_field *sensor_field;
+	void *fields;
+
+	fields = krealloc(sensor_inst->fields,
+			  (sensor_inst->sensor_field_count + 1) *
+			   sizeof(struct hid_sensor_custom_field), GFP_KERNEL);
+	if (!fields) {
+		kfree(sensor_inst->fields);
+		return -ENOMEM;
+	}
+	sensor_inst->fields = fields;
+	sensor_field = &sensor_inst->fields[sensor_inst->sensor_field_count];
+	sensor_field->attribute.usage_id = sensor_inst->hsdev->usage;
+	if (field->logical)
+		sensor_field->attribute.attrib_id = field->logical;
+	else
+		sensor_field->attribute.attrib_id = field->usage[0].hid;
+
+	sensor_field->attribute.index = index;
+	sensor_field->attribute.report_id = report->id;
+	sensor_field->attribute.units = field->unit;
+	sensor_field->attribute.unit_expo = field->unit_exponent;
+	sensor_field->attribute.size = (field->report_size / 8);
+	sensor_field->attribute.logical_minimum = field->logical_minimum;
+	sensor_field->attribute.logical_maximum = field->logical_maximum;
+
+	if (report_type == HID_FEATURE_REPORT)
+		snprintf(sensor_field->group_name,
+			 sizeof(sensor_field->group_name), "feature-%x-%x",
+			 sensor_field->attribute.index,
+			 sensor_field->attribute.attrib_id);
+	else if (report_type == HID_INPUT_REPORT) {
+		snprintf(sensor_field->group_name,
+			 sizeof(sensor_field->group_name),
+			 "input-%x-%x", sensor_field->attribute.index,
+			 sensor_field->attribute.attrib_id);
+		sensor_inst->input_field_count++;
+		sensor_inst->input_report_size += (field->report_size *
+						   field->report_count) / 8;
+	}
+
+	memset(&sensor_field->hid_custom_attribute_group, 0,
+	       sizeof(struct attribute_group));
+	sensor_inst->sensor_field_count++;
+
+	return 0;
+}
+
+static int hid_sensor_custom_add_fields(struct hid_sensor_custom *sensor_inst,
+					struct hid_report_enum *report_enum,
+					int report_type)
+{
+	int i;
+	int ret;
+	struct hid_report *report;
+	struct hid_field *field;
+	struct hid_sensor_hub_device *hsdev = sensor_inst->hsdev;
+
+	list_for_each_entry(report, &report_enum->report_list, list) {
+		for (i = 0; i < report->maxfield; ++i) {
+			field = report->field[i];
+			if (field->maxusage &&
+			    ((field->usage[0].collection_index >=
+			      hsdev->start_collection_index) &&
+			      (field->usage[0].collection_index <
+			       hsdev->end_collection_index))) {
+
+				ret = hid_sensor_custom_add_field(sensor_inst,
+								  i,
+								  report_type,
+								  report,
+								  field);
+				if (ret)
+					return ret;
+
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int hid_sensor_custom_add_attributes(struct hid_sensor_custom
+								*sensor_inst)
+{
+	struct hid_sensor_hub_device *hsdev = sensor_inst->hsdev;
+	struct hid_device *hdev = hsdev->hdev;
+	int ret = -1;
+	int i, j;
+
+	for (j = 0; j < HID_REPORT_TYPES; ++j) {
+		if (j == HID_OUTPUT_REPORT)
+			continue;
+
+		ret = hid_sensor_custom_add_fields(sensor_inst,
+						   &hdev->report_enum[j], j);
+		if (ret)
+			return ret;
+
+	}
+
+	/* Create sysfs attributes */
+	for (i = 0; i < sensor_inst->sensor_field_count; ++i) {
+		j = 0;
+		while (j < HID_CUSTOM_TOTAL_ATTRS &&
+		       hid_custom_attrs[j].name) {
+			struct device_attribute *device_attr;
+
+			device_attr = &sensor_inst->fields[i].sd_attrs[j];
+
+			snprintf((char *)&sensor_inst->fields[i].attr_name[j],
+				 HID_CUSTOM_NAME_LENGTH, "%s-%s",
+				 sensor_inst->fields[i].group_name,
+				 hid_custom_attrs[j].name);
+			sysfs_attr_init(&device_attr->attr);
+			device_attr->attr.name =
+				(char *)&sensor_inst->fields[i].attr_name[j];
+			device_attr->attr.mode = hid_custom_attrs[j].mode;
+			device_attr->show = show_value;
+			if (hid_custom_attrs[j].mode & S_IWUSR)
+				device_attr->store = store_value;
+			sensor_inst->fields[i].attrs[j] = &device_attr->attr;
+			++j;
+		}
+		sensor_inst->fields[i].attrs[j] = NULL;
+		sensor_inst->fields[i].hid_custom_attribute_group.attrs =
+						sensor_inst->fields[i].attrs;
+		sensor_inst->fields[i].hid_custom_attribute_group.name =
+					sensor_inst->fields[i].group_name;
+		ret = sysfs_create_group(&sensor_inst->pdev->dev.kobj,
+					 &sensor_inst->fields[i].
+					 hid_custom_attribute_group);
+		if (ret)
+			break;
+
+		/* For power or report field store indexes */
+		if (sensor_inst->fields[i].attribute.attrib_id ==
+					HID_USAGE_SENSOR_PROY_POWER_STATE)
+			sensor_inst->power_state = &sensor_inst->fields[i];
+		else if (sensor_inst->fields[i].attribute.attrib_id ==
+					HID_USAGE_SENSOR_PROP_REPORT_STATE)
+			sensor_inst->report_state = &sensor_inst->fields[i];
+	}
+
+	return ret;
+}
+
+static void hid_sensor_custom_remove_attributes(struct hid_sensor_custom *
+								sensor_inst)
+{
+	int i;
+
+	for (i = 0; i < sensor_inst->sensor_field_count; ++i)
+		sysfs_remove_group(&sensor_inst->pdev->dev.kobj,
+				   &sensor_inst->fields[i].
+				   hid_custom_attribute_group);
+
+	kfree(sensor_inst->fields);
+}
+
+static ssize_t hid_sensor_custom_read(struct file *file, char __user *buf,
+				      size_t count, loff_t *f_ps)
+{
+	struct hid_sensor_custom *sensor_inst;
+	unsigned int copied;
+	int ret;
+
+	sensor_inst = container_of(file->private_data,
+				   struct hid_sensor_custom, custom_dev);
+
+	if (count < sizeof(struct hid_sensor_sample))
+		return -EINVAL;
+
+	do {
+		if (kfifo_is_empty(&sensor_inst->data_fifo)) {
+			if (file->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			ret = wait_event_interruptible(sensor_inst->wait,
+				!kfifo_is_empty(&sensor_inst->data_fifo));
+			if (ret)
+				return ret;
+		}
+		ret = kfifo_to_user(&sensor_inst->data_fifo, buf, count,
+				    &copied);
+		if (ret)
+			return ret;
+
+	} while (copied == 0);
+
+	return copied;
+}
+
+static int hid_sensor_custom_release(struct inode *inode, struct file *file)
+{
+	struct hid_sensor_custom *sensor_inst;
+
+	sensor_inst = container_of(file->private_data,
+				   struct hid_sensor_custom, custom_dev);
+
+	clear_bit(0, &sensor_inst->misc_opened);
+
+	return 0;
+}
+
+static int hid_sensor_custom_open(struct inode *inode, struct file *file)
+{
+	struct hid_sensor_custom *sensor_inst;
+
+	sensor_inst = container_of(file->private_data,
+				   struct hid_sensor_custom, custom_dev);
+	/* We essentially have single reader and writer */
+	if (test_and_set_bit(0, &sensor_inst->misc_opened))
+		return -EBUSY;
+
+	return nonseekable_open(inode, file);
+}
+
+static unsigned int hid_sensor_custom_poll(struct file *file,
+					   struct poll_table_struct *wait)
+{
+	struct hid_sensor_custom *sensor_inst;
+	unsigned int mask = 0;
+
+	sensor_inst = container_of(file->private_data,
+				   struct hid_sensor_custom, custom_dev);
+
+	poll_wait(file, &sensor_inst->wait, wait);
+
+	if (!kfifo_is_empty(&sensor_inst->data_fifo))
+		mask = POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
+static const struct file_operations hid_sensor_custom_fops = {
+	.open =  hid_sensor_custom_open,
+	.read =  hid_sensor_custom_read,
+	.release = hid_sensor_custom_release,
+	.poll = hid_sensor_custom_poll,
+	.llseek = noop_llseek,
+};
+
+static int hid_sensor_custom_dev_if_add(struct hid_sensor_custom *sensor_inst)
+{
+	int ret;
+
+	ret = kfifo_alloc(&sensor_inst->data_fifo, HID_CUSTOM_FIFO_SIZE,
+			  GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&sensor_inst->wait);
+
+	sensor_inst->custom_dev.minor = MISC_DYNAMIC_MINOR;
+	sensor_inst->custom_dev.name = dev_name(&sensor_inst->pdev->dev);
+	sensor_inst->custom_dev.fops = &hid_sensor_custom_fops,
+	ret = misc_register(&sensor_inst->custom_dev);
+	if (ret) {
+		kfifo_free(&sensor_inst->data_fifo);
+		return ret;
+	}
+	return 0;
+}
+
+static void hid_sensor_custom_dev_if_remove(struct hid_sensor_custom
+								*sensor_inst)
+{
+	wake_up(&sensor_inst->wait);
+	misc_deregister(&sensor_inst->custom_dev);
+	kfifo_free(&sensor_inst->data_fifo);
+
+}
+
+static int hid_sensor_custom_probe(struct platform_device *pdev)
+{
+	struct hid_sensor_custom *sensor_inst;
+	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
+	int ret;
+
+	sensor_inst = devm_kzalloc(&pdev->dev, sizeof(*sensor_inst),
+				   GFP_KERNEL);
+	if (!sensor_inst)
+		return -ENOMEM;
+
+	sensor_inst->callbacks.capture_sample = hid_sensor_capture_sample;
+	sensor_inst->callbacks.send_event = hid_sensor_send_event;
+	sensor_inst->callbacks.pdev = pdev;
+	sensor_inst->hsdev = hsdev;
+	sensor_inst->pdev = pdev;
+	mutex_init(&sensor_inst->mutex);
+	platform_set_drvdata(pdev, sensor_inst);
+	ret = sensor_hub_register_callback(hsdev, hsdev->usage,
+					   &sensor_inst->callbacks);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "callback reg failed\n");
+		return ret;
+	}
+
+	ret = sysfs_create_group(&sensor_inst->pdev->dev.kobj,
+				 &enable_sensor_attr_group);
+	if (ret)
+		goto err_remove_callback;
+
+	ret = hid_sensor_custom_add_attributes(sensor_inst);
+	if (ret)
+		goto err_remove_group;
+
+	ret = hid_sensor_custom_dev_if_add(sensor_inst);
+	if (ret)
+		goto err_remove_attributes;
+
+	return 0;
+
+err_remove_attributes:
+	hid_sensor_custom_remove_attributes(sensor_inst);
+err_remove_group:
+	sysfs_remove_group(&sensor_inst->pdev->dev.kobj,
+			   &enable_sensor_attr_group);
+err_remove_callback:
+	sensor_hub_remove_callback(hsdev, hsdev->usage);
+
+	return ret;
+}
+
+static int hid_sensor_custom_remove(struct platform_device *pdev)
+{
+	struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev);
+	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
+
+	hid_sensor_custom_dev_if_remove(sensor_inst);
+	hid_sensor_custom_remove_attributes(sensor_inst);
+	sysfs_remove_group(&sensor_inst->pdev->dev.kobj,
+			   &enable_sensor_attr_group);
+	sensor_hub_remove_callback(hsdev, hsdev->usage);
+
+	return 0;
+}
+
+static struct platform_device_id hid_sensor_custom_ids[] = {
+	{
+		.name = "HID-SENSOR-2000e1",
+	},
+	{
+		.name = "HID-SENSOR-2000e2",
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, hid_sensor_custom_ids);
+
+static struct platform_driver hid_sensor_custom_platform_driver = {
+	.id_table = hid_sensor_custom_ids,
+	.driver = {
+		.name	= KBUILD_MODNAME,
+	},
+	.probe		= hid_sensor_custom_probe,
+	.remove		= hid_sensor_custom_remove,
+};
+module_platform_driver(hid_sensor_custom_platform_driver);
+
+MODULE_DESCRIPTION("HID Sensor Custom and Generic sensor Driver");
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index e54ce10..c3f6f1e3 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -29,29 +29,10 @@
 #define HID_SENSOR_HUB_ENUM_QUIRK	0x01
 
 /**
- * struct sensor_hub_pending - Synchronous read pending information
- * @status:		Pending status true/false.
- * @ready:		Completion synchronization data.
- * @usage_id:		Usage id for physical device, E.g. Gyro usage id.
- * @attr_usage_id:	Usage Id of a field, E.g. X-AXIS for a gyro.
- * @raw_size:		Response size for a read request.
- * @raw_data:		Place holder for received response.
- */
-struct sensor_hub_pending {
-	bool status;
-	struct completion ready;
-	u32 usage_id;
-	u32 attr_usage_id;
-	int raw_size;
-	u8  *raw_data;
-};
-
-/**
  * struct sensor_hub_data - Hold a instance data for a HID hub device
  * @hsdev:		Stored hid instance for current hub device.
  * @mutex:		Mutex to serialize synchronous request.
  * @lock:		Spin lock to protect pending request structure.
- * @pending:		Holds information of pending sync read request.
  * @dyn_callback_list:	Holds callback function
  * @dyn_callback_lock:	spin lock to protect callback list
  * @hid_sensor_hub_client_devs:	Stores all MFD cells for a hub instance.
@@ -61,7 +42,6 @@
 struct sensor_hub_data {
 	struct mutex mutex;
 	spinlock_t lock;
-	struct sensor_hub_pending pending;
 	struct list_head dyn_callback_list;
 	spinlock_t dyn_callback_lock;
 	struct mfd_cell *hid_sensor_hub_client_devs;
@@ -106,7 +86,8 @@
 
 	for (i = 0; i < hdev->maxcollection; ++i) {
 		struct hid_collection *collection = &hdev->collection[i];
-		if (collection->type == HID_COLLECTION_PHYSICAL)
+		if (collection->type == HID_COLLECTION_PHYSICAL ||
+		    collection->type == HID_COLLECTION_APPLICATION)
 			++count;
 	}
 
@@ -139,7 +120,8 @@
 
 	spin_lock_irqsave(&pdata->dyn_callback_lock, flags);
 	list_for_each_entry(callback, &pdata->dyn_callback_list, list)
-		if (callback->usage_id == usage_id &&
+		if ((callback->usage_id == usage_id ||
+		     callback->usage_id == HID_USAGE_SENSOR_COLLECTION) &&
 			(collection_index >=
 				callback->hsdev->start_collection_index) &&
 			(collection_index <
@@ -179,7 +161,18 @@
 	callback->usage_callback = usage_callback;
 	callback->usage_id = usage_id;
 	callback->priv = NULL;
-	list_add_tail(&callback->list, &pdata->dyn_callback_list);
+	/*
+	 * If there is a handler registered for the collection type, then
+	 * it will handle all reports for sensors in this collection. If
+	 * there is also an individual sensor handler registration, then
+	 * we want to make sure that the reports are directed to collection
+	 * handler, as this may be a fusion sensor. So add collection handlers
+	 * to the beginning of the list, so that they are matched first.
+	 */
+	if (usage_id == HID_USAGE_SENSOR_COLLECTION)
+		list_add(&callback->list, &pdata->dyn_callback_list);
+	else
+		list_add_tail(&callback->list, &pdata->dyn_callback_list);
 	spin_unlock_irqrestore(&pdata->dyn_callback_lock, flags);
 
 	return 0;
@@ -208,10 +201,14 @@
 EXPORT_SYMBOL_GPL(sensor_hub_remove_callback);
 
 int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
-				u32 field_index, s32 value)
+			   u32 field_index, int buffer_size, void *buffer)
 {
 	struct hid_report *report;
 	struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev);
+	__s32 *buf32 = buffer;
+	int i = 0;
+	int remaining_bytes;
+	__s32 value;
 	int ret = 0;
 
 	mutex_lock(&data->mutex);
@@ -220,7 +217,21 @@
 		ret = -EINVAL;
 		goto done_proc;
 	}
-	hid_set_field(report->field[field_index], 0, value);
+
+	remaining_bytes = do_div(buffer_size, sizeof(__s32));
+	if (buffer_size) {
+		for (i = 0; i < buffer_size; ++i) {
+			hid_set_field(report->field[field_index], i,
+				      (__force __s32)cpu_to_le32(*buf32));
+			++buf32;
+		}
+	}
+	if (remaining_bytes) {
+		value = 0;
+		memcpy(&value, (u8 *)buf32, remaining_bytes);
+		hid_set_field(report->field[field_index], i,
+			      (__force __s32)cpu_to_le32(value));
+	}
 	hid_hw_request(hsdev->hdev, report, HID_REQ_SET_REPORT);
 	hid_hw_wait(hsdev->hdev);
 
@@ -232,10 +243,11 @@
 EXPORT_SYMBOL_GPL(sensor_hub_set_feature);
 
 int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
-				u32 field_index, s32 *value)
+			   u32 field_index, int buffer_size, void *buffer)
 {
 	struct hid_report *report;
 	struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev);
+	int report_size;
 	int ret = 0;
 
 	mutex_lock(&data->mutex);
@@ -247,7 +259,17 @@
 	}
 	hid_hw_request(hsdev->hdev, report, HID_REQ_GET_REPORT);
 	hid_hw_wait(hsdev->hdev);
-	*value = report->field[field_index]->value[0];
+
+	/* calculate number of bytes required to read this field */
+	report_size = DIV_ROUND_UP(report->field[field_index]->report_size,
+				   8) *
+				   report->field[field_index]->report_count;
+	if (!report_size) {
+		ret = -EINVAL;
+		goto done_proc;
+	}
+	ret = min(report_size, buffer_size);
+	memcpy(buffer, report->field[field_index]->value, ret);
 
 done_proc:
 	mutex_unlock(&data->mutex);
@@ -259,47 +281,54 @@
 
 int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
 					u32 usage_id,
-					u32 attr_usage_id, u32 report_id)
+					u32 attr_usage_id, u32 report_id,
+					enum sensor_hub_read_flags flag)
 {
 	struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev);
 	unsigned long flags;
 	struct hid_report *report;
 	int ret_val = 0;
 
-	mutex_lock(&data->mutex);
-	memset(&data->pending, 0, sizeof(data->pending));
-	init_completion(&data->pending.ready);
-	data->pending.usage_id = usage_id;
-	data->pending.attr_usage_id = attr_usage_id;
-	data->pending.raw_size = 0;
-
-	spin_lock_irqsave(&data->lock, flags);
-	data->pending.status = true;
-	spin_unlock_irqrestore(&data->lock, flags);
-	report = sensor_hub_report(report_id, hsdev->hdev, HID_INPUT_REPORT);
+	report = sensor_hub_report(report_id, hsdev->hdev,
+				   HID_INPUT_REPORT);
 	if (!report)
-		goto err_free;
+		return -EINVAL;
 
-	hid_hw_request(hsdev->hdev, report, HID_REQ_GET_REPORT);
-	wait_for_completion_interruptible_timeout(&data->pending.ready, HZ*5);
-	switch (data->pending.raw_size) {
-	case 1:
-		ret_val = *(u8 *)data->pending.raw_data;
-		break;
-	case 2:
-		ret_val = *(u16 *)data->pending.raw_data;
-		break;
-	case 4:
-		ret_val = *(u32 *)data->pending.raw_data;
-		break;
-	default:
-		ret_val = 0;
+	mutex_lock(&hsdev->mutex);
+	if (flag == SENSOR_HUB_SYNC) {
+		memset(&hsdev->pending, 0, sizeof(hsdev->pending));
+		init_completion(&hsdev->pending.ready);
+		hsdev->pending.usage_id = usage_id;
+		hsdev->pending.attr_usage_id = attr_usage_id;
+		hsdev->pending.raw_size = 0;
+
+		spin_lock_irqsave(&data->lock, flags);
+		hsdev->pending.status = true;
+		spin_unlock_irqrestore(&data->lock, flags);
 	}
-	kfree(data->pending.raw_data);
-
-err_free:
-	data->pending.status = false;
+	mutex_lock(&data->mutex);
+	hid_hw_request(hsdev->hdev, report, HID_REQ_GET_REPORT);
 	mutex_unlock(&data->mutex);
+	if (flag == SENSOR_HUB_SYNC) {
+		wait_for_completion_interruptible_timeout(
+						&hsdev->pending.ready, HZ*5);
+		switch (hsdev->pending.raw_size) {
+		case 1:
+			ret_val = *(u8 *)hsdev->pending.raw_data;
+			break;
+		case 2:
+			ret_val = *(u16 *)hsdev->pending.raw_data;
+			break;
+		case 4:
+			ret_val = *(u32 *)hsdev->pending.raw_data;
+			break;
+		default:
+			ret_val = 0;
+		}
+		kfree(hsdev->pending.raw_data);
+		hsdev->pending.status = false;
+	}
+	mutex_unlock(&hsdev->mutex);
 
 	return ret_val;
 }
@@ -455,16 +484,6 @@
 					report->field[i]->report_count)/8);
 		sz = (report->field[i]->report_size *
 					report->field[i]->report_count)/8;
-		if (pdata->pending.status && pdata->pending.attr_usage_id ==
-				report->field[i]->usage->hid) {
-			hid_dbg(hdev, "data was pending ...\n");
-			pdata->pending.raw_data = kmemdup(ptr, sz, GFP_ATOMIC);
-			if (pdata->pending.raw_data)
-				pdata->pending.raw_size = sz;
-			else
-				pdata->pending.raw_size = 0;
-			complete(&pdata->pending.ready);
-		}
 		collection = &hdev->collection[
 				report->field[i]->usage->collection_index];
 		hid_dbg(hdev, "collection->usage %x\n",
@@ -474,8 +493,23 @@
 				report->field[i]->physical,
 				report->field[i]->usage[0].collection_index,
 				&hsdev, &priv);
-
-		if (callback && callback->capture_sample) {
+		if (!callback) {
+			ptr += sz;
+			continue;
+		}
+		if (hsdev->pending.status && (hsdev->pending.attr_usage_id ==
+					      report->field[i]->usage->hid ||
+					      hsdev->pending.attr_usage_id ==
+					      report->field[i]->logical)) {
+			hid_dbg(hdev, "data was pending ...\n");
+			hsdev->pending.raw_data = kmemdup(ptr, sz, GFP_ATOMIC);
+			if (hsdev->pending.raw_data)
+				hsdev->pending.raw_size = sz;
+			else
+				hsdev->pending.raw_size = 0;
+			complete(&hsdev->pending.ready);
+		}
+		if (callback->capture_sample) {
 			if (report->field[i]->logical)
 				callback->capture_sample(hsdev,
 					report->field[i]->logical, sz, ptr,
@@ -572,6 +606,7 @@
 	int dev_cnt;
 	struct hid_sensor_hub_device *hsdev;
 	struct hid_sensor_hub_device *last_hsdev = NULL;
+	struct hid_sensor_hub_device *collection_hsdev = NULL;
 
 	sd = devm_kzalloc(&hdev->dev, sizeof(*sd), GFP_KERNEL);
 	if (!sd) {
@@ -618,7 +653,8 @@
 	for (i = 0; i < hdev->maxcollection; ++i) {
 		struct hid_collection *collection = &hdev->collection[i];
 
-		if (collection->type == HID_COLLECTION_PHYSICAL) {
+		if (collection->type == HID_COLLECTION_PHYSICAL ||
+		    collection->type == HID_COLLECTION_APPLICATION) {
 
 			hsdev = devm_kzalloc(&hdev->dev, sizeof(*hsdev),
 					     GFP_KERNEL);
@@ -630,6 +666,8 @@
 			hsdev->hdev = hdev;
 			hsdev->vendor_id = hdev->vendor;
 			hsdev->product_id = hdev->product;
+			hsdev->usage = collection->usage;
+			mutex_init(&hsdev->mutex);
 			hsdev->start_collection_index = i;
 			if (last_hsdev)
 				last_hsdev->end_collection_index = i;
@@ -653,10 +691,17 @@
 			hid_dbg(hdev, "Adding %s:%d\n", name,
 					hsdev->start_collection_index);
 			sd->hid_sensor_client_cnt++;
+			if (collection_hsdev)
+				collection_hsdev->end_collection_index = i;
+			if (collection->type == HID_COLLECTION_APPLICATION &&
+			    collection->usage == HID_USAGE_SENSOR_COLLECTION)
+				collection_hsdev = hsdev;
 		}
 	}
 	if (last_hsdev)
 		last_hsdev->end_collection_index = i;
+	if (collection_hsdev)
+		collection_hsdev->end_collection_index = i;
 
 	ret = mfd_add_hotplug_devices(&hdev->dev,
 			sd->hid_sensor_hub_client_devs,
@@ -676,13 +721,18 @@
 {
 	struct sensor_hub_data *data = hid_get_drvdata(hdev);
 	unsigned long flags;
+	int i;
 
 	hid_dbg(hdev, " hardware removed\n");
 	hid_hw_close(hdev);
 	hid_hw_stop(hdev);
 	spin_lock_irqsave(&data->lock, flags);
-	if (data->pending.status)
-		complete(&data->pending.ready);
+	for (i = 0; i < data->hid_sensor_client_cnt; ++i) {
+		struct hid_sensor_hub_device *hsdev =
+			data->hid_sensor_hub_client_devs[i].platform_data;
+		if (hsdev->pending.status)
+			complete(&hsdev->pending.ready);
+	}
 	spin_unlock_irqrestore(&data->lock, flags);
 	mfd_remove_devices(&hdev->dev);
 	hid_set_drvdata(hdev, NULL);
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index c906300..6ca96ce 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -802,7 +802,8 @@
 #define DS4_REPORT_0x05_SIZE 32
 #define DS4_REPORT_0x11_SIZE 78
 #define DS4_REPORT_0x81_SIZE 7
-#define SIXAXIS_REPORT_0xF2_SIZE 18
+#define SIXAXIS_REPORT_0xF2_SIZE 17
+#define SIXAXIS_REPORT_0xF5_SIZE 8
 
 static DEFINE_SPINLOCK(sony_dev_list_lock);
 static LIST_HEAD(sony_device_list);
@@ -1131,18 +1132,38 @@
  */
 static int sixaxis_set_operational_usb(struct hid_device *hdev)
 {
+	const int buf_size =
+		max(SIXAXIS_REPORT_0xF2_SIZE, SIXAXIS_REPORT_0xF5_SIZE);
+	__u8 *buf;
 	int ret;
-	char *buf = kmalloc(18, GFP_KERNEL);
 
+	buf = kmalloc(buf_size, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
-	ret = hid_hw_raw_request(hdev, 0xf2, buf, 17, HID_FEATURE_REPORT,
-				 HID_REQ_GET_REPORT);
+	ret = hid_hw_raw_request(hdev, 0xf2, buf, SIXAXIS_REPORT_0xF2_SIZE,
+				 HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
+	if (ret < 0) {
+		hid_err(hdev, "can't set operational mode: step 1\n");
+		goto out;
+	}
 
+	/*
+	 * Some compatible controllers like the Speedlink Strike FX and
+	 * Gasia need another query plus an USB interrupt to get operational.
+	 */
+	ret = hid_hw_raw_request(hdev, 0xf5, buf, SIXAXIS_REPORT_0xF5_SIZE,
+				 HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
+	if (ret < 0) {
+		hid_err(hdev, "can't set operational mode: step 2\n");
+		goto out;
+	}
+
+	ret = hid_hw_output_report(hdev, buf, 1);
 	if (ret < 0)
-		hid_err(hdev, "can't set operational mode\n");
+		hid_err(hdev, "can't set operational mode: step 3\n");
 
+out:
 	kfree(buf);
 
 	return ret;
diff --git a/drivers/hid/hid-steelseries.c b/drivers/hid/hid-steelseries.c
index 29f328f..3edd4ac 100644
--- a/drivers/hid/hid-steelseries.c
+++ b/drivers/hid/hid-steelseries.c
@@ -12,7 +12,6 @@
  */
 
 #include <linux/device.h>
-#include <linux/usb.h>
 #include <linux/hid.h>
 #include <linux/module.h>
 
diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c
index fb8b516..9416731 100644
--- a/drivers/hid/hid-uclogic.c
+++ b/drivers/hid/hid-uclogic.c
@@ -1,7 +1,8 @@
 /*
  *  HID driver for UC-Logic devices not fully compliant with HID standard
  *
- *  Copyright (c) 2010 Nikolai Kondrashov
+ *  Copyright (c) 2010-2014 Nikolai Kondrashov
+ *  Copyright (c) 2013 Martin Rusko
  */
 
 /*
@@ -15,6 +16,8 @@
 #include <linux/hid.h>
 #include <linux/module.h>
 #include <linux/usb.h>
+#include <asm/unaligned.h>
+#include "usbhid/usbhid.h"
 
 #include "hid-ids.h"
 
@@ -546,11 +549,93 @@
 	0xC0        /*  End Collection              */
 };
 
+/* Report descriptor template placeholder head */
+#define UCLOGIC_PH_HEAD	0xFE, 0xED, 0x1D
+
+/* Report descriptor template placeholder IDs */
+enum uclogic_ph_id {
+	UCLOGIC_PH_ID_X_LM,
+	UCLOGIC_PH_ID_X_PM,
+	UCLOGIC_PH_ID_Y_LM,
+	UCLOGIC_PH_ID_Y_PM,
+	UCLOGIC_PH_ID_PRESSURE_LM,
+	UCLOGIC_PH_ID_NUM
+};
+
+/* Report descriptor template placeholder */
+#define UCLOGIC_PH(_ID) UCLOGIC_PH_HEAD, UCLOGIC_PH_ID_##_ID
+#define UCLOGIC_PEN_REPORT_ID	0x07
+
+/* Fixed report descriptor template */
+static const __u8 uclogic_tablet_rdesc_template[] = {
+	0x05, 0x0D,             /*  Usage Page (Digitizer),                 */
+	0x09, 0x02,             /*  Usage (Pen),                            */
+	0xA1, 0x01,             /*  Collection (Application),               */
+	0x85, 0x07,             /*      Report ID (7),                      */
+	0x09, 0x20,             /*      Usage (Stylus),                     */
+	0xA0,                   /*      Collection (Physical),              */
+	0x14,                   /*          Logical Minimum (0),            */
+	0x25, 0x01,             /*          Logical Maximum (1),            */
+	0x75, 0x01,             /*          Report Size (1),                */
+	0x09, 0x42,             /*          Usage (Tip Switch),             */
+	0x09, 0x44,             /*          Usage (Barrel Switch),          */
+	0x09, 0x46,             /*          Usage (Tablet Pick),            */
+	0x95, 0x03,             /*          Report Count (3),               */
+	0x81, 0x02,             /*          Input (Variable),               */
+	0x95, 0x03,             /*          Report Count (3),               */
+	0x81, 0x03,             /*          Input (Constant, Variable),     */
+	0x09, 0x32,             /*          Usage (In Range),               */
+	0x95, 0x01,             /*          Report Count (1),               */
+	0x81, 0x02,             /*          Input (Variable),               */
+	0x95, 0x01,             /*          Report Count (1),               */
+	0x81, 0x03,             /*          Input (Constant, Variable),     */
+	0x75, 0x10,             /*          Report Size (16),               */
+	0x95, 0x01,             /*          Report Count (1),               */
+	0xA4,                   /*          Push,                           */
+	0x05, 0x01,             /*          Usage Page (Desktop),           */
+	0x65, 0x13,             /*          Unit (Inch),                    */
+	0x55, 0xFD,             /*          Unit Exponent (-3),             */
+	0x34,                   /*          Physical Minimum (0),           */
+	0x09, 0x30,             /*          Usage (X),                      */
+	0x27, UCLOGIC_PH(X_LM), /*          Logical Maximum (PLACEHOLDER),  */
+	0x47, UCLOGIC_PH(X_PM), /*          Physical Maximum (PLACEHOLDER), */
+	0x81, 0x02,             /*          Input (Variable),               */
+	0x09, 0x31,             /*          Usage (Y),                      */
+	0x27, UCLOGIC_PH(Y_LM), /*          Logical Maximum (PLACEHOLDER),  */
+	0x47, UCLOGIC_PH(Y_PM), /*          Physical Maximum (PLACEHOLDER), */
+	0x81, 0x02,             /*          Input (Variable),               */
+	0xB4,                   /*          Pop,                            */
+	0x09, 0x30,             /*          Usage (Tip Pressure),           */
+	0x27,
+	UCLOGIC_PH(PRESSURE_LM),/*          Logical Maximum (PLACEHOLDER),  */
+	0x81, 0x02,             /*          Input (Variable),               */
+	0xC0,                   /*      End Collection,                     */
+	0xC0                    /*  End Collection                          */
+};
+
+/* Parameter indices */
+enum uclogic_prm {
+	UCLOGIC_PRM_X_LM	= 1,
+	UCLOGIC_PRM_Y_LM	= 2,
+	UCLOGIC_PRM_PRESSURE_LM	= 4,
+	UCLOGIC_PRM_RESOLUTION	= 5,
+	UCLOGIC_PRM_NUM
+};
+
+/* Driver data */
+struct uclogic_drvdata {
+	__u8 *rdesc;
+	unsigned int rsize;
+	bool invert_pen_inrange;
+	bool ignore_pen_usage;
+};
+
 static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 					unsigned int *rsize)
 {
 	struct usb_interface *iface = to_usb_interface(hdev->dev.parent);
 	__u8 iface_num = iface->cur_altsetting->desc.bInterfaceNumber;
+	struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev);
 
 	switch (hdev->product) {
 	case USB_DEVICE_ID_UCLOGIC_TABLET_PF1209:
@@ -621,11 +706,241 @@
 			break;
 		}
 		break;
+	default:
+		if (drvdata->rdesc != NULL) {
+			rdesc = drvdata->rdesc;
+			*rsize = drvdata->rsize;
+		}
 	}
 
 	return rdesc;
 }
 
+static int uclogic_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+		struct hid_field *field, struct hid_usage *usage,
+		unsigned long **bit, int *max)
+{
+	struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev);
+
+	/* discard the unused pen interface */
+	if ((drvdata->ignore_pen_usage) &&
+	    (field->application == HID_DG_PEN))
+		return -1;
+
+	/* let hid-core decide what to do */
+	return 0;
+}
+
+static void uclogic_input_configured(struct hid_device *hdev,
+		struct hid_input *hi)
+{
+	char *name;
+	const char *suffix = NULL;
+	struct hid_field *field;
+	size_t len;
+
+	/* no report associated (HID_QUIRK_MULTI_INPUT not set) */
+	if (!hi->report)
+		return;
+
+	field = hi->report->field[0];
+
+	switch (field->application) {
+	case HID_GD_KEYBOARD:
+		suffix = "Keyboard";
+		break;
+	case HID_GD_MOUSE:
+		suffix = "Mouse";
+		break;
+	case HID_GD_KEYPAD:
+		suffix = "Pad";
+		break;
+	case HID_DG_PEN:
+		suffix = "Pen";
+		break;
+	case HID_CP_CONSUMER_CONTROL:
+		suffix = "Consumer Control";
+		break;
+	case HID_GD_SYSTEM_CONTROL:
+		suffix = "System Control";
+		break;
+	}
+
+	if (suffix) {
+		len = strlen(hdev->name) + 2 + strlen(suffix);
+		name = devm_kzalloc(&hi->input->dev, len, GFP_KERNEL);
+		if (name) {
+			snprintf(name, len, "%s %s", hdev->name, suffix);
+			hi->input->name = name;
+		}
+	}
+}
+
+/**
+ * Enable fully-functional tablet mode and determine device parameters.
+ *
+ * @hdev:	HID device
+ */
+static int uclogic_tablet_enable(struct hid_device *hdev)
+{
+	int rc;
+	struct usb_device *usb_dev = hid_to_usb_dev(hdev);
+	struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev);
+	__le16 *buf = NULL;
+	size_t len;
+	s32 params[UCLOGIC_PH_ID_NUM];
+	s32 resolution;
+	__u8 *p;
+	s32 v;
+
+	/*
+	 * Read string descriptor containing tablet parameters. The specific
+	 * string descriptor and data were discovered by sniffing the Windows
+	 * driver traffic.
+	 * NOTE: This enables fully-functional tablet mode.
+	 */
+	len = UCLOGIC_PRM_NUM * sizeof(*buf);
+	buf = kmalloc(len, GFP_KERNEL);
+	if (buf == NULL) {
+		hid_err(hdev, "failed to allocate parameter buffer\n");
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+	rc = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
+				USB_REQ_GET_DESCRIPTOR, USB_DIR_IN,
+				(USB_DT_STRING << 8) + 0x64,
+				0x0409, buf, len,
+				USB_CTRL_GET_TIMEOUT);
+	if (rc == -EPIPE) {
+		hid_err(hdev, "device parameters not found\n");
+		rc = -ENODEV;
+		goto cleanup;
+	} else if (rc < 0) {
+		hid_err(hdev, "failed to get device parameters: %d\n", rc);
+		rc = -ENODEV;
+		goto cleanup;
+	} else if (rc != len) {
+		hid_err(hdev, "invalid device parameters\n");
+		rc = -ENODEV;
+		goto cleanup;
+	}
+
+	/* Extract device parameters */
+	params[UCLOGIC_PH_ID_X_LM] = le16_to_cpu(buf[UCLOGIC_PRM_X_LM]);
+	params[UCLOGIC_PH_ID_Y_LM] = le16_to_cpu(buf[UCLOGIC_PRM_Y_LM]);
+	params[UCLOGIC_PH_ID_PRESSURE_LM] =
+		le16_to_cpu(buf[UCLOGIC_PRM_PRESSURE_LM]);
+	resolution = le16_to_cpu(buf[UCLOGIC_PRM_RESOLUTION]);
+	if (resolution == 0) {
+		params[UCLOGIC_PH_ID_X_PM] = 0;
+		params[UCLOGIC_PH_ID_Y_PM] = 0;
+	} else {
+		params[UCLOGIC_PH_ID_X_PM] = params[UCLOGIC_PH_ID_X_LM] *
+						1000 / resolution;
+		params[UCLOGIC_PH_ID_Y_PM] = params[UCLOGIC_PH_ID_Y_LM] *
+						1000 / resolution;
+	}
+
+	/* Allocate fixed report descriptor */
+	drvdata->rdesc = devm_kzalloc(&hdev->dev,
+				sizeof(uclogic_tablet_rdesc_template),
+				GFP_KERNEL);
+	if (drvdata->rdesc == NULL) {
+		hid_err(hdev, "failed to allocate fixed rdesc\n");
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+	drvdata->rsize = sizeof(uclogic_tablet_rdesc_template);
+
+	/* Format fixed report descriptor */
+	memcpy(drvdata->rdesc, uclogic_tablet_rdesc_template,
+		drvdata->rsize);
+	for (p = drvdata->rdesc;
+	     p <= drvdata->rdesc + drvdata->rsize - 4;) {
+		if (p[0] == 0xFE && p[1] == 0xED && p[2] == 0x1D &&
+		    p[3] < sizeof(params)) {
+			v = params[p[3]];
+			put_unaligned(cpu_to_le32(v), (s32 *)p);
+			p += 4;
+		} else {
+			p++;
+		}
+	}
+
+	rc = 0;
+
+cleanup:
+	kfree(buf);
+	return rc;
+}
+
+static int uclogic_probe(struct hid_device *hdev,
+		const struct hid_device_id *id)
+{
+	int rc;
+	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+	struct uclogic_drvdata *drvdata;
+
+	/*
+	 * libinput requires the pad interface to be on a different node
+	 * than the pen, so use QUIRK_MULTI_INPUT for all tablets.
+	 */
+	hdev->quirks |= HID_QUIRK_MULTI_INPUT;
+	hdev->quirks |= HID_QUIRK_NO_EMPTY_INPUT;
+
+	/* Allocate and assign driver data */
+	drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL);
+	if (drvdata == NULL)
+		return -ENOMEM;
+
+	hid_set_drvdata(hdev, drvdata);
+
+	switch (id->product) {
+	case USB_DEVICE_ID_HUION_TABLET:
+		/* If this is the pen interface */
+		if (intf->cur_altsetting->desc.bInterfaceNumber == 0) {
+			rc = uclogic_tablet_enable(hdev);
+			if (rc) {
+				hid_err(hdev, "tablet enabling failed\n");
+				return rc;
+			}
+			drvdata->invert_pen_inrange = true;
+		} else {
+			drvdata->ignore_pen_usage = true;
+		}
+		break;
+	}
+
+	rc = hid_parse(hdev);
+	if (rc) {
+		hid_err(hdev, "parse failed\n");
+		return rc;
+	}
+
+	rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+	if (rc) {
+		hid_err(hdev, "hw start failed\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+static int uclogic_raw_event(struct hid_device *hdev, struct hid_report *report,
+			u8 *data, int size)
+{
+	struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev);
+
+	if ((drvdata->invert_pen_inrange) &&
+	    (report->type == HID_INPUT_REPORT) &&
+	    (report->id == UCLOGIC_PEN_REPORT_ID) &&
+	    (size >= 2))
+		/* Invert the in-range bit */
+		data[1] ^= 0x40;
+
+	return 0;
+}
+
 static const struct hid_device_id uclogic_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
 				USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) },
@@ -641,6 +956,8 @@
 				USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
 				USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, uclogic_devices);
@@ -648,8 +965,14 @@
 static struct hid_driver uclogic_driver = {
 	.name = "uclogic",
 	.id_table = uclogic_devices,
+	.probe = uclogic_probe,
 	.report_fixup = uclogic_report_fixup,
+	.raw_event = uclogic_raw_event,
+	.input_mapping = uclogic_input_mapping,
+	.input_configured = uclogic_input_configured,
 };
 module_hid_driver(uclogic_driver);
 
+MODULE_AUTHOR("Martin Rusko");
+MODULE_AUTHOR("Nikolai Kondrashov");
 MODULE_LICENSE("GPL");
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 36053f3..ab4dd95 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -37,6 +37,7 @@
 #include <linux/mutex.h>
 #include <linux/acpi.h>
 #include <linux/of.h>
+#include <linux/gpio/consumer.h>
 
 #include <linux/i2c/i2c-hid.h>
 
@@ -144,6 +145,8 @@
 	unsigned long		flags;		/* device flags */
 
 	wait_queue_head_t	wait;		/* For waiting the interrupt */
+	struct gpio_desc	*desc;
+	int			irq;
 
 	struct i2c_hid_platform_data pdata;
 };
@@ -785,16 +788,16 @@
 	struct i2c_hid *ihid = i2c_get_clientdata(client);
 	int ret;
 
-	dev_dbg(&client->dev, "Requesting IRQ: %d\n", client->irq);
+	dev_dbg(&client->dev, "Requesting IRQ: %d\n", ihid->irq);
 
-	ret = request_threaded_irq(client->irq, NULL, i2c_hid_irq,
+	ret = request_threaded_irq(ihid->irq, NULL, i2c_hid_irq,
 			IRQF_TRIGGER_LOW | IRQF_ONESHOT,
 			client->name, ihid);
 	if (ret < 0) {
 		dev_warn(&client->dev,
 			"Could not register for %s interrupt, irq = %d,"
 			" ret = %d\n",
-			client->name, client->irq, ret);
+			client->name, ihid->irq, ret);
 
 		return ret;
 	}
@@ -841,6 +844,14 @@
 }
 
 #ifdef CONFIG_ACPI
+
+/* Default GPIO mapping */
+static const struct acpi_gpio_params i2c_hid_irq_gpio = { 0, 0, true };
+static const struct acpi_gpio_mapping i2c_hid_acpi_gpios[] = {
+	{ "gpios", &i2c_hid_irq_gpio, 1 },
+	{ },
+};
+
 static int i2c_hid_acpi_pdata(struct i2c_client *client,
 		struct i2c_hid_platform_data *pdata)
 {
@@ -866,7 +877,7 @@
 	pdata->hid_descriptor_address = obj->integer.value;
 	ACPI_FREE(obj);
 
-	return 0;
+	return acpi_dev_add_driver_gpios(adev, i2c_hid_acpi_gpios);
 }
 
 static const struct acpi_device_id i2c_hid_acpi_match[] = {
@@ -930,12 +941,6 @@
 
 	dbg_hid("HID probe called for i2c 0x%02x\n", client->addr);
 
-	if (!client->irq) {
-		dev_err(&client->dev,
-			"HID over i2c has not been provided an Int IRQ\n");
-		return -EINVAL;
-	}
-
 	ihid = kzalloc(sizeof(struct i2c_hid), GFP_KERNEL);
 	if (!ihid)
 		return -ENOMEM;
@@ -955,6 +960,23 @@
 		ihid->pdata = *platform_data;
 	}
 
+	if (client->irq > 0) {
+		ihid->irq = client->irq;
+	} else if (ACPI_COMPANION(&client->dev)) {
+		ihid->desc = gpiod_get(&client->dev, NULL, GPIOD_IN);
+		if (IS_ERR(ihid->desc)) {
+			dev_err(&client->dev, "Failed to get GPIO interrupt\n");
+			return PTR_ERR(ihid->desc);
+		}
+
+		ihid->irq = gpiod_to_irq(ihid->desc);
+		if (ihid->irq < 0) {
+			gpiod_put(ihid->desc);
+			dev_err(&client->dev, "Failed to convert GPIO to IRQ\n");
+			return ihid->irq;
+		}
+	}
+
 	i2c_set_clientdata(client, ihid);
 
 	ihid->client = client;
@@ -1017,13 +1039,16 @@
 	hid_destroy_device(hid);
 
 err_irq:
-	free_irq(client->irq, ihid);
+	free_irq(ihid->irq, ihid);
 
 err_pm:
 	pm_runtime_put_noidle(&client->dev);
 	pm_runtime_disable(&client->dev);
 
 err:
+	if (ihid->desc)
+		gpiod_put(ihid->desc);
+
 	i2c_hid_free_buffers(ihid);
 	kfree(ihid);
 	return ret;
@@ -1042,13 +1067,18 @@
 	hid = ihid->hid;
 	hid_destroy_device(hid);
 
-	free_irq(client->irq, ihid);
+	free_irq(ihid->irq, ihid);
 
 	if (ihid->bufsize)
 		i2c_hid_free_buffers(ihid);
 
+	if (ihid->desc)
+		gpiod_put(ihid->desc);
+
 	kfree(ihid);
 
+	acpi_dev_remove_driver_gpios(ACPI_COMPANION(&client->dev));
+
 	return 0;
 }
 
@@ -1060,9 +1090,9 @@
 	struct hid_device *hid = ihid->hid;
 	int ret = 0;
 
-	disable_irq(client->irq);
+	disable_irq(ihid->irq);
 	if (device_may_wakeup(&client->dev))
-		enable_irq_wake(client->irq);
+		enable_irq_wake(ihid->irq);
 
 	if (hid->driver && hid->driver->suspend)
 		ret = hid->driver->suspend(hid, PMSG_SUSPEND);
@@ -1080,13 +1110,13 @@
 	struct i2c_hid *ihid = i2c_get_clientdata(client);
 	struct hid_device *hid = ihid->hid;
 
-	enable_irq(client->irq);
+	enable_irq(ihid->irq);
 	ret = i2c_hid_hwreset(client);
 	if (ret)
 		return ret;
 
 	if (device_may_wakeup(&client->dev))
-		disable_irq_wake(client->irq);
+		disable_irq_wake(ihid->irq);
 
 	if (hid->driver && hid->driver->reset_resume) {
 		ret = hid->driver->reset_resume(hid);
@@ -1101,17 +1131,19 @@
 static int i2c_hid_runtime_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
 
 	i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
-	disable_irq(client->irq);
+	disable_irq(ihid->irq);
 	return 0;
 }
 
 static int i2c_hid_runtime_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
+	struct i2c_hid *ihid = i2c_get_clientdata(client);
 
-	enable_irq(client->irq);
+	enable_irq(ihid->irq);
 	i2c_hid_set_power(client, I2C_HID_PWR_ON);
 	return 0;
 }
diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c
index 0b531c6..08174d3 100644
--- a/drivers/hid/usbhid/hid-pidff.c
+++ b/drivers/hid/usbhid/hid-pidff.c
@@ -568,6 +568,12 @@
 	int type_id;
 	int error;
 
+	pidff->block_load[PID_EFFECT_BLOCK_INDEX].value[0] = 0;
+	if (old) {
+		pidff->block_load[PID_EFFECT_BLOCK_INDEX].value[0] =
+			pidff->pid_id[effect->id];
+	}
+
 	switch (effect->type) {
 	case FF_CONSTANT:
 		if (!old) {
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index a821277..a775143 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -78,7 +78,13 @@
 	{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET },
+	{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C01A, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3_JP, HID_QUIRK_NO_INIT_REPORTS },
@@ -92,6 +98,7 @@
 	{ USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2, HID_QUIRK_NO_INIT_REPORTS },
+	{ USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET },
@@ -107,12 +114,8 @@
 	{ USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_2, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_TPV, USB_DEVICE_ID_TPV_OPTICAL_TOUCHSCREEN, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD, HID_QUIRK_NOGET },
-	{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209, HID_QUIRK_MULTI_INPUT },
-	{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60, HID_QUIRK_MULTI_INPUT },
-	{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U, HID_QUIRK_MULTI_INPUT },
-	{ USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET, HID_QUIRK_MULTI_INPUT },
@@ -128,6 +131,7 @@
 	{ USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X, HID_QUIRK_MULTI_INPUT },
+	{ USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_DUOSENSE, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_LTS1, HID_QUIRK_NO_INIT_REPORTS },
diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h
index 0d0d0dd..024f4d89 100644
--- a/drivers/hid/wacom.h
+++ b/drivers/hid/wacom.h
@@ -131,13 +131,6 @@
 	schedule_work(&wacom->work);
 }
 
-static inline void wacom_notify_battery(struct wacom_wac *wacom_wac)
-{
-	struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac);
-
-	power_supply_changed(wacom->battery);
-}
-
 extern const struct hid_device_id wacom_ids[];
 
 void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len);
@@ -151,4 +144,5 @@
 int wacom_wac_event(struct hid_device *hdev, struct hid_field *field,
 		struct hid_usage *usage, __s32 value);
 void wacom_wac_report(struct hid_device *hdev, struct hid_report *report);
+void wacom_battery_work(struct work_struct *work);
 #endif
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index ba9af47..e8607d0 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -406,6 +406,9 @@
 		else if (features->type == WACOM_27QHDT) {
 			return wacom_set_device_mode(hdev, 131, 3, 2);
 		}
+		else if (features->type == BAMBOO_PAD) {
+			return wacom_set_device_mode(hdev, 2, 2, 2);
+		}
 	} else if (features->device_type == BTN_TOOL_PEN) {
 		if (features->type <= BAMBOO_PT && features->type != WIRELESS) {
 			return wacom_set_device_mode(hdev, 2, 2, 2);
@@ -524,6 +527,11 @@
 
 	wacom_wac->shared = &data->shared;
 
+	if (wacom_wac->features.device_type == BTN_TOOL_FINGER)
+		wacom_wac->shared->touch = hdev;
+	else if (wacom_wac->features.device_type == BTN_TOOL_PEN)
+		wacom_wac->shared->pen = hdev;
+
 out:
 	mutex_unlock(&wacom_udev_list_lock);
 	return retval;
@@ -541,14 +549,22 @@
 	kfree(data);
 }
 
-static void wacom_remove_shared_data(struct wacom_wac *wacom)
+static void wacom_remove_shared_data(struct wacom *wacom)
 {
 	struct wacom_hdev_data *data;
+	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
 
-	if (wacom->shared) {
-		data = container_of(wacom->shared, struct wacom_hdev_data, shared);
+	if (wacom_wac->shared) {
+		data = container_of(wacom_wac->shared, struct wacom_hdev_data,
+				    shared);
+
+		if (wacom_wac->shared->touch == wacom->hdev)
+			wacom_wac->shared->touch = NULL;
+		else if (wacom_wac->shared->pen == wacom->hdev)
+			wacom_wac->shared->pen = NULL;
+
 		kref_put(&data->kref, wacom_release_shared_data);
-		wacom->shared = NULL;
+		wacom_wac->shared = NULL;
 	}
 }
 
@@ -929,6 +945,7 @@
 }
 
 static enum power_supply_property wacom_battery_props[] = {
+	POWER_SUPPLY_PROP_PRESENT,
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_SCOPE,
 	POWER_SUPPLY_PROP_CAPACITY
@@ -948,6 +965,9 @@
 	int ret = 0;
 
 	switch (psp) {
+		case POWER_SUPPLY_PROP_PRESENT:
+			val->intval = wacom->wacom_wac.bat_connected;
+			break;
 		case POWER_SUPPLY_PROP_SCOPE:
 			val->intval = POWER_SUPPLY_SCOPE_DEVICE;
 			break;
@@ -961,6 +981,8 @@
 			else if (wacom->wacom_wac.battery_capacity == 100 &&
 				    wacom->wacom_wac.ps_connected)
 				val->intval = POWER_SUPPLY_STATUS_FULL;
+			else if (wacom->wacom_wac.ps_connected)
+				val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
 			else
 				val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
 			break;
@@ -1045,8 +1067,7 @@
 
 static void wacom_destroy_battery(struct wacom *wacom)
 {
-	if ((wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) &&
-	     wacom->battery) {
+	if (wacom->battery) {
 		power_supply_unregister(wacom->battery);
 		wacom->battery = NULL;
 		power_supply_unregister(wacom->ac);
@@ -1317,6 +1338,20 @@
 	return;
 }
 
+void wacom_battery_work(struct work_struct *work)
+{
+	struct wacom *wacom = container_of(work, struct wacom, work);
+
+	if ((wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) &&
+	     !wacom->battery) {
+		wacom_initialize_battery(wacom);
+	}
+	else if (!(wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) &&
+		 wacom->battery) {
+		wacom_destroy_battery(wacom);
+	}
+}
+
 /*
  * Not all devices report physical dimensions from HID.
  * Compute the default from hardcoded logical dimension
@@ -1377,6 +1412,9 @@
 
 	hdev->quirks |= HID_QUIRK_NO_INIT_REPORTS;
 
+	/* hid-core sets this quirk for the boot interface */
+	hdev->quirks &= ~HID_QUIRK_NOGET;
+
 	wacom = kzalloc(sizeof(struct wacom), GFP_KERNEL);
 	if (!wacom)
 		return -ENOMEM;
@@ -1416,6 +1454,21 @@
 			goto fail_allocate_inputs;
 	}
 
+	/*
+	 * Bamboo Pad has a generic hid handling for the Pen, and we switch it
+	 * into debug mode for the touch part.
+	 * We ignore the other interfaces.
+	 */
+	if (features->type == BAMBOO_PAD) {
+		if (features->pktlen == WACOM_PKGLEN_PENABLED) {
+			features->type = HID_GENERIC;
+		} else if ((features->pktlen != WACOM_PKGLEN_BPAD_TOUCH) &&
+			   (features->pktlen != WACOM_PKGLEN_BPAD_TOUCH_USB)) {
+			error = -ENODEV;
+			goto fail_shared_data;
+		}
+	}
+
 	/* set the default size in case we do not get them from hid */
 	wacom_set_default_phy(features);
 
@@ -1450,6 +1503,12 @@
 		features->y_max = 4096;
 	}
 
+	/*
+	 * Same thing for Bamboo PAD
+	 */
+	if (features->type == BAMBOO_PAD)
+		features->device_type = BTN_TOOL_FINGER;
+
 	if (hdev->bus == BUS_BLUETOOTH)
 		features->quirks |= WACOM_QUIRK_BATTERY;
 
@@ -1466,19 +1525,17 @@
 	snprintf(wacom_wac->pad_name, sizeof(wacom_wac->pad_name),
 		"%s Pad", features->name);
 
-	if (features->quirks & WACOM_QUIRK_MULTI_INPUT) {
-		/* Append the device type to the name */
-		if (features->device_type != BTN_TOOL_FINGER)
-			strlcat(wacom_wac->name, " Pen", WACOM_NAME_MAX);
-		else if (features->touch_max)
-			strlcat(wacom_wac->name, " Finger", WACOM_NAME_MAX);
-		else
-			strlcat(wacom_wac->name, " Pad", WACOM_NAME_MAX);
+	/* Append the device type to the name */
+	if (features->device_type != BTN_TOOL_FINGER)
+		strlcat(wacom_wac->name, " Pen", WACOM_NAME_MAX);
+	else if (features->touch_max)
+		strlcat(wacom_wac->name, " Finger", WACOM_NAME_MAX);
+	else
+		strlcat(wacom_wac->name, " Pad", WACOM_NAME_MAX);
 
-		error = wacom_add_shared_data(hdev);
-		if (error)
-			goto fail_shared_data;
-	}
+	error = wacom_add_shared_data(hdev);
+	if (error)
+		goto fail_shared_data;
 
 	if (!(features->quirks & WACOM_QUIRK_MONITOR) &&
 	     (features->quirks & WACOM_QUIRK_BATTERY)) {
@@ -1531,7 +1588,7 @@
 	wacom_clean_inputs(wacom);
 	wacom_destroy_battery(wacom);
 fail_battery:
-	wacom_remove_shared_data(wacom_wac);
+	wacom_remove_shared_data(wacom);
 fail_shared_data:
 	wacom_clean_inputs(wacom);
 fail_allocate_inputs:
@@ -1554,7 +1611,7 @@
 	if (hdev->bus == BUS_BLUETOOTH)
 		device_remove_file(&hdev->dev, &dev_attr_speed);
 	wacom_destroy_battery(wacom);
-	wacom_remove_shared_data(&wacom->wacom_wac);
+	wacom_remove_shared_data(wacom);
 
 	hid_set_drvdata(hdev, NULL);
 	kfree(wacom);
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index bbe32d6..fa54d32 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -45,6 +45,27 @@
  */
 static unsigned short batcap_i4[8] = { 1, 15, 30, 45, 60, 70, 85, 100 };
 
+static void wacom_notify_battery(struct wacom_wac *wacom_wac,
+	int bat_capacity, bool bat_charging, bool bat_connected,
+	bool ps_connected)
+{
+	struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac);
+	bool changed = wacom_wac->battery_capacity != bat_capacity  ||
+		       wacom_wac->bat_charging     != bat_charging  ||
+		       wacom_wac->bat_connected    != bat_connected ||
+		       wacom_wac->ps_connected     != ps_connected;
+
+	if (changed) {
+		wacom_wac->battery_capacity = bat_capacity;
+		wacom_wac->bat_charging = bat_charging;
+		wacom_wac->bat_connected = bat_connected;
+		wacom_wac->ps_connected = ps_connected;
+
+		if (wacom->battery)
+			power_supply_changed(wacom->battery);
+	}
+}
+
 static int wacom_penpartner_irq(struct wacom_wac *wacom)
 {
 	unsigned char *data = wacom->data;
@@ -419,17 +440,26 @@
 		rw = (data[7] >> 2 & 0x07);
 		battery_capacity = batcap_gr[rw];
 		ps_connected = rw == 7;
-		if ((wacom->battery_capacity != battery_capacity) ||
-		    (wacom->ps_connected != ps_connected)) {
-			wacom->battery_capacity = battery_capacity;
-			wacom->ps_connected = ps_connected;
-			wacom_notify_battery(wacom);
-		}
+		wacom_notify_battery(wacom, battery_capacity, ps_connected,
+				     1, ps_connected);
 	}
 exit:
 	return retval;
 }
 
+static void wacom_intuos_schedule_prox_event(struct wacom_wac *wacom_wac)
+{
+	struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac);
+	struct hid_report *r;
+	struct hid_report_enum *re;
+
+	re = &(wacom->hdev->report_enum[HID_FEATURE_REPORT]);
+	r = re->report_id_hash[WACOM_REPORT_INTUOSREAD];
+	if (r) {
+		hid_hw_request(wacom->hdev, r, HID_REQ_GET_REPORT);
+	}
+}
+
 static int wacom_intuos_inout(struct wacom_wac *wacom)
 {
 	struct wacom_features *features = &wacom->features;
@@ -551,12 +581,9 @@
 	   (features->type == CINTIQ && !(data[1] & 0x40)))
 		return 1;
 
-	if (wacom->shared) {
-		wacom->shared->stylus_in_proximity = true;
-
-		if (wacom->shared->touch_down)
-			return 1;
-	}
+	wacom->shared->stylus_in_proximity = true;
+	if (wacom->shared->touch_down)
+		return 1;
 
 	/* in Range while exiting */
 	if (((data[1] & 0xfe) == 0x20) && wacom->reporting_data) {
@@ -568,8 +595,7 @@
 
 	/* Exit report */
 	if ((data[1] & 0xfe) == 0x80) {
-		if (features->quirks & WACOM_QUIRK_MULTI_INPUT)
-			wacom->shared->stylus_in_proximity = false;
+		wacom->shared->stylus_in_proximity = false;
 		wacom->reporting_data = false;
 
 		/* don't report exit if we don't know the ID */
@@ -610,8 +636,11 @@
 	}
 
 	/* don't report other events if we don't know the ID */
-	if (!wacom->id[idx])
+	if (!wacom->id[idx]) {
+		/* but reschedule a read of the current tool */
+		wacom_intuos_schedule_prox_event(wacom);
 		return 1;
+	}
 
 	return 0;
 }
@@ -1023,15 +1052,9 @@
 		bat_charging = (power_raw & 0x08) ? 1 : 0;
 		ps_connected = (power_raw & 0x10) ? 1 : 0;
 		battery_capacity = batcap_i4[power_raw & 0x07];
-		if ((wacom->battery_capacity != battery_capacity) ||
-		    (wacom->bat_charging != bat_charging) ||
-		    (wacom->ps_connected != ps_connected)) {
-			wacom->battery_capacity = battery_capacity;
-			wacom->bat_charging = bat_charging;
-			wacom->ps_connected = ps_connected;
-			wacom_notify_battery(wacom);
-		}
-
+		wacom_notify_battery(wacom, battery_capacity, bat_charging,
+				     battery_capacity || bat_charging,
+				     ps_connected);
 		break;
 	default:
 		dev_dbg(wacom->input->dev.parent,
@@ -1042,6 +1065,28 @@
 	return 0;
 }
 
+static int wacom_wac_finger_count_touches(struct wacom_wac *wacom)
+{
+	struct input_dev *input = wacom->input;
+	unsigned touch_max = wacom->features.touch_max;
+	int count = 0;
+	int i;
+
+	/* non-HID_GENERIC single touch input doesn't call this routine */
+	if ((touch_max == 1) && (wacom->features.type == HID_GENERIC))
+		return wacom->hid_data.tipswitch &&
+		       !wacom->shared->stylus_in_proximity;
+
+	for (i = 0; i < input->mt->num_slots; i++) {
+		struct input_mt_slot *ps = &input->mt->slots[i];
+		int id = input_mt_get_value(ps, ABS_MT_TRACKING_ID);
+		if (id >= 0)
+			count++;
+	}
+
+	return count;
+}
+
 static int wacom_24hdt_irq(struct wacom_wac *wacom)
 {
 	struct input_dev *input = wacom->input;
@@ -1052,7 +1097,6 @@
 	int num_contacts_left = 4; /* maximum contacts per packet */
 	int byte_per_packet = WACOM_BYTES_PER_24HDT_PACKET;
 	int y_offset = 2;
-	static int contact_with_no_pen_down_count = 0;
 
 	if (wacom->features.type == WACOM_27QHDT) {
 		current_num_contacts = data[63];
@@ -1065,10 +1109,8 @@
 	 * First packet resets the counter since only the first
 	 * packet in series will have non-zero current_num_contacts.
 	 */
-	if (current_num_contacts) {
+	if (current_num_contacts)
 		wacom->num_contacts_left = current_num_contacts;
-		contact_with_no_pen_down_count = 0;
-	}
 
 	contacts_to_send = min(num_contacts_left, wacom->num_contacts_left);
 
@@ -1101,15 +1143,14 @@
 				input_report_abs(input, ABS_MT_WIDTH_MINOR, min(w, h));
 				input_report_abs(input, ABS_MT_ORIENTATION, w > h);
 			}
-			contact_with_no_pen_down_count++;
 		}
 	}
-	input_mt_report_pointer_emulation(input, true);
+	input_mt_sync_frame(input);
 
 	wacom->num_contacts_left -= contacts_to_send;
 	if (wacom->num_contacts_left <= 0) {
 		wacom->num_contacts_left = 0;
-		wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
+		wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom);
 	}
 	return 1;
 }
@@ -1122,7 +1163,6 @@
 	int current_num_contacts = data[2];
 	int contacts_to_send = 0;
 	int x_offset = 0;
-	static int contact_with_no_pen_down_count = 0;
 
 	/* MTTPC does not support Height and Width */
 	if (wacom->features.type == MTTPC || wacom->features.type == MTTPC_B)
@@ -1132,10 +1172,8 @@
 	 * First packet resets the counter since only the first
 	 * packet in series will have non-zero current_num_contacts.
 	 */
-	if (current_num_contacts) {
+	if (current_num_contacts)
 		wacom->num_contacts_left = current_num_contacts;
-		contact_with_no_pen_down_count = 0;
-	}
 
 	/* There are at most 5 contacts per packet */
 	contacts_to_send = min(5, wacom->num_contacts_left);
@@ -1156,15 +1194,14 @@
 			int y = get_unaligned_le16(&data[offset + x_offset + 9]);
 			input_report_abs(input, ABS_MT_POSITION_X, x);
 			input_report_abs(input, ABS_MT_POSITION_Y, y);
-			contact_with_no_pen_down_count++;
 		}
 	}
-	input_mt_report_pointer_emulation(input, true);
+	input_mt_sync_frame(input);
 
 	wacom->num_contacts_left -= contacts_to_send;
 	if (wacom->num_contacts_left <= 0) {
 		wacom->num_contacts_left = 0;
-		wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
+		wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom);
 	}
 	return 1;
 }
@@ -1173,7 +1210,6 @@
 {
 	struct input_dev *input = wacom->input;
 	unsigned char *data = wacom->data;
-	int contact_with_no_pen_down_count = 0;
 	int i;
 
 	for (i = 0; i < 2; i++) {
@@ -1188,13 +1224,12 @@
 
 			input_report_abs(input, ABS_MT_POSITION_X, x);
 			input_report_abs(input, ABS_MT_POSITION_Y, y);
-			contact_with_no_pen_down_count++;
 		}
 	}
-	input_mt_report_pointer_emulation(input, true);
+	input_mt_sync_frame(input);
 
 	/* keep touch state for pen event */
-	wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
+	wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom);
 
 	return 1;
 }
@@ -1522,29 +1557,6 @@
 	return 0;
 }
 
-static int wacom_wac_finger_count_touches(struct hid_device *hdev)
-{
-	struct wacom *wacom = hid_get_drvdata(hdev);
-	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
-	struct input_dev *input = wacom_wac->input;
-	unsigned touch_max = wacom_wac->features.touch_max;
-	int count = 0;
-	int i;
-
-	if (touch_max == 1)
-		return wacom_wac->hid_data.tipswitch &&
-		       !wacom_wac->shared->stylus_in_proximity;
-
-	for (i = 0; i < input->mt->num_slots; i++) {
-		struct input_mt_slot *ps = &input->mt->slots[i];
-		int id = input_mt_get_value(ps, ABS_MT_TRACKING_ID);
-		if (id >= 0)
-			count++;
-	}
-
-	return count;
-}
-
 static void wacom_wac_finger_report(struct hid_device *hdev,
 		struct hid_report *report)
 {
@@ -1559,7 +1571,7 @@
 	input_sync(input);
 
 	/* keep touch state for pen event */
-	wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(hdev);
+	wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(wacom_wac);
 }
 
 void wacom_wac_usage_mapping(struct hid_device *hdev,
@@ -1619,7 +1631,6 @@
 	struct input_dev *pad_input = wacom->pad_input;
 	unsigned char *data = wacom->data;
 	int i;
-	int contact_with_no_pen_down_count = 0;
 
 	if (data[0] != 0x02)
 	    return 0;
@@ -1647,22 +1658,21 @@
 			}
 			input_report_abs(input, ABS_MT_POSITION_X, x);
 			input_report_abs(input, ABS_MT_POSITION_Y, y);
-			contact_with_no_pen_down_count++;
 		}
 	}
 
-	input_mt_report_pointer_emulation(input, true);
+	input_mt_sync_frame(input);
 
 	input_report_key(pad_input, BTN_LEFT, (data[1] & 0x08) != 0);
 	input_report_key(pad_input, BTN_FORWARD, (data[1] & 0x04) != 0);
 	input_report_key(pad_input, BTN_BACK, (data[1] & 0x02) != 0);
 	input_report_key(pad_input, BTN_RIGHT, (data[1] & 0x01) != 0);
-	wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
+	wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom);
 
 	return 1;
 }
 
-static int wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data, int last_touch_count)
+static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data)
 {
 	struct wacom_features *features = &wacom->features;
 	struct input_dev *input = wacom->input;
@@ -1670,7 +1680,7 @@
 	int slot = input_mt_get_slot_by_key(input, data[0]);
 
 	if (slot < 0)
-		return 0;
+		return;
 
 	touch = touch && !wacom->shared->stylus_in_proximity;
 
@@ -1702,9 +1712,7 @@
 		input_report_abs(input, ABS_MT_POSITION_Y, y);
 		input_report_abs(input, ABS_MT_TOUCH_MAJOR, width);
 		input_report_abs(input, ABS_MT_TOUCH_MINOR, height);
-		last_touch_count++;
 	}
-	return last_touch_count;
 }
 
 static void wacom_bpt3_button_msg(struct wacom_wac *wacom, unsigned char *data)
@@ -1729,7 +1737,6 @@
 	unsigned char *data = wacom->data;
 	int count = data[1] & 0x07;
 	int i;
-	int contact_with_no_pen_down_count = 0;
 
 	if (data[0] != 0x02)
 	    return 0;
@@ -1740,15 +1747,13 @@
 		int msg_id = data[offset];
 
 		if (msg_id >= 2 && msg_id <= 17)
-			contact_with_no_pen_down_count = 
-			    wacom_bpt3_touch_msg(wacom, data + offset,
-						 contact_with_no_pen_down_count);
+			wacom_bpt3_touch_msg(wacom, data + offset);
 		else if (msg_id == 128)
 			wacom_bpt3_button_msg(wacom, data + offset);
 
 	}
-	input_mt_report_pointer_emulation(input, true);
-	wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
+	input_mt_sync_frame(input);
+	wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom);
 
 	return 1;
 }
@@ -1760,23 +1765,9 @@
 	unsigned char *data = wacom->data;
 	int prox = 0, x = 0, y = 0, p = 0, d = 0, pen = 0, btn1 = 0, btn2 = 0;
 
-	if (data[0] != WACOM_REPORT_PENABLED && data[0] != WACOM_REPORT_USB)
+	if (data[0] != WACOM_REPORT_PENABLED)
 	    return 0;
 
-	if (data[0] == WACOM_REPORT_USB) {
-		if (features->type == INTUOSHT &&
-		    wacom->shared->touch_input &&
-		    features->touch_max) {
-			input_report_switch(wacom->shared->touch_input,
-					    SW_MUTE_DEVICE, data[8] & 0x40);
-			input_sync(wacom->shared->touch_input);
-		}
-		return 0;
-	}
-
-	if (wacom->shared->touch_down)
-		return 0;
-
 	prox = (data[1] & 0x20) == 0x20;
 
 	/*
@@ -1789,17 +1780,21 @@
 	 *
 	 * Hardware does report zero in most out-of-prox cases but not all.
 	 */
-	if (prox) {
-		if (!wacom->shared->stylus_in_proximity) {
-			if (data[1] & 0x08) {
-				wacom->tool[0] = BTN_TOOL_RUBBER;
-				wacom->id[0] = ERASER_DEVICE_ID;
-			} else {
-				wacom->tool[0] = BTN_TOOL_PEN;
-				wacom->id[0] = STYLUS_DEVICE_ID;
-			}
-			wacom->shared->stylus_in_proximity = true;
+	if (!wacom->shared->stylus_in_proximity) {
+		if (data[1] & 0x08) {
+			wacom->tool[0] = BTN_TOOL_RUBBER;
+			wacom->id[0] = ERASER_DEVICE_ID;
+		} else {
+			wacom->tool[0] = BTN_TOOL_PEN;
+			wacom->id[0] = STYLUS_DEVICE_ID;
 		}
+	}
+
+	wacom->shared->stylus_in_proximity = prox;
+	if (wacom->shared->touch_down)
+		return 0;
+
+	if (prox) {
 		x = le16_to_cpup((__le16 *)&data[2]);
 		y = le16_to_cpup((__le16 *)&data[4]);
 		p = le16_to_cpup((__le16 *)&data[6]);
@@ -1815,6 +1810,8 @@
 		pen = data[1] & 0x01;
 		btn1 = data[1] & 0x02;
 		btn2 = data[1] & 0x04;
+	} else {
+		wacom->id[0] = 0;
 	}
 
 	input_report_key(input, BTN_TOUCH, pen);
@@ -1826,11 +1823,6 @@
 	input_report_abs(input, ABS_PRESSURE, p);
 	input_report_abs(input, ABS_DISTANCE, d);
 
-	if (!prox) {
-		wacom->id[0] = 0;
-		wacom->shared->stylus_in_proximity = false;
-	}
-
 	input_report_key(input, wacom->tool[0], prox); /* PEN or RUBBER */
 	input_report_abs(input, ABS_MISC, wacom->id[0]); /* TOOL ID */
 
@@ -1849,6 +1841,91 @@
 	return 0;
 }
 
+static void wacom_bamboo_pad_pen_event(struct wacom_wac *wacom,
+		unsigned char *data)
+{
+	unsigned char prefix;
+
+	/*
+	 * We need to reroute the event from the debug interface to the
+	 * pen interface.
+	 * We need to add the report ID to the actual pen report, so we
+	 * temporary overwrite the first byte to prevent having to kzalloc/kfree
+	 * and memcpy the report.
+	 */
+	prefix = data[0];
+	data[0] = WACOM_REPORT_BPAD_PEN;
+
+	/*
+	 * actually reroute the event.
+	 * No need to check if wacom->shared->pen is valid, hid_input_report()
+	 * will check for us.
+	 */
+	hid_input_report(wacom->shared->pen, HID_INPUT_REPORT, data,
+			 WACOM_PKGLEN_PENABLED, 1);
+
+	data[0] = prefix;
+}
+
+static int wacom_bamboo_pad_touch_event(struct wacom_wac *wacom,
+		unsigned char *data)
+{
+	struct input_dev *input = wacom->input;
+	unsigned char *finger_data, prefix;
+	unsigned id;
+	int x, y;
+	bool valid;
+
+	prefix = data[0];
+
+	for (id = 0; id < wacom->features.touch_max; id++) {
+		valid = !!(prefix & BIT(id)) &&
+			!wacom->shared->stylus_in_proximity;
+
+		input_mt_slot(input, id);
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, valid);
+
+		if (!valid)
+			continue;
+
+		finger_data = data + 1 + id * 3;
+		x = finger_data[0] | ((finger_data[1] & 0x0f) << 8);
+		y = (finger_data[2] << 4) | (finger_data[1] >> 4);
+
+		input_report_abs(input, ABS_MT_POSITION_X, x);
+		input_report_abs(input, ABS_MT_POSITION_Y, y);
+	}
+
+	input_mt_sync_frame(input);
+
+	input_report_key(input, BTN_LEFT, prefix & 0x40);
+	input_report_key(input, BTN_RIGHT, prefix & 0x80);
+
+	/* keep touch state for pen event */
+	wacom->shared->touch_down = !!prefix &&
+				    !wacom->shared->stylus_in_proximity;
+
+	return 1;
+}
+
+static int wacom_bamboo_pad_irq(struct wacom_wac *wacom, size_t len)
+{
+	unsigned char *data = wacom->data;
+
+	if (!((len == WACOM_PKGLEN_BPAD_TOUCH) ||
+	      (len == WACOM_PKGLEN_BPAD_TOUCH_USB)) ||
+	    (data[0] != WACOM_REPORT_BPAD_TOUCH))
+		return 0;
+
+	if (data[1] & 0x01)
+		wacom_bamboo_pad_pen_event(wacom, &data[1]);
+
+	if (data[1] & 0x02)
+		return wacom_bamboo_pad_touch_event(wacom, &data[9]);
+
+	return 0;
+}
+
 static int wacom_wireless_irq(struct wacom_wac *wacom, size_t len)
 {
 	unsigned char *data = wacom->data;
@@ -1859,7 +1936,7 @@
 
 	connected = data[1] & 0x01;
 	if (connected) {
-		int pid, battery, ps_connected;
+		int pid, battery, charging;
 
 		if ((wacom->shared->type == INTUOSHT) &&
 		    wacom->shared->touch_input &&
@@ -1871,33 +1948,66 @@
 
 		pid = get_unaligned_be16(&data[6]);
 		battery = (data[5] & 0x3f) * 100 / 31;
-		ps_connected = !!(data[5] & 0x80);
+		charging = !!(data[5] & 0x80);
 		if (wacom->pid != pid) {
 			wacom->pid = pid;
 			wacom_schedule_work(wacom);
 		}
 
-		if (wacom->shared->type &&
-		    (battery != wacom->battery_capacity ||
-		     ps_connected != wacom->ps_connected)) {
-			wacom->battery_capacity = battery;
-			wacom->ps_connected = ps_connected;
-			wacom->bat_charging = ps_connected &&
-						wacom->battery_capacity < 100;
-			wacom_notify_battery(wacom);
-		}
+		if (wacom->shared->type)
+			wacom_notify_battery(wacom, battery, charging, 1, 0);
+
 	} else if (wacom->pid != 0) {
 		/* disconnected while previously connected */
 		wacom->pid = 0;
 		wacom_schedule_work(wacom);
-		wacom->battery_capacity = 0;
-		wacom->bat_charging = 0;
-		wacom->ps_connected = 0;
+		wacom_notify_battery(wacom, 0, 0, 0, 0);
 	}
 
 	return 0;
 }
 
+static int wacom_status_irq(struct wacom_wac *wacom_wac, size_t len)
+{
+	struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac);
+	struct wacom_features *features = &wacom_wac->features;
+	unsigned char *data = wacom_wac->data;
+
+	if (data[0] != WACOM_REPORT_USB)
+		return 0;
+
+	if (features->type == INTUOSHT &&
+	    wacom_wac->shared->touch_input &&
+	    features->touch_max) {
+		input_report_switch(wacom_wac->shared->touch_input,
+				    SW_MUTE_DEVICE, data[8] & 0x40);
+		input_sync(wacom_wac->shared->touch_input);
+	}
+
+	if (data[9] & 0x02) { /* wireless module is attached */
+		int battery = (data[8] & 0x3f) * 100 / 31;
+		bool charging = !!(data[8] & 0x80);
+
+		wacom_notify_battery(wacom_wac, battery, charging,
+				     battery || charging, 1);
+
+		if (!wacom->battery &&
+		    !(features->quirks & WACOM_QUIRK_BATTERY)) {
+			features->quirks |= WACOM_QUIRK_BATTERY;
+			INIT_WORK(&wacom->work, wacom_battery_work);
+			wacom_schedule_work(wacom_wac);
+		}
+	}
+	else if ((features->quirks & WACOM_QUIRK_BATTERY) &&
+		 wacom->battery) {
+		features->quirks &= ~WACOM_QUIRK_BATTERY;
+		INIT_WORK(&wacom->work, wacom_battery_work);
+		wacom_schedule_work(wacom_wac);
+		wacom_notify_battery(wacom_wac, 0, 0, 0, 0);
+	}
+	return 0;
+}
+
 void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len)
 {
 	bool sync;
@@ -1967,6 +2077,8 @@
 	case INTUOSPL:
 		if (len == WACOM_PKGLEN_BBTOUCH3)
 			sync = wacom_bpt3_touch(wacom_wac);
+		else if (wacom_wac->data[0] == WACOM_REPORT_USB)
+			sync = wacom_status_irq(wacom_wac, len);
 		else
 			sync = wacom_intuos_irq(wacom_wac);
 		break;
@@ -1982,7 +2094,14 @@
 
 	case BAMBOO_PT:
 	case INTUOSHT:
-		sync = wacom_bpt_irq(wacom_wac, len);
+		if (wacom_wac->data[0] == WACOM_REPORT_USB)
+			sync = wacom_status_irq(wacom_wac, len);
+		else
+			sync = wacom_bpt_irq(wacom_wac, len);
+		break;
+
+	case BAMBOO_PAD:
+		sync = wacom_bamboo_pad_irq(wacom_wac, len);
 		break;
 
 	case WIRELESS:
@@ -2054,12 +2173,6 @@
 		features->y_max = 1023;
 	}
 
-	/* these device have multiple inputs */
-	if (features->type >= WIRELESS ||
-	    (features->type >= INTUOS5S && features->type <= INTUOSHT) ||
-	    (features->oVid && features->oPid))
-		features->quirks |= WACOM_QUIRK_MULTI_INPUT;
-
 	/* quirk for bamboo touch with 2 low res touches */
 	if (features->type == BAMBOO_PT &&
 	    features->pktlen == WACOM_PKGLEN_BBTOUCH) {
@@ -2323,6 +2436,13 @@
 					      0, 0);
 		}
 		break;
+	case BAMBOO_PAD:
+		__clear_bit(ABS_MISC, input_dev->absbit);
+		input_mt_init_slots(input_dev, features->touch_max,
+				    INPUT_MT_POINTER);
+		__set_bit(BTN_LEFT, input_dev->keybit);
+		__set_bit(BTN_RIGHT, input_dev->keybit);
+		break;
 	}
 	return 0;
 }
@@ -2772,6 +2892,15 @@
 	{ "Wacom Cintiq 13HD", 59152, 33448, 1023, 63,
 	  WACOM_13HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
 	  WACOM_CINTIQ_OFFSET, WACOM_CINTIQ_OFFSET };
+static const struct wacom_features wacom_features_0x333 =
+	{ "Wacom Cintiq 13HD touch", 59152, 33448, 2047, 63,
+	  WACOM_13HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
+	  WACOM_CINTIQ_OFFSET, WACOM_CINTIQ_OFFSET,
+	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x335 };
+static const struct wacom_features wacom_features_0x335 =
+	{ "Wacom Cintiq 13HD touch", .type = WACOM_24HDT, /* Touch */
+	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x333, .touch_max = 10,
+	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
 static const struct wacom_features wacom_features_0xC7 =
 	{ "Wacom DTU1931", 37832, 30305, 511, 0,
 	  PL, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -2976,6 +3105,12 @@
 	{ "Wacom ISDv5 30C", .type = WACOM_24HDT, /* Touch */
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x30A, .touch_max = 10,
 	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
+static const struct wacom_features wacom_features_0x318 =
+	{ "Wacom USB Bamboo PAD", 4095, 4095, /* Touch */
+	  .type = BAMBOO_PAD, 35, 48, .touch_max = 4 };
+static const struct wacom_features wacom_features_0x319 =
+	{ "Wacom Wireless Bamboo PAD", 4095, 4095, /* Touch */
+	  .type = BAMBOO_PAD, 35, 48, .touch_max = 4 };
 static const struct wacom_features wacom_features_0x323 =
 	{ "Wacom Intuos P M", 21600, 13500, 1023, 31,
 	  INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES,
@@ -2992,6 +3127,10 @@
 	HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\
 	.driver_data = (kernel_ulong_t)&wacom_features_##prod
 
+#define I2C_DEVICE_WACOM(prod)						\
+	HID_DEVICE(BUS_I2C, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\
+	.driver_data = (kernel_ulong_t)&wacom_features_##prod
+
 #define USB_DEVICE_LENOVO(prod)					\
 	HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, prod),			\
 	.driver_data = (kernel_ulong_t)&wacom_features_##prod
@@ -3124,11 +3263,15 @@
 	{ USB_DEVICE_WACOM(0x314) },
 	{ USB_DEVICE_WACOM(0x315) },
 	{ USB_DEVICE_WACOM(0x317) },
+	{ USB_DEVICE_WACOM(0x318) },
+	{ USB_DEVICE_WACOM(0x319) },
 	{ USB_DEVICE_WACOM(0x323) },
 	{ USB_DEVICE_WACOM(0x32A) },
 	{ USB_DEVICE_WACOM(0x32B) },
 	{ USB_DEVICE_WACOM(0x32C) },
 	{ USB_DEVICE_WACOM(0x32F) },
+	{ USB_DEVICE_WACOM(0x333) },
+	{ USB_DEVICE_WACOM(0x335) },
 	{ USB_DEVICE_WACOM(0x4001) },
 	{ USB_DEVICE_WACOM(0x4004) },
 	{ USB_DEVICE_WACOM(0x5000) },
@@ -3136,6 +3279,7 @@
 	{ USB_DEVICE_LENOVO(0x6004) },
 
 	{ USB_DEVICE_WACOM(HID_ANY_ID) },
+	{ I2C_DEVICE_WACOM(HID_ANY_ID) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, wacom_ids);
diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index 021ee1c..4700ac9 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h
@@ -33,6 +33,8 @@
 #define WACOM_PKGLEN_MTTPC	40
 #define WACOM_PKGLEN_DTUS	68
 #define WACOM_PKGLEN_PENABLED	 8
+#define WACOM_PKGLEN_BPAD_TOUCH	32
+#define WACOM_PKGLEN_BPAD_TOUCH_USB	64
 
 /* wacom data size per MT contact */
 #define WACOM_BYTES_PER_MT_PACKET	11
@@ -67,13 +69,14 @@
 #define WACOM_REPORT_24HDT		1
 #define WACOM_REPORT_WL			128
 #define WACOM_REPORT_USB		192
+#define WACOM_REPORT_BPAD_PEN		3
+#define WACOM_REPORT_BPAD_TOUCH		16
 
 /* device quirks */
-#define WACOM_QUIRK_MULTI_INPUT		0x0001
-#define WACOM_QUIRK_BBTOUCH_LOWRES	0x0002
-#define WACOM_QUIRK_NO_INPUT		0x0004
-#define WACOM_QUIRK_MONITOR		0x0008
-#define WACOM_QUIRK_BATTERY		0x0010
+#define WACOM_QUIRK_BBTOUCH_LOWRES	0x0001
+#define WACOM_QUIRK_NO_INPUT		0x0002
+#define WACOM_QUIRK_MONITOR		0x0004
+#define WACOM_QUIRK_BATTERY		0x0008
 
 #define WACOM_PEN_FIELD(f)	(((f)->logical == HID_DG_STYLUS) || \
 				 ((f)->physical == HID_DG_STYLUS) || \
@@ -122,6 +125,7 @@
 	BAMBOO_PT,
 	WACOM_24HDT,
 	WACOM_27QHDT,
+	BAMBOO_PAD,
 	TABLETPC,   /* add new TPC below */
 	TABLETPCE,
 	TABLETPC2FG,
@@ -169,6 +173,8 @@
 	unsigned touch_max;
 	int type;
 	struct input_dev *touch_input;
+	struct hid_device *pen;
+	struct hid_device *touch;
 };
 
 struct hid_data {
@@ -205,6 +211,7 @@
 	int battery_capacity;
 	int num_contacts_left;
 	int bat_charging;
+	int bat_connected;
 	int ps_connected;
 	u8 bt_features;
 	u8 bt_high_speed;
diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index df6a593..2b4fad6 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -123,7 +123,8 @@
 			*val = sensor_hub_input_attr_get_raw_value(
 					accel_state->common_attributes.hsdev,
 					HID_USAGE_SENSOR_ACCEL_3D, address,
-					report_id);
+					report_id,
+					SENSOR_HUB_SYNC);
 		else {
 			*val = 0;
 			hid_sensor_power_state(&accel_state->common_attributes,
diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c
index 87ee1c7..44bf815 100644
--- a/drivers/iio/adc/max1027.c
+++ b/drivers/iio/adc/max1027.c
@@ -436,7 +436,7 @@
 				  indio_dev->num_channels * 2,
 				  GFP_KERNEL);
 	if (st->buffer == NULL) {
-		dev_err(&indio_dev->dev, "Can't allocate bufffer\n");
+		dev_err(&indio_dev->dev, "Can't allocate buffer\n");
 		return -ENOMEM;
 	}
 
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index 25b01e1..e81f434 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -153,8 +153,8 @@
 	int ret;
 
 	ret = sensor_hub_get_feature(st->hsdev,
-		st->poll.report_id,
-		st->poll.index, &value);
+				     st->poll.report_id,
+				     st->poll.index, sizeof(value), &value);
 
 	if (ret < 0 || value < 0) {
 		return -EINVAL;
@@ -174,8 +174,8 @@
 	int ret;
 
 	ret = sensor_hub_get_feature(st->hsdev,
-		st->poll.report_id,
-		st->poll.index, &value);
+				     st->poll.report_id,
+				     st->poll.index, sizeof(value), &value);
 	if (ret < 0 || value < 0) {
 		*val1 = *val2 = 0;
 		return -EINVAL;
@@ -212,9 +212,8 @@
 		else
 			value = 0;
 	}
-	ret = sensor_hub_set_feature(st->hsdev,
-		st->poll.report_id,
-		st->poll.index, value);
+	ret = sensor_hub_set_feature(st->hsdev, st->poll.report_id,
+				     st->poll.index, sizeof(value), &value);
 	if (ret < 0 || value < 0)
 		ret = -EINVAL;
 
@@ -229,8 +228,9 @@
 	int ret;
 
 	ret = sensor_hub_get_feature(st->hsdev,
-		st->sensitivity.report_id,
-		st->sensitivity.index, &value);
+				     st->sensitivity.report_id,
+				     st->sensitivity.index, sizeof(value),
+				     &value);
 	if (ret < 0 || value < 0) {
 		*val1 = *val2 = 0;
 		return -EINVAL;
@@ -253,9 +253,9 @@
 	value = convert_to_vtf_format(st->sensitivity.size,
 				st->sensitivity.unit_expo,
 				val1, val2);
-	ret = sensor_hub_set_feature(st->hsdev,
-		st->sensitivity.report_id,
-		st->sensitivity.index, value);
+	ret = sensor_hub_set_feature(st->hsdev, st->sensitivity.report_id,
+				     st->sensitivity.index, sizeof(value),
+				     &value);
 	if (ret < 0 || value < 0)
 		ret = -EINVAL;
 
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index 2f1d535b..610fc98 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -68,20 +68,21 @@
 	if (state_val >= 0) {
 		state_val += st->power_state.logical_minimum;
 		sensor_hub_set_feature(st->hsdev, st->power_state.report_id,
-					st->power_state.index,
-					(s32)state_val);
+				       st->power_state.index, sizeof(state_val),
+				       &state_val);
 	}
 
 	if (report_val >= 0) {
 		report_val += st->report_state.logical_minimum;
 		sensor_hub_set_feature(st->hsdev, st->report_state.report_id,
-					st->report_state.index,
-					(s32)report_val);
+				       st->report_state.index,
+				       sizeof(report_val),
+				       &report_val);
 	}
 
 	sensor_hub_get_feature(st->hsdev, st->power_state.report_id,
-					st->power_state.index,
-					&state_val);
+			       st->power_state.index,
+			       sizeof(state_val), &state_val);
 	if (state && poll_value)
 		msleep_interruptible(poll_value * 2);
 
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index a3c3e19..b5883b6 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -123,7 +123,8 @@
 			*val = sensor_hub_input_attr_get_raw_value(
 					gyro_state->common_attributes.hsdev,
 					HID_USAGE_SENSOR_GYRO_3D, address,
-					report_id);
+					report_id,
+					SENSOR_HUB_SYNC);
 		else {
 			*val = 0;
 			hid_sensor_power_state(&gyro_state->common_attributes,
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 948acfc..1609ecd 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -101,7 +101,8 @@
 			*val = sensor_hub_input_attr_get_raw_value(
 					als_state->common_attributes.hsdev,
 					HID_USAGE_SENSOR_ALS, address,
-					report_id);
+					report_id,
+					SENSOR_HUB_SYNC);
 			hid_sensor_power_state(&als_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c
index 3ecf79e..91ecc46 100644
--- a/drivers/iio/light/hid-sensor-prox.c
+++ b/drivers/iio/light/hid-sensor-prox.c
@@ -96,7 +96,8 @@
 			*val = sensor_hub_input_attr_get_raw_value(
 				prox_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_PROX, address,
-				report_id);
+				report_id,
+				SENSOR_HUB_SYNC);
 			hid_sensor_power_state(&prox_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index d22993b..4f9c0be 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -170,7 +170,8 @@
 			*val = sensor_hub_input_attr_get_raw_value(
 				magn_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_COMPASS_3D, address,
-				report_id);
+				report_id,
+				SENSOR_HUB_SYNC);
 		else {
 			*val = 0;
 			hid_sensor_power_state(&magn_state->common_attributes,
diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c
index 7385446..5930fa3 100644
--- a/drivers/iio/orientation/hid-sensor-incl-3d.c
+++ b/drivers/iio/orientation/hid-sensor-incl-3d.c
@@ -124,7 +124,8 @@
 			*val = sensor_hub_input_attr_get_raw_value(
 				incl_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_INCLINOMETER_3D, address,
-				report_id);
+				report_id,
+				SENSOR_HUB_SYNC);
 		else {
 			hid_sensor_power_state(&incl_state->common_attributes,
 						false);
diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c
index 1af3149..7bb8d4c 100644
--- a/drivers/iio/pressure/hid-sensor-press.c
+++ b/drivers/iio/pressure/hid-sensor-press.c
@@ -100,7 +100,8 @@
 			*val = sensor_hub_input_attr_get_raw_value(
 				press_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_PRESSURE, address,
-				report_id);
+				report_id,
+				SENSOR_HUB_SYNC);
 			hid_sensor_power_state(&press_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 6d7f453..aed8afe 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -40,7 +40,6 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
-#include <linux/aio.h>
 #include <linux/jiffies.h>
 #include <linux/cpu.h>
 #include <asm/pgtable.h>
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 41937c6..14046f5 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -39,7 +39,6 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
-#include <linux/aio.h>
 #include <linux/jiffies.h>
 #include <asm/pgtable.h>
 #include <linux/delay.h>
diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c
index cb150a1..34feb3e 100644
--- a/drivers/input/input-mt.c
+++ b/drivers/input/input-mt.c
@@ -88,10 +88,13 @@
 			goto err_mem;
 	}
 
-	/* Mark slots as 'unused' */
+	/* Mark slots as 'inactive' */
 	for (i = 0; i < num_slots; i++)
 		input_mt_set_value(&mt->slots[i], ABS_MT_TRACKING_ID, -1);
 
+	/* Mark slots as 'unused' */
+	mt->frame = 1;
+
 	dev->mt = mt;
 	return 0;
 err_mem:
@@ -439,6 +442,8 @@
  * set the key on the first unused slot and return.
  *
  * If no available slot can be found, -1 is returned.
+ * Note that for this function to work properly, input_mt_sync_frame() has
+ * to be called at each frame.
  */
 int input_mt_get_slot_by_key(struct input_dev *dev, int key)
 {
@@ -453,7 +458,7 @@
 			return s - mt->slots;
 
 	for (s = mt->slots; s != mt->slots + mt->num_slots; s++)
-		if (!input_mt_is_active(s)) {
+		if (!input_mt_is_active(s) && !input_mt_is_used(mt, s)) {
 			s->key = key;
 			return s - mt->slots;
 		}
diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
index c1493f4..d5bdbaf 100644
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -1092,7 +1092,7 @@
 	}
 	card->ci = get_card_info(ent->driver_data);
 	if (!card->ci) {
-		pr_info("mISDN: do not have informations about adapter at %s\n",
+		pr_info("mISDN: do not have information about adapter at %s\n",
 			pci_name(pdev));
 		kfree(card);
 		pci_disable_device(pdev);
diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c
index 87f7dff..52c4382 100644
--- a/drivers/isdn/mISDN/dsp_cmx.c
+++ b/drivers/isdn/mISDN/dsp_cmx.c
@@ -295,7 +295,7 @@
 		}
 	}
 	printk(KERN_WARNING
-	       "%s: dsp is not present in its own conf_meber list.\n",
+	       "%s: dsp is not present in its own conf_member list.\n",
 	       __func__);
 
 	return -EINVAL;
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 77025f5..0222b1a 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -460,7 +460,7 @@
 		}
 		if (dsp_debug & DEBUG_DSP_CORE)
 			printk(KERN_DEBUG "%s: enable mixing of "
-			       "tx-data with conf mebers\n", __func__);
+			       "tx-data with conf members\n", __func__);
 		dsp->tx_mix = 1;
 		dsp_cmx_hardware(dsp->conf, dsp);
 		dsp_rx_off(dsp);
@@ -474,7 +474,7 @@
 		}
 		if (dsp_debug & DEBUG_DSP_CORE)
 			printk(KERN_DEBUG "%s: disable mixing of "
-			       "tx-data with conf mebers\n", __func__);
+			       "tx-data with conf members\n", __func__);
 		dsp->tx_mix = 0;
 		dsp_cmx_hardware(dsp->conf, dsp);
 		dsp_rx_off(dsp);
diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c
index ba4ee0b..d3d928e 100644
--- a/drivers/media/dvb-frontends/m88ds3103.c
+++ b/drivers/media/dvb-frontends/m88ds3103.c
@@ -630,7 +630,7 @@
 	/* request the firmware, this will block and timeout */
 	ret = request_firmware(&fw, fw_file, priv->i2c->dev.parent);
 	if (ret) {
-		dev_err(&priv->i2c->dev, "%s: firmare file '%s' not found\n",
+		dev_err(&priv->i2c->dev, "%s: firmware file '%s' not found\n",
 				KBUILD_MODNAME, fw_file);
 		goto err;
 	}
diff --git a/drivers/media/dvb-frontends/si2168_priv.h b/drivers/media/dvb-frontends/si2168_priv.h
index aadd136..d7efce8 100644
--- a/drivers/media/dvb-frontends/si2168_priv.h
+++ b/drivers/media/dvb-frontends/si2168_priv.h
@@ -40,7 +40,7 @@
 	bool ts_clock_inv;
 };
 
-/* firmare command struct */
+/* firmware command struct */
 #define SI2168_ARGLEN      30
 struct si2168_cmd {
 	u8 args[SI2168_ARGLEN];
diff --git a/drivers/media/dvb-frontends/tda10071_priv.h b/drivers/media/dvb-frontends/tda10071_priv.h
index 4204861..03f839c 100644
--- a/drivers/media/dvb-frontends/tda10071_priv.h
+++ b/drivers/media/dvb-frontends/tda10071_priv.h
@@ -99,7 +99,7 @@
 #define CMD_BER_CONTROL         0x3e
 #define CMD_BER_UPDATE_COUNTERS 0x3f
 
-/* firmare command struct */
+/* firmware command struct */
 #define TDA10071_ARGLEN      30
 struct tda10071_cmd {
 	u8 args[TDA10071_ARGLEN];
diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c
index 26019e7..74cfc3c 100644
--- a/drivers/media/tuners/msi001.c
+++ b/drivers/media/tuners/msi001.c
@@ -408,7 +408,7 @@
 				s->mixer_gain->cur.val, s->if_gain->val);
 		break;
 	default:
-		dev_dbg(&s->spi->dev, "unkown control %d\n", ctrl->id);
+		dev_dbg(&s->spi->dev, "unknown control %d\n", ctrl->id);
 		ret = -EINVAL;
 	}
 
diff --git a/drivers/mfd/si476x-i2c.c b/drivers/mfd/si476x-i2c.c
index 0e4a76d..7f87c62 100644
--- a/drivers/mfd/si476x-i2c.c
+++ b/drivers/mfd/si476x-i2c.c
@@ -766,7 +766,7 @@
 			   sizeof(struct v4l2_rds_data),
 			   GFP_KERNEL);
 	if (rval) {
-		dev_err(&client->dev, "Could not alloate the FIFO\n");
+		dev_err(&client->dev, "Could not allocate the FIFO\n");
 		goto free_gpio;
 	}
 	mutex_init(&core->rds_drainer_status_lock);
diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
index c4cb9a9..40ea639 100644
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@@ -19,7 +19,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/ioctl.h>
 #include <linux/cdev.h>
 #include <linux/list.h>
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 3c019c0..47680c8 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -22,7 +22,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/ioctl.h>
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index bd3039a..af44ee2 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -21,7 +21,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/pci.h>
 #include <linux/poll.h>
 #include <linux/ioctl.h>
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index c84131e..f36c76f 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1758,7 +1758,7 @@
 
 	err = _mmc_suspend(host, true);
 	if (err)
-		pr_err("%s: error %d doing aggessive suspend\n",
+		pr_err("%s: error %d doing aggressive suspend\n",
 			mmc_hostname(host), err);
 
 	return err;
@@ -1776,7 +1776,7 @@
 
 	err = _mmc_resume(host);
 	if (err)
-		pr_err("%s: error %d doing aggessive resume\n",
+		pr_err("%s: error %d doing aggressive resume\n",
 			mmc_hostname(host), err);
 
 	return 0;
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index ad4d43e..31a9ef2 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1157,7 +1157,7 @@
 
 	err = _mmc_sd_suspend(host);
 	if (err)
-		pr_err("%s: error %d doing aggessive suspend\n",
+		pr_err("%s: error %d doing aggressive suspend\n",
 			mmc_hostname(host), err);
 
 	return err;
@@ -1175,7 +1175,7 @@
 
 	err = _mmc_sd_resume(host);
 	if (err)
-		pr_err("%s: error %d doing aggessive resume\n",
+		pr_err("%s: error %d doing aggressive resume\n",
 			mmc_hostname(host), err);
 
 	return 0;
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 27f272e..43fa16b 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -1105,7 +1105,7 @@
 		mask = MX28_BF_GPMI_STAT_READY_BUSY(1 << chip);
 		reg = readl(r->gpmi_regs + HW_GPMI_STAT);
 	} else
-		dev_err(this->dev, "unknow arch.\n");
+		dev_err(this->dev, "unknown arch.\n");
 	return reg & mask;
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index cfe3c769..4c9678c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -592,7 +592,7 @@
 		mc = kzalloc(mc_num * sizeof(struct bnx2x_mcast_list_elem),
 			     GFP_KERNEL);
 		if (!mc) {
-			BNX2X_ERR("Cannot Configure mulicasts due to lack of memory\n");
+			BNX2X_ERR("Cannot Configure multicasts due to lack of memory\n");
 			return -ENOMEM;
 		}
 	}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 3e0f705..75ee9e4 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -818,7 +818,7 @@
 
 	if (rv)
 		netdev_err(adapter->netdev,
-			   "Failed to set Rx coalescing parametrs\n");
+			   "Failed to set Rx coalescing parameters\n");
 
 	return rv;
 }
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index e22e602..4c2b5a80 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -614,7 +614,7 @@
 	trigger = CAL_TRIGGER;
 	cal_gpio = 1 + ptp_find_pin(clock->ptp_clock, PTP_PF_PHYSYNC, 0);
 	if (cal_gpio < 1) {
-		pr_err("PHY calibration pin not avaible - PHY is not calibrated.");
+		pr_err("PHY calibration pin not available - PHY is not calibrated.");
 		return;
 	}
 
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index e71a2ce..b97367d 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -3211,7 +3211,7 @@
 			airo_print_dbg(devname, "link lost (TSF sync lost)");
 			break;
 		default:
-			airo_print_dbg(devname, "unknow status %x\n", status);
+			airo_print_dbg(devname, "unknown status %x\n", status);
 			break;
 		}
 		break;
@@ -3233,7 +3233,7 @@
 	case STAT_REASSOC:
 		break;
 	default:
-		airo_print_dbg(devname, "unknow status %x\n", status);
+		airo_print_dbg(devname, "unknown status %x\n", status);
 		break;
 	}
 }
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index d6d2f0f..4ce433f 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4857,7 +4857,7 @@
 			bw = WMI_PEER_CHWIDTH_80MHZ;
 			break;
 		case IEEE80211_STA_RX_BW_160:
-			ath10k_warn(ar, "Invalid bandwith %d in rc update for %pM\n",
+			ath10k_warn(ar, "Invalid bandwidth %d in rc update for %pM\n",
 				    sta->bandwidth, sta->addr);
 			bw = WMI_PEER_CHWIDTH_20MHZ;
 			break;
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 69ed397..dbd8944 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -1701,7 +1701,7 @@
 	} else if (packet_type == WCN36XX_HAL_KEEP_ALIVE_UNSOLICIT_ARP_RSP) {
 		/* TODO: it also support ARP response type */
 	} else {
-		wcn36xx_warn("unknow keep alive packet type %d\n", packet_type);
+		wcn36xx_warn("unknown keep alive packet type %d\n", packet_type);
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac.h b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac.h
new file mode 100644
index 0000000..a0da324
--- /dev/null
+++ b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2011 Broadcom Corporation
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(__TRACE_BRCMSMAC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __TRACE_BRCMSMAC_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM brcmsmac
+
+/*
+ * We define a tracepoint, its arguments, its printk format and its
+ * 'fast binary record' layout.
+ */
+TRACE_EVENT(brcms_timer,
+	/* TPPROTO is the prototype of the function called by this tracepoint */
+	TP_PROTO(struct brcms_timer *t),
+	/*
+	 * TPARGS(firstarg, p) are the parameters names, same as found in the
+	 * prototype.
+	 */
+	TP_ARGS(t),
+	/*
+	 * Fast binary tracing: define the trace record via TP_STRUCT__entry().
+	 * You can think about it like a regular C structure local variable
+	 * definition.
+	 */
+	TP_STRUCT__entry(
+		__field(uint, ms)
+		__field(uint, set)
+		__field(uint, periodic)
+	),
+	TP_fast_assign(
+		__entry->ms = t->ms;
+		__entry->set = t->set;
+		__entry->periodic = t->periodic;
+	),
+	TP_printk(
+		"ms=%u set=%u periodic=%u",
+		__entry->ms, __entry->set, __entry->periodic
+	)
+);
+
+TRACE_EVENT(brcms_dpc,
+	TP_PROTO(unsigned long data),
+	TP_ARGS(data),
+	TP_STRUCT__entry(
+		__field(unsigned long, data)
+	),
+	TP_fast_assign(
+		__entry->data = data;
+	),
+	TP_printk(
+		"data=%p",
+		(void *)__entry->data
+	)
+);
+
+TRACE_EVENT(brcms_macintstatus,
+	TP_PROTO(const struct device *dev, int in_isr, u32 macintstatus,
+		 u32 mask),
+	TP_ARGS(dev, in_isr, macintstatus, mask),
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__field(int, in_isr)
+		__field(u32, macintstatus)
+		__field(u32, mask)
+	),
+	TP_fast_assign(
+		__assign_str(dev, dev_name(dev));
+		__entry->in_isr = in_isr;
+		__entry->macintstatus = macintstatus;
+		__entry->mask = mask;
+	),
+	TP_printk("[%s] in_isr=%d macintstatus=%#x mask=%#x", __get_str(dev),
+		  __entry->in_isr, __entry->macintstatus, __entry->mask)
+);
+#endif /* __TRACE_BRCMSMAC_H */
+
+#ifdef CONFIG_BRCM_TRACING
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE brcms_trace_brcmsmac
+#include <trace/define_trace.h>
+
+#endif /* CONFIG_BRCM_TRACING */
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h
new file mode 100644
index 0000000..0e8a69a
--- /dev/null
+++ b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2011 Broadcom Corporation
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(__TRACE_BRCMSMAC_MSG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __TRACE_BRCMSMAC_MSG_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM brcmsmac_msg
+
+#define MAX_MSG_LEN	100
+
+DECLARE_EVENT_CLASS(brcms_msg_event,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf),
+	TP_STRUCT__entry(
+		__dynamic_array(char, msg, MAX_MSG_LEN)
+	),
+	TP_fast_assign(
+		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+				       MAX_MSG_LEN, vaf->fmt,
+				       *vaf->va) >= MAX_MSG_LEN);
+	),
+	TP_printk("%s", __get_str(msg))
+);
+
+DEFINE_EVENT(brcms_msg_event, brcms_info,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+
+DEFINE_EVENT(brcms_msg_event, brcms_warn,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+
+DEFINE_EVENT(brcms_msg_event, brcms_err,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+
+DEFINE_EVENT(brcms_msg_event, brcms_crit,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+
+TRACE_EVENT(brcms_dbg,
+	TP_PROTO(u32 level, const char *func, struct va_format *vaf),
+	TP_ARGS(level, func, vaf),
+	TP_STRUCT__entry(
+		__field(u32, level)
+		__string(func, func)
+		__dynamic_array(char, msg, MAX_MSG_LEN)
+	),
+	TP_fast_assign(
+		__entry->level = level;
+		__assign_str(func, func);
+		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+				       MAX_MSG_LEN, vaf->fmt,
+				       *vaf->va) >= MAX_MSG_LEN);
+	),
+	TP_printk("%s: %s", __get_str(func), __get_str(msg))
+);
+#endif /* __TRACE_BRCMSMAC_MSG_H */
+
+#ifdef CONFIG_BRCM_TRACING
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE brcms_trace_brcmsmac_msg
+#include <trace/define_trace.h>
+
+#endif /* CONFIG_BRCM_TRACING */
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_tx.h b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_tx.h
new file mode 100644
index 0000000..cf2cc07
--- /dev/null
+++ b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_tx.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2011 Broadcom Corporation
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(__TRACE_BRCMSMAC_TX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __TRACE_BRCMSMAC_TX_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM brcmsmac_tx
+
+TRACE_EVENT(brcms_txdesc,
+	TP_PROTO(const struct device *dev,
+		 void *txh, size_t txh_len),
+	TP_ARGS(dev, txh, txh_len),
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__dynamic_array(u8, txh, txh_len)
+	),
+	TP_fast_assign(
+		__assign_str(dev, dev_name(dev));
+		memcpy(__get_dynamic_array(txh), txh, txh_len);
+	),
+	TP_printk("[%s] txdesc", __get_str(dev))
+);
+
+TRACE_EVENT(brcms_txstatus,
+	TP_PROTO(const struct device *dev, u16 framelen, u16 frameid,
+		 u16 status, u16 lasttxtime, u16 sequence, u16 phyerr,
+		 u16 ackphyrxsh),
+	TP_ARGS(dev, framelen, frameid, status, lasttxtime, sequence, phyerr,
+		ackphyrxsh),
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__field(u16, framelen)
+		__field(u16, frameid)
+		__field(u16, status)
+		__field(u16, lasttxtime)
+		__field(u16, sequence)
+		__field(u16, phyerr)
+		__field(u16, ackphyrxsh)
+	),
+	TP_fast_assign(
+		__assign_str(dev, dev_name(dev));
+		__entry->framelen = framelen;
+		__entry->frameid = frameid;
+		__entry->status = status;
+		__entry->lasttxtime = lasttxtime;
+		__entry->sequence = sequence;
+		__entry->phyerr = phyerr;
+		__entry->ackphyrxsh = ackphyrxsh;
+	),
+	TP_printk("[%s] FrameId %#04x TxStatus %#04x LastTxTime %#04x "
+		  "Seq %#04x PHYTxStatus %#04x RxAck %#04x",
+		  __get_str(dev), __entry->frameid, __entry->status,
+		  __entry->lasttxtime, __entry->sequence, __entry->phyerr,
+		  __entry->ackphyrxsh)
+);
+
+TRACE_EVENT(brcms_ampdu_session,
+	TP_PROTO(const struct device *dev, unsigned max_ampdu_len,
+		 u16 max_ampdu_frames, u16 ampdu_len, u16 ampdu_frames,
+		 u16 dma_len),
+	TP_ARGS(dev, max_ampdu_len, max_ampdu_frames, ampdu_len, ampdu_frames,
+		dma_len),
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__field(unsigned, max_ampdu_len)
+		__field(u16, max_ampdu_frames)
+		__field(u16, ampdu_len)
+		__field(u16, ampdu_frames)
+		__field(u16, dma_len)
+	),
+	TP_fast_assign(
+		__assign_str(dev, dev_name(dev));
+		__entry->max_ampdu_len = max_ampdu_len;
+		__entry->max_ampdu_frames = max_ampdu_frames;
+		__entry->ampdu_len = ampdu_len;
+		__entry->ampdu_frames = ampdu_frames;
+		__entry->dma_len = dma_len;
+	),
+	TP_printk("[%s] ampdu session max_len=%u max_frames=%u len=%u frames=%u dma_len=%u",
+		  __get_str(dev), __entry->max_ampdu_len,
+		  __entry->max_ampdu_frames, __entry->ampdu_len,
+		  __entry->ampdu_frames, __entry->dma_len)
+);
+#endif /* __TRACE_BRCMSMAC_TX_H */
+
+#ifdef CONFIG_BRCM_TRACING
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE brcms_trace_brcmsmac_tx
+#include <trace/define_trace.h>
+
+#endif /* CONFIG_BRCM_TRACING */
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_events.h b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_events.h
index 871781e..cbf2f06 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_events.h
+++ b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_events.h
@@ -14,9 +14,8 @@
  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
-#if !defined(__TRACE_BRCMSMAC_H) || defined(TRACE_HEADER_MULTI_READ)
-
-#define __TRACE_BRCMSMAC_H
+#ifndef __BRCMS_TRACE_EVENTS_H
+#define __BRCMS_TRACE_EVENTS_H
 
 #include <linux/types.h>
 #include <linux/device.h>
@@ -34,222 +33,8 @@
 static inline void trace_ ## name(proto) {}
 #endif
 
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM brcmsmac
-
-/*
- * We define a tracepoint, its arguments, its printk format and its
- * 'fast binary record' layout.
- */
-TRACE_EVENT(brcms_timer,
-	/* TPPROTO is the prototype of the function called by this tracepoint */
-	TP_PROTO(struct brcms_timer *t),
-	/*
-	 * TPARGS(firstarg, p) are the parameters names, same as found in the
-	 * prototype.
-	 */
-	TP_ARGS(t),
-	/*
-	 * Fast binary tracing: define the trace record via TP_STRUCT__entry().
-	 * You can think about it like a regular C structure local variable
-	 * definition.
-	 */
-	TP_STRUCT__entry(
-		__field(uint, ms)
-		__field(uint, set)
-		__field(uint, periodic)
-	),
-	TP_fast_assign(
-		__entry->ms = t->ms;
-		__entry->set = t->set;
-		__entry->periodic = t->periodic;
-	),
-	TP_printk(
-		"ms=%u set=%u periodic=%u",
-		__entry->ms, __entry->set, __entry->periodic
-	)
-);
-
-TRACE_EVENT(brcms_dpc,
-	TP_PROTO(unsigned long data),
-	TP_ARGS(data),
-	TP_STRUCT__entry(
-		__field(unsigned long, data)
-	),
-	TP_fast_assign(
-		__entry->data = data;
-	),
-	TP_printk(
-		"data=%p",
-		(void *)__entry->data
-	)
-);
-
-TRACE_EVENT(brcms_macintstatus,
-	TP_PROTO(const struct device *dev, int in_isr, u32 macintstatus,
-		 u32 mask),
-	TP_ARGS(dev, in_isr, macintstatus, mask),
-	TP_STRUCT__entry(
-		__string(dev, dev_name(dev))
-		__field(int, in_isr)
-		__field(u32, macintstatus)
-		__field(u32, mask)
-	),
-	TP_fast_assign(
-		__assign_str(dev, dev_name(dev));
-		__entry->in_isr = in_isr;
-		__entry->macintstatus = macintstatus;
-		__entry->mask = mask;
-	),
-	TP_printk("[%s] in_isr=%d macintstatus=%#x mask=%#x", __get_str(dev),
-		  __entry->in_isr, __entry->macintstatus, __entry->mask)
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM brcmsmac_tx
-
-TRACE_EVENT(brcms_txdesc,
-	TP_PROTO(const struct device *dev,
-		 void *txh, size_t txh_len),
-	TP_ARGS(dev, txh, txh_len),
-	TP_STRUCT__entry(
-		__string(dev, dev_name(dev))
-		__dynamic_array(u8, txh, txh_len)
-	),
-	TP_fast_assign(
-		__assign_str(dev, dev_name(dev));
-		memcpy(__get_dynamic_array(txh), txh, txh_len);
-	),
-	TP_printk("[%s] txdesc", __get_str(dev))
-);
-
-TRACE_EVENT(brcms_txstatus,
-	TP_PROTO(const struct device *dev, u16 framelen, u16 frameid,
-		 u16 status, u16 lasttxtime, u16 sequence, u16 phyerr,
-		 u16 ackphyrxsh),
-	TP_ARGS(dev, framelen, frameid, status, lasttxtime, sequence, phyerr,
-		ackphyrxsh),
-	TP_STRUCT__entry(
-		__string(dev, dev_name(dev))
-		__field(u16, framelen)
-		__field(u16, frameid)
-		__field(u16, status)
-		__field(u16, lasttxtime)
-		__field(u16, sequence)
-		__field(u16, phyerr)
-		__field(u16, ackphyrxsh)
-	),
-	TP_fast_assign(
-		__assign_str(dev, dev_name(dev));
-		__entry->framelen = framelen;
-		__entry->frameid = frameid;
-		__entry->status = status;
-		__entry->lasttxtime = lasttxtime;
-		__entry->sequence = sequence;
-		__entry->phyerr = phyerr;
-		__entry->ackphyrxsh = ackphyrxsh;
-	),
-	TP_printk("[%s] FrameId %#04x TxStatus %#04x LastTxTime %#04x "
-		  "Seq %#04x PHYTxStatus %#04x RxAck %#04x",
-		  __get_str(dev), __entry->frameid, __entry->status,
-		  __entry->lasttxtime, __entry->sequence, __entry->phyerr,
-		  __entry->ackphyrxsh)
-);
-
-TRACE_EVENT(brcms_ampdu_session,
-	TP_PROTO(const struct device *dev, unsigned max_ampdu_len,
-		 u16 max_ampdu_frames, u16 ampdu_len, u16 ampdu_frames,
-		 u16 dma_len),
-	TP_ARGS(dev, max_ampdu_len, max_ampdu_frames, ampdu_len, ampdu_frames,
-		dma_len),
-	TP_STRUCT__entry(
-		__string(dev, dev_name(dev))
-		__field(unsigned, max_ampdu_len)
-		__field(u16, max_ampdu_frames)
-		__field(u16, ampdu_len)
-		__field(u16, ampdu_frames)
-		__field(u16, dma_len)
-	),
-	TP_fast_assign(
-		__assign_str(dev, dev_name(dev));
-		__entry->max_ampdu_len = max_ampdu_len;
-		__entry->max_ampdu_frames = max_ampdu_frames;
-		__entry->ampdu_len = ampdu_len;
-		__entry->ampdu_frames = ampdu_frames;
-		__entry->dma_len = dma_len;
-	),
-	TP_printk("[%s] ampdu session max_len=%u max_frames=%u len=%u frames=%u dma_len=%u",
-		  __get_str(dev), __entry->max_ampdu_len,
-		  __entry->max_ampdu_frames, __entry->ampdu_len,
-		  __entry->ampdu_frames, __entry->dma_len)
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM brcmsmac_msg
-
-#define MAX_MSG_LEN	100
-
-DECLARE_EVENT_CLASS(brcms_msg_event,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf),
-	TP_STRUCT__entry(
-		__dynamic_array(char, msg, MAX_MSG_LEN)
-	),
-	TP_fast_assign(
-		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
-				       MAX_MSG_LEN, vaf->fmt,
-				       *vaf->va) >= MAX_MSG_LEN);
-	),
-	TP_printk("%s", __get_str(msg))
-);
-
-DEFINE_EVENT(brcms_msg_event, brcms_info,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-
-DEFINE_EVENT(brcms_msg_event, brcms_warn,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-
-DEFINE_EVENT(brcms_msg_event, brcms_err,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-
-DEFINE_EVENT(brcms_msg_event, brcms_crit,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-
-TRACE_EVENT(brcms_dbg,
-	TP_PROTO(u32 level, const char *func, struct va_format *vaf),
-	TP_ARGS(level, func, vaf),
-	TP_STRUCT__entry(
-		__field(u32, level)
-		__string(func, func)
-		__dynamic_array(char, msg, MAX_MSG_LEN)
-	),
-	TP_fast_assign(
-		__entry->level = level;
-		__assign_str(func, func);
-		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
-				       MAX_MSG_LEN, vaf->fmt,
-				       *vaf->va) >= MAX_MSG_LEN);
-	),
-	TP_printk("%s: %s", __get_str(func), __get_str(msg))
-);
+#include "brcms_trace_brcmsmac.h"
+#include "brcms_trace_brcmsmac_tx.h"
+#include "brcms_trace_brcmsmac_msg.h"
 
 #endif /* __TRACE_BRCMSMAC_H */
-
-#ifdef CONFIG_BRCM_TRACING
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE brcms_trace_events
-
-#include <trace/define_trace.h>
-
-#endif /* CONFIG_BRCM_TRACING */
diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-data.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-data.h
new file mode 100644
index 0000000..04e6649
--- /dev/null
+++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-data.h
@@ -0,0 +1,79 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called LICENSE.
+ *
+ * Contact Information:
+ *  Intel Linux Wireless <ilw@linux.intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ *****************************************************************************/
+
+#if !defined(__IWLWIFI_DEVICE_TRACE_DATA) || defined(TRACE_HEADER_MULTI_READ)
+#define __IWLWIFI_DEVICE_TRACE_DATA
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iwlwifi_data
+
+TRACE_EVENT(iwlwifi_dev_tx_data,
+	TP_PROTO(const struct device *dev,
+		 struct sk_buff *skb,
+		 void *data, size_t data_len),
+	TP_ARGS(dev, skb, data, data_len),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+
+		__dynamic_array(u8, data, iwl_trace_data(skb) ? data_len : 0)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		if (iwl_trace_data(skb))
+			memcpy(__get_dynamic_array(data), data, data_len);
+	),
+	TP_printk("[%s] TX frame data", __get_str(dev))
+);
+
+TRACE_EVENT(iwlwifi_dev_rx_data,
+	TP_PROTO(const struct device *dev,
+		 const struct iwl_trans *trans,
+		 void *rxbuf, size_t len),
+	TP_ARGS(dev, trans, rxbuf, len),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+
+		__dynamic_array(u8, data,
+				len - iwl_rx_trace_len(trans, rxbuf, len))
+	),
+	TP_fast_assign(
+		size_t offs = iwl_rx_trace_len(trans, rxbuf, len);
+		DEV_ASSIGN;
+		if (offs < len)
+			memcpy(__get_dynamic_array(data),
+			       ((u8 *)rxbuf) + offs, len - offs);
+	),
+	TP_printk("[%s] RX frame data", __get_str(dev))
+);
+#endif /* __IWLWIFI_DEVICE_TRACE_DATA */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE iwl-devtrace-data
+#include <trace/define_trace.h>
diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-io.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-io.h
new file mode 100644
index 0000000..f62c5448
--- /dev/null
+++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-io.h
@@ -0,0 +1,155 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called LICENSE.
+ *
+ * Contact Information:
+ *  Intel Linux Wireless <ilw@linux.intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ *****************************************************************************/
+
+#if !defined(__IWLWIFI_DEVICE_TRACE_IO) || defined(TRACE_HEADER_MULTI_READ)
+#define __IWLWIFI_DEVICE_TRACE_IO
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iwlwifi_io
+
+TRACE_EVENT(iwlwifi_dev_ioread32,
+	TP_PROTO(const struct device *dev, u32 offs, u32 val),
+	TP_ARGS(dev, offs, val),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__field(u32, offs)
+		__field(u32, val)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->offs = offs;
+		__entry->val = val;
+	),
+	TP_printk("[%s] read io[%#x] = %#x",
+		  __get_str(dev), __entry->offs, __entry->val)
+);
+
+TRACE_EVENT(iwlwifi_dev_iowrite8,
+	TP_PROTO(const struct device *dev, u32 offs, u8 val),
+	TP_ARGS(dev, offs, val),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__field(u32, offs)
+		__field(u8, val)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->offs = offs;
+		__entry->val = val;
+	),
+	TP_printk("[%s] write io[%#x] = %#x)",
+		  __get_str(dev), __entry->offs, __entry->val)
+);
+
+TRACE_EVENT(iwlwifi_dev_iowrite32,
+	TP_PROTO(const struct device *dev, u32 offs, u32 val),
+	TP_ARGS(dev, offs, val),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__field(u32, offs)
+		__field(u32, val)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->offs = offs;
+		__entry->val = val;
+	),
+	TP_printk("[%s] write io[%#x] = %#x)",
+		  __get_str(dev), __entry->offs, __entry->val)
+);
+
+TRACE_EVENT(iwlwifi_dev_iowrite_prph32,
+	TP_PROTO(const struct device *dev, u32 offs, u32 val),
+	TP_ARGS(dev, offs, val),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__field(u32, offs)
+		__field(u32, val)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->offs = offs;
+		__entry->val = val;
+	),
+	TP_printk("[%s] write PRPH[%#x] = %#x)",
+		  __get_str(dev), __entry->offs, __entry->val)
+);
+
+TRACE_EVENT(iwlwifi_dev_ioread_prph32,
+	TP_PROTO(const struct device *dev, u32 offs, u32 val),
+	TP_ARGS(dev, offs, val),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__field(u32, offs)
+		__field(u32, val)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->offs = offs;
+		__entry->val = val;
+	),
+	TP_printk("[%s] read PRPH[%#x] = %#x",
+		  __get_str(dev), __entry->offs, __entry->val)
+);
+
+TRACE_EVENT(iwlwifi_dev_irq,
+	TP_PROTO(const struct device *dev),
+	TP_ARGS(dev),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+	),
+	/* TP_printk("") doesn't compile */
+	TP_printk("%d", 0)
+);
+
+TRACE_EVENT(iwlwifi_dev_ict_read,
+	TP_PROTO(const struct device *dev, u32 index, u32 value),
+	TP_ARGS(dev, index, value),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__field(u32, index)
+		__field(u32, value)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->index = index;
+		__entry->value = value;
+	),
+	TP_printk("[%s] read ict[%d] = %#.8x",
+		  __get_str(dev), __entry->index, __entry->value)
+);
+#endif /* __IWLWIFI_DEVICE_TRACE_IO */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE iwl-devtrace-io
+#include <trace/define_trace.h>
diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-iwlwifi.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-iwlwifi.h
new file mode 100644
index 0000000..6cb66a9
--- /dev/null
+++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-iwlwifi.h
@@ -0,0 +1,200 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called LICENSE.
+ *
+ * Contact Information:
+ *  Intel Linux Wireless <ilw@linux.intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ *****************************************************************************/
+
+#if !defined(__IWLWIFI_DEVICE_TRACE_IWLWIFI) || defined(TRACE_HEADER_MULTI_READ)
+#define __IWLWIFI_DEVICE_TRACE_IWLWIFI
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iwlwifi
+
+TRACE_EVENT(iwlwifi_dev_hcmd,
+	TP_PROTO(const struct device *dev,
+		 struct iwl_host_cmd *cmd, u16 total_size,
+		 struct iwl_cmd_header *hdr),
+	TP_ARGS(dev, cmd, total_size, hdr),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__dynamic_array(u8, hcmd, total_size)
+		__field(u32, flags)
+	),
+	TP_fast_assign(
+		int i, offset = sizeof(*hdr);
+
+		DEV_ASSIGN;
+		__entry->flags = cmd->flags;
+		memcpy(__get_dynamic_array(hcmd), hdr, sizeof(*hdr));
+
+		for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
+			if (!cmd->len[i])
+				continue;
+			memcpy((u8 *)__get_dynamic_array(hcmd) + offset,
+			       cmd->data[i], cmd->len[i]);
+			offset += cmd->len[i];
+		}
+	),
+	TP_printk("[%s] hcmd %#.2x (%ssync)",
+		  __get_str(dev), ((u8 *)__get_dynamic_array(hcmd))[0],
+		  __entry->flags & CMD_ASYNC ? "a" : "")
+);
+
+TRACE_EVENT(iwlwifi_dev_rx,
+	TP_PROTO(const struct device *dev, const struct iwl_trans *trans,
+		 void *rxbuf, size_t len),
+	TP_ARGS(dev, trans, rxbuf, len),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__dynamic_array(u8, rxbuf, iwl_rx_trace_len(trans, rxbuf, len))
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		memcpy(__get_dynamic_array(rxbuf), rxbuf,
+		       iwl_rx_trace_len(trans, rxbuf, len));
+	),
+	TP_printk("[%s] RX cmd %#.2x",
+		  __get_str(dev), ((u8 *)__get_dynamic_array(rxbuf))[4])
+);
+
+TRACE_EVENT(iwlwifi_dev_tx,
+	TP_PROTO(const struct device *dev, struct sk_buff *skb,
+		 void *tfd, size_t tfdlen,
+		 void *buf0, size_t buf0_len,
+		 void *buf1, size_t buf1_len),
+	TP_ARGS(dev, skb, tfd, tfdlen, buf0, buf0_len, buf1, buf1_len),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+
+		__field(size_t, framelen)
+		__dynamic_array(u8, tfd, tfdlen)
+
+		/*
+		 * Do not insert between or below these items,
+		 * we want to keep the frame together (except
+		 * for the possible padding).
+		 */
+		__dynamic_array(u8, buf0, buf0_len)
+		__dynamic_array(u8, buf1, iwl_trace_data(skb) ? 0 : buf1_len)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->framelen = buf0_len + buf1_len;
+		memcpy(__get_dynamic_array(tfd), tfd, tfdlen);
+		memcpy(__get_dynamic_array(buf0), buf0, buf0_len);
+		if (!iwl_trace_data(skb))
+			memcpy(__get_dynamic_array(buf1), buf1, buf1_len);
+	),
+	TP_printk("[%s] TX %.2x (%zu bytes)",
+		  __get_str(dev), ((u8 *)__get_dynamic_array(buf0))[0],
+		  __entry->framelen)
+);
+
+TRACE_EVENT(iwlwifi_dev_ucode_error,
+	TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low,
+		 u32 data1, u32 data2, u32 line, u32 blink1,
+		 u32 blink2, u32 ilink1, u32 ilink2, u32 bcon_time,
+		 u32 gp1, u32 gp2, u32 gp3, u32 ucode_ver, u32 hw_ver,
+		 u32 brd_ver),
+	TP_ARGS(dev, desc, tsf_low, data1, data2, line,
+		blink1, blink2, ilink1, ilink2, bcon_time, gp1, gp2,
+		gp3, ucode_ver, hw_ver, brd_ver),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__field(u32, desc)
+		__field(u32, tsf_low)
+		__field(u32, data1)
+		__field(u32, data2)
+		__field(u32, line)
+		__field(u32, blink1)
+		__field(u32, blink2)
+		__field(u32, ilink1)
+		__field(u32, ilink2)
+		__field(u32, bcon_time)
+		__field(u32, gp1)
+		__field(u32, gp2)
+		__field(u32, gp3)
+		__field(u32, ucode_ver)
+		__field(u32, hw_ver)
+		__field(u32, brd_ver)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->desc = desc;
+		__entry->tsf_low = tsf_low;
+		__entry->data1 = data1;
+		__entry->data2 = data2;
+		__entry->line = line;
+		__entry->blink1 = blink1;
+		__entry->blink2 = blink2;
+		__entry->ilink1 = ilink1;
+		__entry->ilink2 = ilink2;
+		__entry->bcon_time = bcon_time;
+		__entry->gp1 = gp1;
+		__entry->gp2 = gp2;
+		__entry->gp3 = gp3;
+		__entry->ucode_ver = ucode_ver;
+		__entry->hw_ver = hw_ver;
+		__entry->brd_ver = brd_ver;
+	),
+	TP_printk("[%s] #%02d %010u data 0x%08X 0x%08X line %u, "
+		  "blink 0x%05X 0x%05X ilink 0x%05X 0x%05X "
+		  "bcon_tm %010u gp 0x%08X 0x%08X 0x%08X uCode 0x%08X "
+		  "hw 0x%08X brd 0x%08X",
+		  __get_str(dev), __entry->desc, __entry->tsf_low,
+		  __entry->data1,
+		  __entry->data2, __entry->line, __entry->blink1,
+		  __entry->blink2, __entry->ilink1, __entry->ilink2,
+		  __entry->bcon_time, __entry->gp1, __entry->gp2,
+		  __entry->gp3, __entry->ucode_ver, __entry->hw_ver,
+		  __entry->brd_ver)
+);
+
+TRACE_EVENT(iwlwifi_dev_ucode_event,
+	TP_PROTO(const struct device *dev, u32 time, u32 data, u32 ev),
+	TP_ARGS(dev, time, data, ev),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+
+		__field(u32, time)
+		__field(u32, data)
+		__field(u32, ev)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->time = time;
+		__entry->data = data;
+		__entry->ev = ev;
+	),
+	TP_printk("[%s] EVT_LOGT:%010u:0x%08x:%04u",
+		  __get_str(dev), __entry->time, __entry->data, __entry->ev)
+);
+#endif /* __IWLWIFI_DEVICE_TRACE_IWLWIFI */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE iwl-devtrace-iwlwifi
+#include <trace/define_trace.h>
diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-msg.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-msg.h
new file mode 100644
index 0000000..a3b3c24
--- /dev/null
+++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-msg.h
@@ -0,0 +1,97 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called LICENSE.
+ *
+ * Contact Information:
+ *  Intel Linux Wireless <ilw@linux.intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ *****************************************************************************/
+
+#if !defined(__IWLWIFI_DEVICE_TRACE_MSG) || defined(TRACE_HEADER_MULTI_READ)
+#define __IWLWIFI_DEVICE_TRACE_MSG
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iwlwifi_msg
+
+#define MAX_MSG_LEN	110
+
+DECLARE_EVENT_CLASS(iwlwifi_msg_event,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf),
+	TP_STRUCT__entry(
+		__dynamic_array(char, msg, MAX_MSG_LEN)
+	),
+	TP_fast_assign(
+		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+				       MAX_MSG_LEN, vaf->fmt,
+				       *vaf->va) >= MAX_MSG_LEN);
+	),
+	TP_printk("%s", __get_str(msg))
+);
+
+DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_err,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+
+DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_warn,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+
+DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_info,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+
+DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_crit,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+
+TRACE_EVENT(iwlwifi_dbg,
+	TP_PROTO(u32 level, bool in_interrupt, const char *function,
+		 struct va_format *vaf),
+	TP_ARGS(level, in_interrupt, function, vaf),
+	TP_STRUCT__entry(
+		__field(u32, level)
+		__field(u8, in_interrupt)
+		__string(function, function)
+		__dynamic_array(char, msg, MAX_MSG_LEN)
+	),
+	TP_fast_assign(
+		__entry->level = level;
+		__entry->in_interrupt = in_interrupt;
+		__assign_str(function, function);
+		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+				       MAX_MSG_LEN, vaf->fmt,
+				       *vaf->va) >= MAX_MSG_LEN);
+	),
+	TP_printk("%s", __get_str(msg))
+);
+#endif /* __IWLWIFI_DEVICE_TRACE_MSG */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE iwl-devtrace-msg
+#include <trace/define_trace.h>
diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-ucode.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-ucode.h
new file mode 100644
index 0000000..10839fa
--- /dev/null
+++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-ucode.h
@@ -0,0 +1,81 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called LICENSE.
+ *
+ * Contact Information:
+ *  Intel Linux Wireless <ilw@linux.intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ *****************************************************************************/
+
+#if !defined(__IWLWIFI_DEVICE_TRACE_UCODE) || defined(TRACE_HEADER_MULTI_READ)
+#define __IWLWIFI_DEVICE_TRACE_UCODE
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iwlwifi_ucode
+
+TRACE_EVENT(iwlwifi_dev_ucode_cont_event,
+	TP_PROTO(const struct device *dev, u32 time, u32 data, u32 ev),
+	TP_ARGS(dev, time, data, ev),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+
+		__field(u32, time)
+		__field(u32, data)
+		__field(u32, ev)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->time = time;
+		__entry->data = data;
+		__entry->ev = ev;
+	),
+	TP_printk("[%s] EVT_LOGT:%010u:0x%08x:%04u",
+		  __get_str(dev), __entry->time, __entry->data, __entry->ev)
+);
+
+TRACE_EVENT(iwlwifi_dev_ucode_wrap_event,
+	TP_PROTO(const struct device *dev, u32 wraps, u32 n_entry, u32 p_entry),
+	TP_ARGS(dev, wraps, n_entry, p_entry),
+	TP_STRUCT__entry(
+		DEV_ENTRY
+
+		__field(u32, wraps)
+		__field(u32, n_entry)
+		__field(u32, p_entry)
+	),
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->wraps = wraps;
+		__entry->n_entry = n_entry;
+		__entry->p_entry = p_entry;
+	),
+	TP_printk("[%s] wraps=#%02d n=0x%X p=0x%X",
+		  __get_str(dev), __entry->wraps, __entry->n_entry,
+		  __entry->p_entry)
+);
+#endif /* __IWLWIFI_DEVICE_TRACE_UCODE */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE iwl-devtrace-ucode
+#include <trace/define_trace.h>
diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace.h b/drivers/net/wireless/iwlwifi/iwl-devtrace.h
index 78bd41b..b87acd6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-devtrace.h
+++ b/drivers/net/wireless/iwlwifi/iwl-devtrace.h
@@ -24,7 +24,7 @@
  *
  *****************************************************************************/
 
-#if !defined(__IWLWIFI_DEVICE_TRACE) || defined(TRACE_HEADER_MULTI_READ)
+#ifndef __IWLWIFI_DEVICE_TRACE
 #include <linux/skbuff.h>
 #include <linux/ieee80211.h>
 #include <net/cfg80211.h>
@@ -80,436 +80,10 @@
 #define DEV_ENTRY	__string(dev, dev_name(dev))
 #define DEV_ASSIGN	__assign_str(dev, dev_name(dev))
 
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM iwlwifi_io
+#include "iwl-devtrace-io.h"
+#include "iwl-devtrace-ucode.h"
+#include "iwl-devtrace-msg.h"
+#include "iwl-devtrace-data.h"
+#include "iwl-devtrace-iwlwifi.h"
 
-TRACE_EVENT(iwlwifi_dev_ioread32,
-	TP_PROTO(const struct device *dev, u32 offs, u32 val),
-	TP_ARGS(dev, offs, val),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__field(u32, offs)
-		__field(u32, val)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->offs = offs;
-		__entry->val = val;
-	),
-	TP_printk("[%s] read io[%#x] = %#x",
-		  __get_str(dev), __entry->offs, __entry->val)
-);
-
-TRACE_EVENT(iwlwifi_dev_iowrite8,
-	TP_PROTO(const struct device *dev, u32 offs, u8 val),
-	TP_ARGS(dev, offs, val),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__field(u32, offs)
-		__field(u8, val)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->offs = offs;
-		__entry->val = val;
-	),
-	TP_printk("[%s] write io[%#x] = %#x)",
-		  __get_str(dev), __entry->offs, __entry->val)
-);
-
-TRACE_EVENT(iwlwifi_dev_iowrite32,
-	TP_PROTO(const struct device *dev, u32 offs, u32 val),
-	TP_ARGS(dev, offs, val),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__field(u32, offs)
-		__field(u32, val)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->offs = offs;
-		__entry->val = val;
-	),
-	TP_printk("[%s] write io[%#x] = %#x)",
-		  __get_str(dev), __entry->offs, __entry->val)
-);
-
-TRACE_EVENT(iwlwifi_dev_iowrite_prph32,
-	TP_PROTO(const struct device *dev, u32 offs, u32 val),
-	TP_ARGS(dev, offs, val),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__field(u32, offs)
-		__field(u32, val)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->offs = offs;
-		__entry->val = val;
-	),
-	TP_printk("[%s] write PRPH[%#x] = %#x)",
-		  __get_str(dev), __entry->offs, __entry->val)
-);
-
-TRACE_EVENT(iwlwifi_dev_ioread_prph32,
-	TP_PROTO(const struct device *dev, u32 offs, u32 val),
-	TP_ARGS(dev, offs, val),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__field(u32, offs)
-		__field(u32, val)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->offs = offs;
-		__entry->val = val;
-	),
-	TP_printk("[%s] read PRPH[%#x] = %#x",
-		  __get_str(dev), __entry->offs, __entry->val)
-);
-
-TRACE_EVENT(iwlwifi_dev_irq,
-	TP_PROTO(const struct device *dev),
-	TP_ARGS(dev),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-	),
-	/* TP_printk("") doesn't compile */
-	TP_printk("%d", 0)
-);
-
-TRACE_EVENT(iwlwifi_dev_ict_read,
-	TP_PROTO(const struct device *dev, u32 index, u32 value),
-	TP_ARGS(dev, index, value),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__field(u32, index)
-		__field(u32, value)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->index = index;
-		__entry->value = value;
-	),
-	TP_printk("[%s] read ict[%d] = %#.8x",
-		  __get_str(dev), __entry->index, __entry->value)
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM iwlwifi_ucode
-
-TRACE_EVENT(iwlwifi_dev_ucode_cont_event,
-	TP_PROTO(const struct device *dev, u32 time, u32 data, u32 ev),
-	TP_ARGS(dev, time, data, ev),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-
-		__field(u32, time)
-		__field(u32, data)
-		__field(u32, ev)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->time = time;
-		__entry->data = data;
-		__entry->ev = ev;
-	),
-	TP_printk("[%s] EVT_LOGT:%010u:0x%08x:%04u",
-		  __get_str(dev), __entry->time, __entry->data, __entry->ev)
-);
-
-TRACE_EVENT(iwlwifi_dev_ucode_wrap_event,
-	TP_PROTO(const struct device *dev, u32 wraps, u32 n_entry, u32 p_entry),
-	TP_ARGS(dev, wraps, n_entry, p_entry),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-
-		__field(u32, wraps)
-		__field(u32, n_entry)
-		__field(u32, p_entry)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->wraps = wraps;
-		__entry->n_entry = n_entry;
-		__entry->p_entry = p_entry;
-	),
-	TP_printk("[%s] wraps=#%02d n=0x%X p=0x%X",
-		  __get_str(dev), __entry->wraps, __entry->n_entry,
-		  __entry->p_entry)
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM iwlwifi_msg
-
-#define MAX_MSG_LEN	110
-
-DECLARE_EVENT_CLASS(iwlwifi_msg_event,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf),
-	TP_STRUCT__entry(
-		__dynamic_array(char, msg, MAX_MSG_LEN)
-	),
-	TP_fast_assign(
-		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
-				       MAX_MSG_LEN, vaf->fmt,
-				       *vaf->va) >= MAX_MSG_LEN);
-	),
-	TP_printk("%s", __get_str(msg))
-);
-
-DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_err,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-
-DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_warn,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-
-DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_info,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-
-DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_crit,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-
-TRACE_EVENT(iwlwifi_dbg,
-	TP_PROTO(u32 level, bool in_interrupt, const char *function,
-		 struct va_format *vaf),
-	TP_ARGS(level, in_interrupt, function, vaf),
-	TP_STRUCT__entry(
-		__field(u32, level)
-		__field(u8, in_interrupt)
-		__string(function, function)
-		__dynamic_array(char, msg, MAX_MSG_LEN)
-	),
-	TP_fast_assign(
-		__entry->level = level;
-		__entry->in_interrupt = in_interrupt;
-		__assign_str(function, function);
-		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
-				       MAX_MSG_LEN, vaf->fmt,
-				       *vaf->va) >= MAX_MSG_LEN);
-	),
-	TP_printk("%s", __get_str(msg))
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM iwlwifi_data
-
-TRACE_EVENT(iwlwifi_dev_tx_data,
-	TP_PROTO(const struct device *dev,
-		 struct sk_buff *skb,
-		 void *data, size_t data_len),
-	TP_ARGS(dev, skb, data, data_len),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-
-		__dynamic_array(u8, data, iwl_trace_data(skb) ? data_len : 0)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		if (iwl_trace_data(skb))
-			memcpy(__get_dynamic_array(data), data, data_len);
-	),
-	TP_printk("[%s] TX frame data", __get_str(dev))
-);
-
-TRACE_EVENT(iwlwifi_dev_rx_data,
-	TP_PROTO(const struct device *dev,
-		 const struct iwl_trans *trans,
-		 void *rxbuf, size_t len),
-	TP_ARGS(dev, trans, rxbuf, len),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-
-		__dynamic_array(u8, data,
-				len - iwl_rx_trace_len(trans, rxbuf, len))
-	),
-	TP_fast_assign(
-		size_t offs = iwl_rx_trace_len(trans, rxbuf, len);
-		DEV_ASSIGN;
-		if (offs < len)
-			memcpy(__get_dynamic_array(data),
-			       ((u8 *)rxbuf) + offs, len - offs);
-	),
-	TP_printk("[%s] RX frame data", __get_str(dev))
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM iwlwifi
-
-TRACE_EVENT(iwlwifi_dev_hcmd,
-	TP_PROTO(const struct device *dev,
-		 struct iwl_host_cmd *cmd, u16 total_size,
-		 struct iwl_cmd_header *hdr),
-	TP_ARGS(dev, cmd, total_size, hdr),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__dynamic_array(u8, hcmd, total_size)
-		__field(u32, flags)
-	),
-	TP_fast_assign(
-		int i, offset = sizeof(*hdr);
-
-		DEV_ASSIGN;
-		__entry->flags = cmd->flags;
-		memcpy(__get_dynamic_array(hcmd), hdr, sizeof(*hdr));
-
-		for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
-			if (!cmd->len[i])
-				continue;
-			memcpy((u8 *)__get_dynamic_array(hcmd) + offset,
-			       cmd->data[i], cmd->len[i]);
-			offset += cmd->len[i];
-		}
-	),
-	TP_printk("[%s] hcmd %#.2x (%ssync)",
-		  __get_str(dev), ((u8 *)__get_dynamic_array(hcmd))[0],
-		  __entry->flags & CMD_ASYNC ? "a" : "")
-);
-
-TRACE_EVENT(iwlwifi_dev_rx,
-	TP_PROTO(const struct device *dev, const struct iwl_trans *trans,
-		 void *rxbuf, size_t len),
-	TP_ARGS(dev, trans, rxbuf, len),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__dynamic_array(u8, rxbuf, iwl_rx_trace_len(trans, rxbuf, len))
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		memcpy(__get_dynamic_array(rxbuf), rxbuf,
-		       iwl_rx_trace_len(trans, rxbuf, len));
-	),
-	TP_printk("[%s] RX cmd %#.2x",
-		  __get_str(dev), ((u8 *)__get_dynamic_array(rxbuf))[4])
-);
-
-TRACE_EVENT(iwlwifi_dev_tx,
-	TP_PROTO(const struct device *dev, struct sk_buff *skb,
-		 void *tfd, size_t tfdlen,
-		 void *buf0, size_t buf0_len,
-		 void *buf1, size_t buf1_len),
-	TP_ARGS(dev, skb, tfd, tfdlen, buf0, buf0_len, buf1, buf1_len),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-
-		__field(size_t, framelen)
-		__dynamic_array(u8, tfd, tfdlen)
-
-		/*
-		 * Do not insert between or below these items,
-		 * we want to keep the frame together (except
-		 * for the possible padding).
-		 */
-		__dynamic_array(u8, buf0, buf0_len)
-		__dynamic_array(u8, buf1, iwl_trace_data(skb) ? 0 : buf1_len)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->framelen = buf0_len + buf1_len;
-		memcpy(__get_dynamic_array(tfd), tfd, tfdlen);
-		memcpy(__get_dynamic_array(buf0), buf0, buf0_len);
-		if (!iwl_trace_data(skb))
-			memcpy(__get_dynamic_array(buf1), buf1, buf1_len);
-	),
-	TP_printk("[%s] TX %.2x (%zu bytes)",
-		  __get_str(dev), ((u8 *)__get_dynamic_array(buf0))[0],
-		  __entry->framelen)
-);
-
-TRACE_EVENT(iwlwifi_dev_ucode_error,
-	TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low,
-		 u32 data1, u32 data2, u32 line, u32 blink1,
-		 u32 blink2, u32 ilink1, u32 ilink2, u32 bcon_time,
-		 u32 gp1, u32 gp2, u32 gp3, u32 ucode_ver, u32 hw_ver,
-		 u32 brd_ver),
-	TP_ARGS(dev, desc, tsf_low, data1, data2, line,
-		blink1, blink2, ilink1, ilink2, bcon_time, gp1, gp2,
-		gp3, ucode_ver, hw_ver, brd_ver),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-		__field(u32, desc)
-		__field(u32, tsf_low)
-		__field(u32, data1)
-		__field(u32, data2)
-		__field(u32, line)
-		__field(u32, blink1)
-		__field(u32, blink2)
-		__field(u32, ilink1)
-		__field(u32, ilink2)
-		__field(u32, bcon_time)
-		__field(u32, gp1)
-		__field(u32, gp2)
-		__field(u32, gp3)
-		__field(u32, ucode_ver)
-		__field(u32, hw_ver)
-		__field(u32, brd_ver)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->desc = desc;
-		__entry->tsf_low = tsf_low;
-		__entry->data1 = data1;
-		__entry->data2 = data2;
-		__entry->line = line;
-		__entry->blink1 = blink1;
-		__entry->blink2 = blink2;
-		__entry->ilink1 = ilink1;
-		__entry->ilink2 = ilink2;
-		__entry->bcon_time = bcon_time;
-		__entry->gp1 = gp1;
-		__entry->gp2 = gp2;
-		__entry->gp3 = gp3;
-		__entry->ucode_ver = ucode_ver;
-		__entry->hw_ver = hw_ver;
-		__entry->brd_ver = brd_ver;
-	),
-	TP_printk("[%s] #%02d %010u data 0x%08X 0x%08X line %u, "
-		  "blink 0x%05X 0x%05X ilink 0x%05X 0x%05X "
-		  "bcon_tm %010u gp 0x%08X 0x%08X 0x%08X uCode 0x%08X "
-		  "hw 0x%08X brd 0x%08X",
-		  __get_str(dev), __entry->desc, __entry->tsf_low,
-		  __entry->data1,
-		  __entry->data2, __entry->line, __entry->blink1,
-		  __entry->blink2, __entry->ilink1, __entry->ilink2,
-		  __entry->bcon_time, __entry->gp1, __entry->gp2,
-		  __entry->gp3, __entry->ucode_ver, __entry->hw_ver,
-		  __entry->brd_ver)
-);
-
-TRACE_EVENT(iwlwifi_dev_ucode_event,
-	TP_PROTO(const struct device *dev, u32 time, u32 data, u32 ev),
-	TP_ARGS(dev, time, data, ev),
-	TP_STRUCT__entry(
-		DEV_ENTRY
-
-		__field(u32, time)
-		__field(u32, data)
-		__field(u32, ev)
-	),
-	TP_fast_assign(
-		DEV_ASSIGN;
-		__entry->time = time;
-		__entry->data = data;
-		__entry->ev = ev;
-	),
-	TP_printk("[%s] EVT_LOGT:%010u:0x%08x:%04u",
-		  __get_str(dev), __entry->time, __entry->data, __entry->ev)
-);
 #endif /* __IWLWIFI_DEVICE_TRACE */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE iwl-devtrace
-#include <trace/define_trace.h>
diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/dm.c b/drivers/net/wireless/rtlwifi/rtl8723be/dm.c
index 2367e8f4..e77c3a4 100644
--- a/drivers/net/wireless/rtlwifi/rtl8723be/dm.c
+++ b/drivers/net/wireless/rtlwifi/rtl8723be/dm.c
@@ -309,7 +309,7 @@
 		rtl_dm_dig->min_undec_pwdb_for_dm =
 				rtlpriv->dm.entry_min_undec_sm_pwdb;
 		RT_TRACE(rtlpriv, COMP_BB_POWERSAVING, DBG_LOUD,
-			 "AP Ext Port or disconnet PWDB = 0x%x\n",
+			 "AP Ext Port or disconnect PWDB = 0x%x\n",
 			  rtl_dm_dig->min_undec_pwdb_for_dm);
 	}
 	RT_TRACE(rtlpriv, COMP_DIG, DBG_LOUD, "MinUndecoratedPWDBForDM =%d\n",
diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
index 0b2082d..342678d 100644
--- a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
+++ b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
@@ -873,7 +873,7 @@
 
 	if (rtlpriv->falsealm_cnt.cnt_all > 10000) {
 		RT_TRACE(rtlpriv, COMP_DIG, DBG_LOUD,
-			 "Abnornally false alarm case.\n");
+			 "Abnormally false alarm case.\n");
 
 		if (dm_digtable->large_fa_hit != 3)
 			dm_digtable->large_fa_hit++;
diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c
index caa1531..a656d9e 100644
--- a/drivers/parisc/eisa_enumerator.c
+++ b/drivers/parisc/eisa_enumerator.c
@@ -357,7 +357,7 @@
 		}
 		if (flags & HPEE_FUNCTION_INFO_CFG_FREE_FORM) {
 			/* I have no idea how to handle this */
-			printk("function %d have free-form confgiuration, skipping ",
+			printk("function %d have free-form configuration, skipping ",
 				num_func);
 			pos = p0 + function_len;
 			continue;
diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c
index 1806b24..23db4c9 100644
--- a/drivers/pinctrl/nomadik/pinctrl-abx500.c
+++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c
@@ -466,7 +466,7 @@
 		break;
 
 	default:
-		dev_dbg(pct->dev, "unknow alt_setting %d\n", alt_setting);
+		dev_dbg(pct->dev, "unknown alt_setting %d\n", alt_setting);
 
 		return -EINVAL;
 	}
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index ae7c2ba..af4f85a 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -213,7 +213,7 @@
 	/* get a report with all values through requesting one value */
 	sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev,
 			HID_USAGE_SENSOR_TIME, hid_time_addresses[0],
-			time_state->info[0].report_id);
+			time_state->info[0].report_id, SENSOR_HUB_SYNC);
 	/* wait for all values (event) */
 	ret = wait_for_completion_killable_timeout(
 			&time_state->comp_last_time, HZ*6);
diff --git a/drivers/scsi/be2iscsi/be_cmds.c b/drivers/scsi/be2iscsi/be_cmds.c
index 80d97f3..1028760 100644
--- a/drivers/scsi/be2iscsi/be_cmds.c
+++ b/drivers/scsi/be2iscsi/be_cmds.c
@@ -237,7 +237,7 @@
 			beiscsi_log(phba, KERN_WARNING,
 				    BEISCSI_LOG_INIT | BEISCSI_LOG_EH |
 				    BEISCSI_LOG_CONFIG,
-				    "BC_%d : Insufficent Buffer Error "
+				    "BC_%d : Insufficient Buffer Error "
 				    "Resp_Len : %d Actual_Resp_Len : %d\n",
 				    mbx_resp_hdr->response_length,
 				    mbx_resp_hdr->actual_resp_len);
diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c
index 0045742..dad959f 100644
--- a/drivers/scsi/ch.c
+++ b/drivers/scsi/ch.c
@@ -339,7 +339,7 @@
 			ch->firsts[CHET_DT],
 			ch->counts[CHET_DT]);
 	} else {
-		VPRINTK(KERN_INFO, "reading element address assigment page failed!\n");
+		VPRINTK(KERN_INFO, "reading element address assignment page failed!\n");
 	}
 
 	/* vendor specific element types */
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 488c392..0cccd60 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -186,7 +186,7 @@
 
 		if (unlikely(len > sizeof(odi->systemid))) {
 			OSD_ERR("OSD Target error: OSD_SYSTEM_ID too long(%d). "
-				"device idetification might not work\n", len);
+				"device identification might not work\n", len);
 			len = sizeof(odi->systemid);
 		}
 		odi->systemid_len = len;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index e59f25b..5bb57c5 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -5364,7 +5364,7 @@
 	blob = qla2x00_request_firmware(vha);
 	if (!blob) {
 		ql_log(ql_log_info, vha, 0x0083,
-		    "Fimware image unavailable.\n");
+		    "Firmware image unavailable.\n");
 		ql_log(ql_log_info, vha, 0x0084,
 		    "Firmware images can be retrieved from: "QLA_FW_URL ".\n");
 		return QLA_FUNCTION_FAILED;
@@ -5467,7 +5467,7 @@
 	blob = qla2x00_request_firmware(vha);
 	if (!blob) {
 		ql_log(ql_log_warn, vha, 0x0090,
-		    "Fimware image unavailable.\n");
+		    "Firmware image unavailable.\n");
 		ql_log(ql_log_warn, vha, 0x0091,
 		    "Firmware images can be retrieved from: "
 		    QLA_FW_URL ".\n");
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 5c2e031..ca3804e 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -788,7 +788,7 @@
 		rsp->msix = &ha->msix_entries[que_id + 1];
 	else
 		ql_log(ql_log_warn, base_vha, 0x00e3,
-		    "MSIX not enalbled.\n");
+		    "MSIX not enabled.\n");
 
 	ha->rsp_q_map[que_id] = rsp;
 	rsp->rid = rid;
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 54cb2ac..7d2b18f 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -1274,7 +1274,7 @@
 
 		if (off == ADDR_ERROR) {
 			ql_log(ql_log_fatal, vha, 0x0116,
-			    "Unknow addr: 0x%08lx.\n", buf[i].addr);
+			    "Unknown addr: 0x%08lx.\n", buf[i].addr);
 			continue;
 		}
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index cce1cbc..5319b3c 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -446,11 +446,11 @@
 		}
 		ha->flags.cpu_affinity_enabled = 1;
 		ql_dbg(ql_dbg_multiq, vha, 0xc007,
-		    "CPU affinity mode enalbed, "
+		    "CPU affinity mode enabled, "
 		    "no. of response queues:%d no. of request queues:%d.\n",
 		    ha->max_rsp_queues, ha->max_req_queues);
 		ql_dbg(ql_dbg_init, vha, 0x00e9,
-		    "CPU affinity mode enalbed, "
+		    "CPU affinity mode enabled, "
 		    "no. of response queues:%d no. of request queues:%d.\n",
 		    ha->max_rsp_queues, ha->max_req_queues);
 	}
@@ -4102,7 +4102,7 @@
 		break;
 	default:
 		ql_log(ql_log_warn, base_vha, 0xb05f,
-		    "Unknow work-code=0x%x.\n", work_code);
+		    "Unknown work-code=0x%x.\n", work_code);
 	}
 
 	return;
@@ -4702,7 +4702,7 @@
 			break;
 		default:
 			ql_log(ql_log_warn, base_vha, 0xb071,
-			    "Unknow Device State: %x.\n", dev_state);
+			    "Unknown Device State: %x.\n", dev_state);
 			qla83xx_idc_unlock(base_vha, 0);
 			qla8xxx_dev_failed_handler(base_vha);
 			rval = QLA_FUNCTION_FAILED;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 2270bd5..9d7b7db 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -33,7 +33,6 @@
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/mm.h>
-#include <linux/aio.h>
 #include <linux/errno.h>
 #include <linux/mtio.h>
 #include <linux/ioctl.h>
@@ -51,6 +50,7 @@
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
+#include <linux/uio.h>
 
 #include "scsi.h"
 #include <scsi/scsi_dbg.h>
@@ -1745,17 +1745,14 @@
 	}
 
 	if (iov_count) {
-		int size = sizeof(struct iovec) * iov_count;
-		struct iovec *iov;
+		struct iovec *iov = NULL;
 		struct iov_iter i;
 
-		iov = memdup_user(hp->dxferp, size);
-		if (IS_ERR(iov))
-			return PTR_ERR(iov);
+		res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i);
+		if (res < 0)
+			return res;
 
-		iov_iter_init(&i, rw, iov, iov_count,
-			      min_t(size_t, hp->dxfer_len,
-				    iov_length(iov, iov_count)));
+		iov_iter_truncate(&i, hp->dxfer_len);
 
 		res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC);
 		kfree(iov);
diff --git a/drivers/staging/unisys/include/timskmod.h b/drivers/staging/unisys/include/timskmod.h
index 5a933d7..cde2494 100644
--- a/drivers/staging/unisys/include/timskmod.h
+++ b/drivers/staging/unisys/include/timskmod.h
@@ -46,7 +46,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c
index 967b2c2..0655fec 100644
--- a/drivers/tty/goldfish.c
+++ b/drivers/tty/goldfish.c
@@ -229,7 +229,6 @@
 {
 	struct goldfish_tty *qtty;
 	int ret = -EINVAL;
-	int i;
 	struct resource *r;
 	struct device *ttydev;
 	void __iomem *base;
@@ -293,7 +292,6 @@
 	mutex_unlock(&goldfish_tty_lock);
 	return 0;
 
-	tty_unregister_device(goldfish_tty_driver, i);
 err_tty_register_device_failed:
 	free_irq(irq, pdev);
 err_request_irq_failed:
diff --git a/drivers/tty/ipwireless/hardware.c b/drivers/tty/ipwireless/hardware.c
index 5c77e1e..ad7031a 100644
--- a/drivers/tty/ipwireless/hardware.c
+++ b/drivers/tty/ipwireless/hardware.c
@@ -1455,7 +1455,7 @@
 			return;
 		}
 
-		set_RTS(hw, PRIO_SETUP, channel_idx,
+		ret = set_RTS(hw, PRIO_SETUP, channel_idx,
 			(hw->control_lines [channel_idx] &
 			 IPW_CONTROL_LINE_RTS) != 0);
 		if (ret) {
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 175c995..a12315a78 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -23,6 +23,7 @@
 #include <linux/export.h>
 #include <linux/hid.h>
 #include <linux/module.h>
+#include <linux/uio.h>
 #include <asm/unaligned.h>
 
 #include <linux/usb/composite.h>
@@ -655,9 +656,10 @@
 		unuse_mm(io_data->mm);
 	}
 
-	aio_complete(io_data->kiocb, ret, ret);
+	io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);
 
-	if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd)
+	if (io_data->ffs->ffs_eventfd &&
+	    !(io_data->kiocb->ki_flags & IOCB_EVENTFD))
 		eventfd_signal(io_data->ffs->ffs_eventfd, 1);
 
 	usb_ep_free_request(io_data->ep, io_data->req);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 200f9a5..662ef2c 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -26,6 +26,7 @@
 #include <linux/poll.h>
 #include <linux/mmu_context.h>
 #include <linux/aio.h>
+#include <linux/uio.h>
 
 #include <linux/device.h>
 #include <linux/moduleparam.h>
@@ -469,7 +470,7 @@
 		ret = -EFAULT;
 
 	/* completing the iocb can drop the ctx and mm, don't touch mm after */
-	aio_complete(iocb, ret, ret);
+	iocb->ki_complete(iocb, ret, ret);
 
 	kfree(priv->buf);
 	kfree(priv->to_free);
@@ -497,7 +498,8 @@
 		kfree(priv);
 		iocb->private = NULL;
 		/* aio_complete() reports bytes-transferred _and_ faults */
-		aio_complete(iocb, req->actual ? req->actual : req->status,
+
+		iocb->ki_complete(iocb, req->actual ? req->actual : req->status,
 				req->status);
 	} else {
 		/* ep_copy_to_user() won't report both; we hide some faults */
diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h
index dde3959..59c0565 100644
--- a/drivers/usb/host/xhci-trace.h
+++ b/drivers/usb/host/xhci-trace.h
@@ -14,6 +14,13 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM xhci-hcd
 
+/*
+ * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a
+ * legitimate C variable. It is not exported to user space.
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR xhci_hcd
+
 #if !defined(__XHCI_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __XHCI_TRACE_H
 
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e05..ff1a5ba 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
 #include <linux/pagemap.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/Makefile b/fs/Makefile
index a88ac48..cb92fd4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@
 obj-$(CONFIG_HPPFS)		+= hppfs/
 obj-$(CONFIG_CACHEFILES)	+= cachefiles/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
+obj-$(CONFIG_TRACING)		+= tracefs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_BTRFS_FS)		+= btrfs/
 obj-$(CONFIG_GFS2_FS)           += gfs2/
diff --git a/fs/affs/file.c b/fs/affs/file.c
index a91795e..3aa7eb6 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
  *  affs regular file handling primitives
  */
 
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "affs.h"
 
 static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08..0714abc 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
-#include <linux/aio.h>
 #include "internal.h"
 
 static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index a793f70..1ab6001 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -151,6 +151,38 @@
 	unsigned		id;
 };
 
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED		((void *) (~0ULL))
+
+struct aio_kiocb {
+	struct kiocb		common;
+
+	struct kioctx		*ki_ctx;
+	kiocb_cancel_fn		*ki_cancel;
+
+	struct iocb __user	*ki_user_iocb;	/* user's aiocb */
+	__u64			ki_user_data;	/* user's data for completion */
+
+	struct list_head	ki_list;	/* the aio core uses this
+						 * for cancellation */
+
+	/*
+	 * If the aio_resfd field of the userspace iocb is not zero,
+	 * this is the underlying eventfd context to deliver events to.
+	 */
+	struct eventfd_ctx	*ki_eventfd;
+};
+
 /*------ sysctl variables----*/
 static DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;		/* current system wide number of aio requests */
@@ -220,7 +252,7 @@
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");
 
-	kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -484,8 +516,9 @@
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
+	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 
@@ -500,7 +533,7 @@
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;
 
@@ -518,7 +551,7 @@
 		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
 	} while (cancel != old);
 
-	return cancel(kiocb);
+	return cancel(&kiocb->common);
 }
 
 static void free_ioctx(struct work_struct *work)
@@ -554,13 +587,13 @@
 static void free_ioctx_users(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
-	struct kiocb *req;
+	struct aio_kiocb *req;
 
 	spin_lock_irq(&ctx->ctx_lock);
 
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
-				       struct kiocb, ki_list);
+				       struct aio_kiocb, ki_list);
 
 		list_del_init(&req->ki_list);
 		kiocb_cancel(req);
@@ -786,22 +819,6 @@
 	return 0;
 }
 
-/* wait_on_sync_kiocb:
- *	Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
-	while (!req->ki_ctx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (req->ki_ctx)
-			break;
-		io_schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
 /*
  * exit_aio: called when the last user of mm goes away.  At this point, there is
  * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -956,9 +973,9 @@
  *	Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
  */
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 
 	if (!get_reqs_available(ctx)) {
 		user_refill_reqs_available(ctx);
@@ -979,10 +996,10 @@
 	return NULL;
 }
 
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
 {
-	if (req->ki_filp)
-		fput(req->ki_filp);
+	if (req->common.ki_filp)
+		fput(req->common.ki_filp);
 	if (req->ki_eventfd != NULL)
 		eventfd_ctx_put(req->ki_eventfd);
 	kmem_cache_free(kiocb_cachep, req);
@@ -1018,8 +1035,9 @@
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
 {
+	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
@@ -1033,13 +1051,7 @@
 	 *    ref, no other paths have a way to get another ref
 	 *  - the sync task helpfully left a reference to itself in the iocb
 	 */
-	if (is_sync_kiocb(iocb)) {
-		iocb->ki_user_data = res;
-		smp_wmb();
-		iocb->ki_ctx = ERR_PTR(-EXDEV);
-		wake_up_process(iocb->ki_obj.tsk);
-		return;
-	}
+	BUG_ON(is_sync_kiocb(kiocb));
 
 	if (iocb->ki_list.next) {
 		unsigned long flags;
@@ -1065,7 +1077,7 @@
 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+	event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
 	event->data = iocb->ki_user_data;
 	event->res = res;
 	event->res2 = res2;
@@ -1074,7 +1086,7 @@
 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
 	pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-		 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+		 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
 		 res, res2);
 
 	/* after flagging the request as done, we
@@ -1121,7 +1133,6 @@
 
 	percpu_ref_put(&ctx->reqs);
 }
-EXPORT_SYMBOL(aio_complete);
 
 /* aio_read_events_ring
  *	Pull an event off of the ioctx's event ring.  Returns the number of
@@ -1349,46 +1360,19 @@
 			    unsigned long, loff_t);
 typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
-static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
-				     int rw, char __user *buf,
-				     unsigned long *nr_segs,
-				     struct iovec **iovec,
-				     bool compat)
+static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
+				 struct iovec **iovec,
+				 bool compat,
+				 struct iov_iter *iter)
 {
-	ssize_t ret;
-
-	*nr_segs = kiocb->ki_nbytes;
-
 #ifdef CONFIG_COMPAT
 	if (compat)
-		ret = compat_rw_copy_check_uvector(rw,
+		return compat_import_iovec(rw,
 				(struct compat_iovec __user *)buf,
-				*nr_segs, UIO_FASTIOV, *iovec, iovec);
-	else
+				len, UIO_FASTIOV, iovec, iter);
 #endif
-		ret = rw_copy_check_uvector(rw,
-				(struct iovec __user *)buf,
-				*nr_segs, UIO_FASTIOV, *iovec, iovec);
-	if (ret < 0)
-		return ret;
-
-	/* ki_nbytes now reflect bytes instead of segs */
-	kiocb->ki_nbytes = ret;
-	return 0;
-}
-
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
-				       int rw, char __user *buf,
-				       unsigned long *nr_segs,
-				       struct iovec *iovec)
-{
-	if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
-		return -EFAULT;
-
-	iovec->iov_base = buf;
-	iovec->iov_len = kiocb->ki_nbytes;
-	*nr_segs = 1;
-	return 0;
+	return import_iovec(rw, (struct iovec __user *)buf,
+				len, UIO_FASTIOV, iovec, iter);
 }
 
 /*
@@ -1396,11 +1380,10 @@
  *	Performs the initial checks and io submission.
  */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-			    char __user *buf, bool compat)
+			    char __user *buf, size_t len, bool compat)
 {
 	struct file *file = req->ki_filp;
 	ssize_t ret;
-	unsigned long nr_segs;
 	int rw;
 	fmode_t mode;
 	aio_rw_op *rw_op;
@@ -1431,21 +1414,22 @@
 		if (!rw_op && !iter_op)
 			return -EINVAL;
 
-		ret = (opcode == IOCB_CMD_PREADV ||
-		       opcode == IOCB_CMD_PWRITEV)
-			? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
-						&iovec, compat)
-			: aio_setup_single_vector(req, rw, buf, &nr_segs,
-						  iovec);
+		if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+			ret = aio_setup_vectored_rw(rw, buf, len,
+						&iovec, compat, &iter);
+		else {
+			ret = import_single_range(rw, buf, len, iovec, &iter);
+			iovec = NULL;
+		}
 		if (!ret)
-			ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+			ret = rw_verify_area(rw, file, &req->ki_pos,
+					     iov_iter_count(&iter));
 		if (ret < 0) {
-			if (iovec != inline_vecs)
-				kfree(iovec);
+			kfree(iovec);
 			return ret;
 		}
 
-		req->ki_nbytes = ret;
+		len = ret;
 
 		/* XXX: move/kill - rw_verify_area()? */
 		/* This matches the pread()/pwrite() logic */
@@ -1458,14 +1442,14 @@
 			file_start_write(file);
 
 		if (iter_op) {
-			iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
 			ret = iter_op(req, &iter);
 		} else {
-			ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+			ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
 		}
 
 		if (rw == WRITE)
 			file_end_write(file);
+		kfree(iovec);
 		break;
 
 	case IOCB_CMD_FDSYNC:
@@ -1487,9 +1471,6 @@
 		return -EINVAL;
 	}
 
-	if (iovec != inline_vecs)
-		kfree(iovec);
-
 	if (ret != -EIOCBQUEUED) {
 		/*
 		 * There's no easy way to restart the syscall since other AIO's
@@ -1508,7 +1489,7 @@
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 struct iocb *iocb, bool compat)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
@@ -1531,11 +1512,14 @@
 	if (unlikely(!req))
 		return -EAGAIN;
 
-	req->ki_filp = fget(iocb->aio_fildes);
-	if (unlikely(!req->ki_filp)) {
+	req->common.ki_filp = fget(iocb->aio_fildes);
+	if (unlikely(!req->common.ki_filp)) {
 		ret = -EBADF;
 		goto out_put_req;
 	}
+	req->common.ki_pos = iocb->aio_offset;
+	req->common.ki_complete = aio_complete;
+	req->common.ki_flags = 0;
 
 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
@@ -1550,6 +1534,8 @@
 			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
+
+		req->common.ki_flags |= IOCB_EVENTFD;
 	}
 
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1558,13 +1544,12 @@
 		goto out_put_req;
 	}
 
-	req->ki_obj.user = user_iocb;
+	req->ki_user_iocb = user_iocb;
 	req->ki_user_data = iocb->aio_data;
-	req->ki_pos = iocb->aio_offset;
-	req->ki_nbytes = iocb->aio_nbytes;
 
-	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+	ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
 			   (char __user *)(unsigned long)iocb->aio_buf,
+			   iocb->aio_nbytes,
 			   compat);
 	if (ret)
 		goto out_put_req;
@@ -1651,10 +1636,10 @@
 /* lookup_kiocb
  *	Finds a given iocb for cancellation.
  */
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
-				  u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
 {
-	struct list_head *pos;
+	struct aio_kiocb *kiocb;
 
 	assert_spin_locked(&ctx->ctx_lock);
 
@@ -1662,9 +1647,8 @@
 		return NULL;
 
 	/* TODO: use a hash or array, this sucks. */
-	list_for_each(pos, &ctx->active_reqs) {
-		struct kiocb *kiocb = list_kiocb(pos);
-		if (kiocb->ki_obj.user == iocb)
+	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+		if (kiocb->ki_user_iocb == iocb)
 			return kiocb;
 	}
 	return NULL;
@@ -1684,7 +1668,7 @@
 		struct io_event __user *, result)
 {
 	struct kioctx *ctx;
-	struct kiocb *kiocb;
+	struct aio_kiocb *kiocb;
 	u32 key;
 	int ret;
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079..fdcb4d6 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/writeback.h>
+#include <linux/uio.h>
 #include <asm/uaccess.h>
 #include "bfs.h"
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266b..2e522ae 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
 #include <linux/namei.h>
 #include <linux/log2.h>
 #include <linux/cleancache.h>
-#include <linux/aio.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 30982bb..aee18f8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
@@ -32,6 +31,7 @@
 #include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/btrfs.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2e732d..686331f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include <linux/bit_spinlock.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
 #include <linux/btrfs.h>
 #include <linux/blkdev.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d533075..139f2fe 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,7 +7,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 
 #include "super.h"
@@ -808,7 +807,7 @@
 {
 	struct file *filp = iocb->ki_filp;
 	struct ceph_file_info *fi = filp->private_data;
-	size_t len = iocb->ki_nbytes;
+	size_t len = iov_iter_count(to);
 	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct page *pinned_page = NULL;
diff --git a/fs/dcache.c b/fs/dcache.c
index c71e373..d99736a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2690,7 +2690,7 @@
 		struct dentry *dentry, struct dentry *alias)
 {
 	struct mutex *m1 = NULL, *m2 = NULL;
-	int ret = -EBUSY;
+	int ret = -ESTALE;
 
 	/* If alias and dentry share a parent, then no extra locks required */
 	if (alias->d_parent == dentry->d_parent)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e181b6b..6fb00e3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
 #include <linux/uio.h>
 #include <linux/atomic.h>
 #include <linux/prefetch.h>
-#include <linux/aio.h>
 
 /*
  * How many user pages to map in one call to get_user_pages().  This determines
@@ -265,7 +264,7 @@
 				ret = err;
 		}
 
-		aio_complete(dio->iocb, ret, 0);
+		dio->iocb->ki_complete(dio->iocb, ret, 0);
 	}
 
 	kmem_cache_free(dio_cache, dio);
@@ -1056,7 +1055,7 @@
 	 * operation.  AIO can if it was a broken operation described above or
 	 * in fact if all the bios race to complete before we get here.  In
 	 * that case dio_complete() translates the EIOCBQUEUED into the proper
-	 * return code that the caller will hand to aio_complete().
+	 * return code that the caller will hand to ->complete().
 	 *
 	 * This is managed by the bio_lock instead of being an atomic_t so that
 	 * completion paths can drop their ref and use the remaining count to
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index fd39bad..7967508 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,7 +31,6 @@
 #include <linux/security.h>
 #include <linux/compat.h>
 #include <linux/fs_stack.h>
-#include <linux/aio.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -52,12 +51,6 @@
 	struct file *file = iocb->ki_filp;
 
 	rc = generic_file_read_iter(iocb, to);
-	/*
-	 * Even though this is a async interface, we need to wait
-	 * for IO to finish to update atime
-	 */
-	if (-EIOCBQUEUED == rc)
-		rc = wait_on_sync_kiocb(iocb);
 	if (rc >= 0) {
 		path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
 		touch_atime(path);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6434bc0..df9d6af 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,7 +31,7 @@
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xattr.h"
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc4..db07ffb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,7 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 33a09da..598abbb 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,9 +23,9 @@
 #include <linux/jbd2.h>
 #include <linux/mount.h>
 #include <linux/path.h>
-#include <linux/aio.h>
 #include <linux/quotaops.h>
 #include <linux/pagevec.h>
+#include <linux/uio.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 45fe924..740c787 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,9 +20,9 @@
  *	(sct@redhat.com), 1993, 1998
  */
 
-#include <linux/aio.h>
 #include "ext4_jbd2.h"
 #include "truncate.h"
+#include <linux/uio.h>
 
 #include <trace/events/ext4.h>
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cb9a21..a3f4513 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,7 +37,6 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
-#include <linux/aio.h>
 #include <linux/bitops.h>
 
 #include "ext4_jbd2.h"
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b24a254..4649842 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -18,7 +18,6 @@
 #include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include <linux/workqueue.h>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 985ed02..497f851 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,12 +12,12 @@
 #include <linux/f2fs_fs.h>
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/prefetch.h>
+#include <linux/uio.h>
 
 #include "f2fs.h"
 #include "node.h"
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 497c7c5..8521207 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -19,7 +19,6 @@
 #include <linux/mpage.h>
 #include <linux/buffer_head.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
 #include <linux/uio.h>
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 28d0c7ab..b3fa050 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,7 +38,6 @@
 #include <linux/device.h>
 #include <linux/file.h>
 #include <linux/fs.h>
-#include <linux/aio.h>
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
@@ -48,6 +47,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/module.h>
+#include <linux/uio.h>
 
 #include "fuse_i.h"
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 39706c5..95a2797 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,6 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/swap.h>
 #include <linux/splice.h>
-#include <linux/aio.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c01ec3b..ff102cb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,8 +15,8 @@
 #include <linux/module.h>
 #include <linux/compat.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
+#include <linux/uio.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -528,6 +528,17 @@
 	}
 }
 
+static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
+{
+	if (io->err)
+		return io->err;
+
+	if (io->bytes >= 0 && io->write)
+		return -EIO;
+
+	return io->bytes < 0 ? io->size : io->bytes;
+}
+
 /**
  * In case of short read, the caller sets 'pos' to the position of
  * actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -546,6 +557,7 @@
  */
 static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 {
+	bool is_sync = is_sync_kiocb(io->iocb);
 	int left;
 
 	spin_lock(&io->lock);
@@ -555,30 +567,24 @@
 		io->bytes = pos;
 
 	left = --io->reqs;
+	if (!left && is_sync)
+		complete(io->done);
 	spin_unlock(&io->lock);
 
-	if (!left) {
-		long res;
+	if (!left && !is_sync) {
+		ssize_t res = fuse_get_res_by_io(io);
 
-		if (io->err)
-			res = io->err;
-		else if (io->bytes >= 0 && io->write)
-			res = -EIO;
-		else {
-			res = io->bytes < 0 ? io->size : io->bytes;
+		if (res >= 0) {
+			struct inode *inode = file_inode(io->iocb->ki_filp);
+			struct fuse_conn *fc = get_fuse_conn(inode);
+			struct fuse_inode *fi = get_fuse_inode(inode);
 
-			if (!is_sync_kiocb(io->iocb)) {
-				struct inode *inode = file_inode(io->iocb->ki_filp);
-				struct fuse_conn *fc = get_fuse_conn(inode);
-				struct fuse_inode *fi = get_fuse_inode(inode);
-
-				spin_lock(&fc->lock);
-				fi->attr_version = ++fc->attr_version;
-				spin_unlock(&fc->lock);
-			}
+			spin_lock(&fc->lock);
+			fi->attr_version = ++fc->attr_version;
+			spin_unlock(&fc->lock);
 		}
 
-		aio_complete(io->iocb, res, 0);
+		io->iocb->ki_complete(io->iocb, res, 0);
 		kfree(io);
 	}
 }
@@ -2801,6 +2807,7 @@
 fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 			loff_t offset)
 {
+	DECLARE_COMPLETION_ONSTACK(wait);
 	ssize_t ret = 0;
 	struct file *file = iocb->ki_filp;
 	struct fuse_file *ff = file->private_data;
@@ -2852,6 +2859,9 @@
 	if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
 		io->async = false;
 
+	if (io->async && is_sync_kiocb(iocb))
+		io->done = &wait;
+
 	if (rw == WRITE)
 		ret = __fuse_direct_write(io, iter, &pos);
 	else
@@ -2864,11 +2874,12 @@
 		if (!is_sync_kiocb(iocb))
 			return -EIOCBQUEUED;
 
-		ret = wait_on_sync_kiocb(iocb);
-	} else {
-		kfree(io);
+		wait_for_completion(&wait);
+		ret = fuse_get_res_by_io(io);
 	}
 
+	kfree(io);
+
 	if (rw == WRITE) {
 		if (ret > 0)
 			fuse_write_update_size(inode, pos);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1cdfb07..7354dc1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -263,6 +263,7 @@
 	int err;
 	struct kiocb *iocb;
 	struct file *file;
+	struct completion *done;
 };
 
 /**
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7b31430..1be3b06 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -110,11 +110,7 @@
 	error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
 	if (error)
 		goto out;
-
-	if (acl)
-		set_cached_acl(inode, type, acl);
-	else
-		forget_cached_acl(inode, type);
+	set_cached_acl(inode, type, acl);
 out:
 	kfree(data);
 	return error;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4ad4f94..a6e6990 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,7 +20,7 @@
 #include <linux/swap.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/backing-dev.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <trace/events/writeback.h>
 
 #include "gfs2.h"
@@ -671,12 +671,12 @@
 
 	if (alloc_required) {
 		struct gfs2_alloc_parms ap = { .aflags = 0, };
-		error = gfs2_quota_lock_check(ip);
+		requested = data_blocks + ind_blocks;
+		ap.target = requested;
+		error = gfs2_quota_lock_check(ip, &ap);
 		if (error)
 			goto out_unlock;
 
-		requested = data_blocks + ind_blocks;
-		ap.target = requested;
 		error = gfs2_inplace_reserve(ip, &ap);
 		if (error)
 			goto out_qunlock;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f0b945a..61296ec 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1224,7 +1224,7 @@
 
 	if (gfs2_is_stuffed(ip) &&
 	    (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
-		error = gfs2_quota_lock_check(ip);
+		error = gfs2_quota_lock_check(ip, &ap);
 		if (error)
 			return error;
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3e32bb8..8ec43ab 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,7 +25,6 @@
 #include <asm/uaccess.h>
 #include <linux/dlm.h>
 #include <linux/dlm_plock.h>
-#include <linux/aio.h>
 #include <linux/delay.h>
 
 #include "gfs2.h"
@@ -429,11 +428,11 @@
 	if (ret)
 		goto out_unlock;
 
-	ret = gfs2_quota_lock_check(ip);
-	if (ret)
-		goto out_unlock;
 	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
 	ap.target = data_blocks + ind_blocks;
+	ret = gfs2_quota_lock_check(ip, &ap);
+	if (ret)
+		goto out_unlock;
 	ret = gfs2_inplace_reserve(ip, &ap);
 	if (ret)
 		goto out_quota_unlock;
@@ -765,22 +764,30 @@
 	brelse(dibh);
 	return error;
 }
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
-			    unsigned int *data_blocks, unsigned int *ind_blocks)
+/**
+ * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
+ *                     blocks, determine how many bytes can be written.
+ * @ip:          The inode in question.
+ * @len:         Max cap of bytes. What we return in *len must be <= this.
+ * @data_blocks: Compute and return the number of data blocks needed
+ * @ind_blocks:  Compute and return the number of indirect blocks needed
+ * @max_blocks:  The total blocks available to work with.
+ *
+ * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
+ */
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
+			    unsigned int *data_blocks, unsigned int *ind_blocks,
+			    unsigned int max_blocks)
 {
+	loff_t max = *len;
 	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	unsigned int max_blocks = ip->i_rgd->rd_free_clone;
 	unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
 
 	for (tmp = max_data; tmp > sdp->sd_diptrs;) {
 		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
 		max_data -= tmp;
 	}
-	/* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
-	   so it might end up with fewer data blocks */
-	if (max_data <= *data_blocks)
-		return;
+
 	*data_blocks = max_data;
 	*ind_blocks = max_blocks - max_data;
 	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@ -797,7 +804,7 @@
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_alloc_parms ap = { .aflags = 0, };
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-	loff_t bytes, max_bytes;
+	loff_t bytes, max_bytes, max_blks = UINT_MAX;
 	int error;
 	const loff_t pos = offset;
 	const loff_t count = len;
@@ -819,6 +826,9 @@
 
 	gfs2_size_hint(file, offset, len);
 
+	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
+	ap.min_target = data_blocks + ind_blocks;
+
 	while (len > 0) {
 		if (len < bytes)
 			bytes = len;
@@ -827,27 +837,41 @@
 			offset += bytes;
 			continue;
 		}
-		error = gfs2_quota_lock_check(ip);
+
+		/* We need to determine how many bytes we can actually
+		 * fallocate without exceeding quota or going over the
+		 * end of the fs. We start off optimistically by assuming
+		 * we can write max_bytes */
+		max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
+
+		/* Since max_bytes is most likely a theoretical max, we
+		 * calculate a more realistic 'bytes' to serve as a good
+		 * starting point for the number of bytes we may be able
+		 * to write */
+		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+		ap.target = data_blocks + ind_blocks;
+
+		error = gfs2_quota_lock_check(ip, &ap);
 		if (error)
 			return error;
-retry:
-		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+		/* ap.allowed tells us how many blocks quota will allow
+		 * us to write. Check if this reduces max_blks */
+		if (ap.allowed && ap.allowed < max_blks)
+			max_blks = ap.allowed;
 
-		ap.target = data_blocks + ind_blocks;
 		error = gfs2_inplace_reserve(ip, &ap);
-		if (error) {
-			if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
-				bytes >>= 1;
-				bytes &= bsize_mask;
-				if (bytes == 0)
-					bytes = sdp->sd_sb.sb_bsize;
-				goto retry;
-			}
+		if (error)
 			goto out_qunlock;
-		}
-		max_bytes = bytes;
-		calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
-				&max_bytes, &data_blocks, &ind_blocks);
+
+		/* check if the selected rgrp limits our max_blks further */
+		if (ap.allowed && ap.allowed < max_blks)
+			max_blks = ap.allowed;
+
+		/* Almost done. Calculate bytes that can be written using
+		 * max_blks. We also recompute max_bytes, data_blocks and
+		 * ind_blocks */
+		calc_max_reserv(ip, &max_bytes, &data_blocks,
+				&ind_blocks, max_blks);
 
 		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
 			  RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@ -931,6 +955,22 @@
 	return ret;
 }
 
+static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
+				      struct file *out, loff_t *ppos,
+				      size_t len, unsigned int flags)
+{
+	int error;
+	struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
+
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		return (ssize_t)error;
+
+	gfs2_size_hint(out, *ppos, len);
+
+	return iter_file_splice_write(pipe, out, ppos, len, flags);
+}
+
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
 
 /**
@@ -1077,7 +1117,7 @@
 	.lock		= gfs2_lock,
 	.flock		= gfs2_flock,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= iter_file_splice_write,
+	.splice_write	= gfs2_file_splice_write,
 	.setlease	= simple_nosetlease,
 	.fallocate	= gfs2_fallocate,
 };
@@ -1107,7 +1147,7 @@
 	.release	= gfs2_release,
 	.fsync		= gfs2_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= iter_file_splice_write,
+	.splice_write	= gfs2_file_splice_write,
 	.setlease	= generic_setlease,
 	.fallocate	= gfs2_fallocate,
 };
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42dffb..0fa8062 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2047,34 +2047,41 @@
 
 int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
 {
-	sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
-	if (!sdp->debugfs_dir)
-		return -ENOMEM;
-	sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
-							 S_IFREG | S_IRUGO,
-							 sdp->debugfs_dir, sdp,
-							 &gfs2_glocks_fops);
-	if (!sdp->debugfs_dentry_glocks)
-		goto fail;
+	struct dentry *dent;
 
-	sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
-							S_IFREG | S_IRUGO,
-							sdp->debugfs_dir, sdp,
-							&gfs2_glstats_fops);
-	if (!sdp->debugfs_dentry_glstats)
+	dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
+	if (IS_ERR_OR_NULL(dent))
 		goto fail;
+	sdp->debugfs_dir = dent;
 
-	sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
-							S_IFREG | S_IRUGO,
-							sdp->debugfs_dir, sdp,
-							&gfs2_sbstats_fops);
-	if (!sdp->debugfs_dentry_sbstats)
+	dent = debugfs_create_file("glocks",
+				   S_IFREG | S_IRUGO,
+				   sdp->debugfs_dir, sdp,
+				   &gfs2_glocks_fops);
+	if (IS_ERR_OR_NULL(dent))
 		goto fail;
+	sdp->debugfs_dentry_glocks = dent;
+
+	dent = debugfs_create_file("glstats",
+				   S_IFREG | S_IRUGO,
+				   sdp->debugfs_dir, sdp,
+				   &gfs2_glstats_fops);
+	if (IS_ERR_OR_NULL(dent))
+		goto fail;
+	sdp->debugfs_dentry_glstats = dent;
+
+	dent = debugfs_create_file("sbstats",
+				   S_IFREG | S_IRUGO,
+				   sdp->debugfs_dir, sdp,
+				   &gfs2_sbstats_fops);
+	if (IS_ERR_OR_NULL(dent))
+		goto fail;
+	sdp->debugfs_dentry_sbstats = dent;
 
 	return 0;
 fail:
 	gfs2_delete_debugfs_file(sdp);
-	return -ENOMEM;
+	return dent ? PTR_ERR(dent) : -ENOMEM;
 }
 
 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
@@ -2100,6 +2107,8 @@
 int gfs2_register_debugfs(void)
 {
 	gfs2_root = debugfs_create_dir("gfs2", NULL);
+	if (IS_ERR(gfs2_root))
+		return PTR_ERR(gfs2_root);
 	return gfs2_root ? 0 : -ENOMEM;
 }
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a2dbbc..58b75ab 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -301,8 +301,10 @@
  * to the allocation code.
  */
 struct gfs2_alloc_parms {
-	u32 target;
+	u64 target;
+	u32 min_target;
 	u32 aflags;
+	u64 allowed;
 };
 
 enum {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 73c72253..08bc84d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -382,7 +382,7 @@
 	struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
 	int error;
 
-	error = gfs2_quota_lock_check(ip);
+	error = gfs2_quota_lock_check(ip, &ap);
 	if (error)
 		goto out;
 
@@ -525,7 +525,7 @@
 	int error;
 
 	if (da->nr_blocks) {
-		error = gfs2_quota_lock_check(dip);
+		error = gfs2_quota_lock_check(dip, &ap);
 		if (error)
 			goto fail_quota_locks;
 
@@ -953,7 +953,7 @@
 
 	if (da.nr_blocks) {
 		struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
-		error = gfs2_quota_lock_check(dip);
+		error = gfs2_quota_lock_check(dip, &ap);
 		if (error)
 			goto out_gunlock;
 
@@ -1470,7 +1470,7 @@
 
 	if (da.nr_blocks) {
 		struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
-		error = gfs2_quota_lock_check(ndip);
+		error = gfs2_quota_lock_check(ndip, &ap);
 		if (error)
 			goto out_gunlock;
 
@@ -1669,6 +1669,7 @@
 	kuid_t ouid, nuid;
 	kgid_t ogid, ngid;
 	int error;
+	struct gfs2_alloc_parms ap;
 
 	ouid = inode->i_uid;
 	ogid = inode->i_gid;
@@ -1696,9 +1697,11 @@
 	if (error)
 		goto out;
 
+	ap.target = gfs2_get_inode_blocks(&ip->i_inode);
+
 	if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
 	    !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
-		error = gfs2_quota_check(ip, nuid, ngid);
+		error = gfs2_quota_check(ip, nuid, ngid, &ap);
 		if (error)
 			goto out_gunlock_q;
 	}
@@ -1713,9 +1716,8 @@
 
 	if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
 	    !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
-		u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
-		gfs2_quota_change(ip, -blocks, ouid, ogid);
-		gfs2_quota_change(ip, blocks, nuid, ngid);
+		gfs2_quota_change(ip, -ap.target, ouid, ogid);
+		gfs2_quota_change(ip, ap.target, nuid, ngid);
 	}
 
 out_end_trans:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3aa17d4..5c27e48 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -923,6 +923,9 @@
 	if (error)
 		return error;
 
+	if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
+		force_refresh = FORCE;
+
 	qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
 
 	if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
@@ -974,11 +977,8 @@
 	     sizeof(struct gfs2_quota_data *), sort_qd, NULL);
 
 	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
-		int force = NO_FORCE;
 		qd = ip->i_res->rs_qa_qd[x];
-		if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
-			force = FORCE;
-		error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
+		error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
 		if (error)
 			break;
 	}
@@ -1094,14 +1094,33 @@
 	return 0;
 }
 
-int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
+/**
+ * gfs2_quota_check - check if allocating new blocks will exceed quota
+ * @ip:  The inode for which this check is being performed
+ * @uid: The uid to check against
+ * @gid: The gid to check against
+ * @ap:  The allocation parameters. ap->target contains the requested
+ *       blocks. ap->min_target, if set, contains the minimum blks
+ *       requested.
+ *
+ * Returns: 0 on success.
+ *                  min_req = ap->min_target ? ap->min_target : ap->target;
+ *                  quota must allow atleast min_req blks for success and
+ *                  ap->allowed is set to the number of blocks allowed
+ *
+ *          -EDQUOT otherwise, quota violation. ap->allowed is set to number
+ *                  of blocks available.
+ */
+int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+		     struct gfs2_alloc_parms *ap)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_quota_data *qd;
-	s64 value;
+	s64 value, warn, limit;
 	unsigned int x;
 	int error = 0;
 
+	ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
 	if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
 		return 0;
 
@@ -1115,30 +1134,37 @@
 		      qid_eq(qd->qd_id, make_kqid_gid(gid))))
 			continue;
 
+		warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
+		limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
 		value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
 		spin_lock(&qd_lock);
 		value += qd->qd_change;
 		spin_unlock(&qd_lock);
 
-		if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
-			print_message(qd, "exceeded");
-			quota_send_warning(qd->qd_id,
-					   sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
-
-			error = -EDQUOT;
-			break;
-		} else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
-			   (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
+		if (limit > 0 && (limit - value) < ap->allowed)
+			ap->allowed = limit - value;
+		/* If we can't meet the target */
+		if (limit && limit < (value + (s64)ap->target)) {
+			/* If no min_target specified or we don't meet
+			 * min_target, return -EDQUOT */
+			if (!ap->min_target || ap->min_target > ap->allowed) {
+				print_message(qd, "exceeded");
+				quota_send_warning(qd->qd_id,
+						   sdp->sd_vfs->s_dev,
+						   QUOTA_NL_BHARDWARN);
+				error = -EDQUOT;
+				break;
+			}
+		} else if (warn && warn < value &&
 			   time_after_eq(jiffies, qd->qd_last_warn +
-					 gfs2_tune_get(sdp,
-						gt_quota_warn_period) * HZ)) {
+					 gfs2_tune_get(sdp, gt_quota_warn_period)
+					 * HZ)) {
 			quota_send_warning(qd->qd_id,
 					   sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
 			error = print_message(qd, "warning");
 			qd->qd_last_warn = jiffies;
 		}
 	}
-
 	return error;
 }
 
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 55d506e..ad04b3ac 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -24,7 +24,8 @@
 extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
 extern void gfs2_quota_unlock(struct gfs2_inode *ip);
 
-extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
+extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+			    struct gfs2_alloc_parms *ap);
 extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 			      kuid_t uid, kgid_t gid);
 
@@ -37,7 +38,8 @@
 
 extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
 
-static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
+static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
+					struct gfs2_alloc_parms *ap)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	int ret;
@@ -48,7 +50,7 @@
 		return ret;
 	if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
 		return 0;
-	ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
+	ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
 	if (ret)
 		gfs2_quota_unlock(ip);
 	return ret;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9150207..6af2396 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1946,10 +1946,18 @@
  * @ip: the inode to reserve space for
  * @ap: the allocation parameters
  *
- * Returns: errno
+ * We try our best to find an rgrp that has at least ap->target blocks
+ * available. After a couple of passes (loops == 2), the prospects of finding
+ * such an rgrp diminish. At this stage, we return the first rgrp that has
+ * atleast ap->min_target blocks available. Either way, we set ap->allowed to
+ * the number of blocks available in the chosen rgrp.
+ *
+ * Returns: 0 on success,
+ *          -ENOMEM if a suitable rgrp can't be found
+ *          errno otherwise
  */
 
-int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *begin = NULL;
@@ -2012,7 +2020,7 @@
 		/* Skip unuseable resource groups */
 		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
 						 GFS2_RDF_ERROR)) ||
-		    (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
+		    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
 			goto skip_rgrp;
 
 		if (sdp->sd_args.ar_rgrplvb)
@@ -2027,11 +2035,13 @@
 			goto check_rgrp;
 
 		/* If rgrp has enough free space, use it */
-		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
+		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
+		    (loops == 2 && ap->min_target &&
+		     rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
 			ip->i_rgd = rs->rs_rbm.rgd;
+			ap->allowed = ip->i_rgd->rd_free_clone;
 			return 0;
 		}
-
 check_rgrp:
 		/* Check for unlinked inodes which can be reclaimed */
 		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b104f4a..68972ec 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -41,7 +41,8 @@
 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
 
 #define GFS2_AF_ORLOV 1
-extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap);
+extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
+				struct gfs2_alloc_parms *ap);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f78..fd260ce 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -732,7 +732,7 @@
 	if (error)
 		return error;
 
-	error = gfs2_quota_lock_check(ip);
+	error = gfs2_quota_lock_check(ip, &ap);
 	if (error)
 		return error;
 
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d0929bc..98d4ea4 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "hfs_fs.h"
 #include "btree.h"
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 0cf786f..f541196 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d72817a..762c7a3 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -195,7 +195,7 @@
 	/* unchecked xdatum is chained with c->xattr_unchecked */
 	list_del_init(&xd->xindex);
 
-	dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n",
+	dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n",
 		  xd->xid, xd->version);
 
 	return 0;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index bd3df1c..3197aed 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,8 +22,8 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
+#include <linux/uio.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_filsys.h"
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5d30c56..4cd9798f 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	pr_err("ERROR: (device %s): %pf: %pV\n",
+	pr_err("ERROR: (device %s): %ps: %pV\n",
 	       sb->s_id, __builtin_return_address(0), &vaf);
 
 	va_end(args);
diff --git a/fs/namei.c b/fs/namei.c
index c83145a..76fb76a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -119,15 +119,14 @@
  * PATH_MAX includes the nul terminator --RR.
  */
 
-#define EMBEDDED_NAME_MAX	(PATH_MAX - sizeof(struct filename))
+#define EMBEDDED_NAME_MAX	(PATH_MAX - offsetof(struct filename, iname))
 
 struct filename *
 getname_flags(const char __user *filename, int flags, int *empty)
 {
-	struct filename *result, *err;
-	int len;
-	long max;
+	struct filename *result;
 	char *kname;
+	int len;
 
 	result = audit_reusename(filename);
 	if (result)
@@ -136,22 +135,18 @@
 	result = __getname();
 	if (unlikely(!result))
 		return ERR_PTR(-ENOMEM);
-	result->refcnt = 1;
 
 	/*
 	 * First, try to embed the struct filename inside the names_cache
 	 * allocation
 	 */
-	kname = (char *)result + sizeof(*result);
+	kname = (char *)result->iname;
 	result->name = kname;
-	result->separate = false;
-	max = EMBEDDED_NAME_MAX;
 
-recopy:
-	len = strncpy_from_user(kname, filename, max);
+	len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
 	if (unlikely(len < 0)) {
-		err = ERR_PTR(len);
-		goto error;
+		__putname(result);
+		return ERR_PTR(len);
 	}
 
 	/*
@@ -160,43 +155,49 @@
 	 * names_cache allocation for the pathname, and re-do the copy from
 	 * userland.
 	 */
-	if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
+	if (unlikely(len == EMBEDDED_NAME_MAX)) {
+		const size_t size = offsetof(struct filename, iname[1]);
 		kname = (char *)result;
 
-		result = kzalloc(sizeof(*result), GFP_KERNEL);
-		if (!result) {
-			err = ERR_PTR(-ENOMEM);
-			result = (struct filename *)kname;
-			goto error;
+		/*
+		 * size is chosen that way we to guarantee that
+		 * result->iname[0] is within the same object and that
+		 * kname can't be equal to result->iname, no matter what.
+		 */
+		result = kzalloc(size, GFP_KERNEL);
+		if (unlikely(!result)) {
+			__putname(kname);
+			return ERR_PTR(-ENOMEM);
 		}
 		result->name = kname;
-		result->separate = true;
-		result->refcnt = 1;
-		max = PATH_MAX;
-		goto recopy;
+		len = strncpy_from_user(kname, filename, PATH_MAX);
+		if (unlikely(len < 0)) {
+			__putname(kname);
+			kfree(result);
+			return ERR_PTR(len);
+		}
+		if (unlikely(len == PATH_MAX)) {
+			__putname(kname);
+			kfree(result);
+			return ERR_PTR(-ENAMETOOLONG);
+		}
 	}
 
+	result->refcnt = 1;
 	/* The empty path is special. */
 	if (unlikely(!len)) {
 		if (empty)
 			*empty = 1;
-		err = ERR_PTR(-ENOENT);
-		if (!(flags & LOOKUP_EMPTY))
-			goto error;
+		if (!(flags & LOOKUP_EMPTY)) {
+			putname(result);
+			return ERR_PTR(-ENOENT);
+		}
 	}
 
-	err = ERR_PTR(-ENAMETOOLONG);
-	if (unlikely(len >= PATH_MAX))
-		goto error;
-
 	result->uptr = filename;
 	result->aname = NULL;
 	audit_getname(result);
 	return result;
-
-error:
-	putname(result);
-	return err;
 }
 
 struct filename *
@@ -216,8 +217,7 @@
 		return ERR_PTR(-ENOMEM);
 
 	if (len <= EMBEDDED_NAME_MAX) {
-		result->name = (char *)(result) + sizeof(*result);
-		result->separate = false;
+		result->name = (char *)result->iname;
 	} else if (len <= PATH_MAX) {
 		struct filename *tmp;
 
@@ -227,7 +227,6 @@
 			return ERR_PTR(-ENOMEM);
 		}
 		tmp->name = (char *)result;
-		tmp->separate = true;
 		result = tmp;
 	} else {
 		__putname(result);
@@ -249,7 +248,7 @@
 	if (--name->refcnt > 0)
 		return;
 
-	if (name->separate) {
+	if (name->name != name->iname) {
 		__putname(name->name);
 		kfree(name);
 	} else
@@ -1851,10 +1850,11 @@
 	return err;
 }
 
-static int path_init(int dfd, const char *name, unsigned int flags,
+static int path_init(int dfd, const struct filename *name, unsigned int flags,
 		     struct nameidata *nd)
 {
 	int retval = 0;
+	const char *s = name->name;
 
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -1863,7 +1863,7 @@
 	if (flags & LOOKUP_ROOT) {
 		struct dentry *root = nd->root.dentry;
 		struct inode *inode = root->d_inode;
-		if (*name) {
+		if (*s) {
 			if (!d_can_lookup(root))
 				return -ENOTDIR;
 			retval = inode_permission(inode, MAY_EXEC);
@@ -1885,7 +1885,7 @@
 	nd->root.mnt = NULL;
 
 	nd->m_seq = read_seqbegin(&mount_lock);
-	if (*name=='/') {
+	if (*s == '/') {
 		if (flags & LOOKUP_RCU) {
 			rcu_read_lock();
 			nd->seq = set_root_rcu(nd);
@@ -1919,7 +1919,7 @@
 
 		dentry = f.file->f_path.dentry;
 
-		if (*name) {
+		if (*s) {
 			if (!d_can_lookup(dentry)) {
 				fdput(f);
 				return -ENOTDIR;
@@ -1949,7 +1949,7 @@
 	return -ECHILD;
 done:
 	current->total_link_count = 0;
-	return link_path_walk(name, nd);
+	return link_path_walk(s, nd);
 }
 
 static void path_cleanup(struct nameidata *nd)
@@ -1972,7 +1972,7 @@
 }
 
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int path_lookupat(int dfd, const char *name,
+static int path_lookupat(int dfd, const struct filename *name,
 				unsigned int flags, struct nameidata *nd)
 {
 	struct path path;
@@ -2027,31 +2027,17 @@
 static int filename_lookup(int dfd, struct filename *name,
 				unsigned int flags, struct nameidata *nd)
 {
-	int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd);
+	int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
 	if (unlikely(retval == -ECHILD))
-		retval = path_lookupat(dfd, name->name, flags, nd);
+		retval = path_lookupat(dfd, name, flags, nd);
 	if (unlikely(retval == -ESTALE))
-		retval = path_lookupat(dfd, name->name,
-						flags | LOOKUP_REVAL, nd);
+		retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
 
 	if (likely(!retval))
 		audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
 	return retval;
 }
 
-static int do_path_lookup(int dfd, const char *name,
-				unsigned int flags, struct nameidata *nd)
-{
-	struct filename *filename = getname_kernel(name);
-	int retval = PTR_ERR(filename);
-
-	if (!IS_ERR(filename)) {
-		retval = filename_lookup(dfd, filename, flags, nd);
-		putname(filename);
-	}
-	return retval;
-}
-
 /* does lookup, returns the object with parent locked */
 struct dentry *kern_path_locked(const char *name, struct path *path)
 {
@@ -2089,9 +2075,15 @@
 int kern_path(const char *name, unsigned int flags, struct path *path)
 {
 	struct nameidata nd;
-	int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
-	if (!res)
-		*path = nd.path;
+	struct filename *filename = getname_kernel(name);
+	int res = PTR_ERR(filename);
+
+	if (!IS_ERR(filename)) {
+		res = filename_lookup(AT_FDCWD, filename, flags, &nd);
+		putname(filename);
+		if (!res)
+			*path = nd.path;
+	}
 	return res;
 }
 EXPORT_SYMBOL(kern_path);
@@ -2108,15 +2100,22 @@
 		    const char *name, unsigned int flags,
 		    struct path *path)
 {
-	struct nameidata nd;
-	int err;
-	nd.root.dentry = dentry;
-	nd.root.mnt = mnt;
+	struct filename *filename = getname_kernel(name);
+	int err = PTR_ERR(filename);
+
 	BUG_ON(flags & LOOKUP_PARENT);
-	/* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
-	err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
-	if (!err)
-		*path = nd.path;
+
+	/* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
+	if (!IS_ERR(filename)) {
+		struct nameidata nd;
+		nd.root.dentry = dentry;
+		nd.root.mnt = mnt;
+		err = filename_lookup(AT_FDCWD, filename,
+				      flags | LOOKUP_ROOT, &nd);
+		if (!err)
+			*path = nd.path;
+		putname(filename);
+	}
 	return err;
 }
 EXPORT_SYMBOL(vfs_path_lookup);
@@ -2138,9 +2137,7 @@
  * @len:	maximum length @len should be interpreted to
  *
  * Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.  Also note that by using this function the
- * nameidata argument is passed to the filesystem methods and a filesystem
- * using this helper needs to be prepared for that.
+ * not be called by generic code.
  */
 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 {
@@ -2341,7 +2338,8 @@
  * Returns 0 and "path" will be valid on success; Returns error otherwise.
  */
 static int
-path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
+path_mountpoint(int dfd, const struct filename *name, struct path *path,
+		unsigned int flags)
 {
 	struct nameidata nd;
 	int err;
@@ -2370,20 +2368,20 @@
 }
 
 static int
-filename_mountpoint(int dfd, struct filename *s, struct path *path,
+filename_mountpoint(int dfd, struct filename *name, struct path *path,
 			unsigned int flags)
 {
 	int error;
-	if (IS_ERR(s))
-		return PTR_ERR(s);
-	error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+	error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
 	if (unlikely(error == -ECHILD))
-		error = path_mountpoint(dfd, s->name, path, flags);
+		error = path_mountpoint(dfd, name, path, flags);
 	if (unlikely(error == -ESTALE))
-		error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+		error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
 	if (likely(!error))
-		audit_inode(s, path->dentry, 0);
-	putname(s);
+		audit_inode(name, path->dentry, 0);
+	putname(name);
 	return error;
 }
 
@@ -3156,7 +3154,7 @@
 	static const struct qstr name = QSTR_INIT("/", 1);
 	struct dentry *dentry, *child;
 	struct inode *dir;
-	int error = path_lookupat(dfd, pathname->name,
+	int error = path_lookupat(dfd, pathname,
 				  flags | LOOKUP_DIRECTORY, nd);
 	if (unlikely(error))
 		return error;
@@ -3229,7 +3227,7 @@
 		goto out;
 	}
 
-	error = path_init(dfd, pathname->name, flags, nd);
+	error = path_init(dfd, pathname, flags, nd);
 	if (unlikely(error))
 		goto out;
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e907c8c..c3929fb 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -265,7 +265,7 @@
 
 	return -EINVAL;
 #else
-	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+	VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
 
 	if (rw == READ)
 		return nfs_file_direct_read(iocb, iter, pos);
@@ -393,7 +393,7 @@
 		long res = (long) dreq->error;
 		if (!res)
 			res = (long) dreq->count;
-		aio_complete(dreq->iocb, res, 0);
+		dreq->iocb->ki_complete(dreq->iocb, res, 0);
 	}
 
 	complete_all(&dreq->completion);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e679d24..37b1558 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/swap.h>
 
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8b59695..ab4987b 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -26,7 +26,7 @@
 #include <linux/mpage.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "nilfs.h"
 #include "btnode.h"
 #include "segment.h"
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 36ae529..2ff263e 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -8,7 +8,7 @@
 
 ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
 
-ccflags-y := -DNTFS_VERSION=\"2.1.31\"
+ccflags-y := -DNTFS_VERSION=\"2.1.32\"
 ccflags-$(CONFIG_NTFS_DEBUG)	+= -DDEBUG
 ccflags-$(CONFIG_NTFS_RW)	+= -DNTFS_RW
 
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1da9b2d..c1da78d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
 /*
  * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -28,7 +28,6 @@
 #include <linux/swap.h>
 #include <linux/uio.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -329,62 +328,168 @@
 	return err;
 }
 
-/**
- * ntfs_fault_in_pages_readable -
- *
- * Fault a number of userspace pages into pagetables.
- *
- * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
- * with more than two userspace pages as well as handling the single page case
- * elegantly.
- *
- * If you find this difficult to understand, then think of the while loop being
- * the following code, except that we do without the integer variable ret:
- *
- *	do {
- *		ret = __get_user(c, uaddr);
- *		uaddr += PAGE_SIZE;
- *	} while (!ret && uaddr < end);
- *
- * Note, the final __get_user() may well run out-of-bounds of the user buffer,
- * but _not_ out-of-bounds of the page the user buffer belongs to, and since
- * this is only a read and not a write, and since it is still in the same page,
- * it should not matter and this makes the code much simpler.
- */
-static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
-		int bytes)
+static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos,
+		size_t *count)
 {
-	const char __user *end;
-	volatile char c;
+	loff_t pos;
+	s64 end, ll;
+	ssize_t err;
+	unsigned long flags;
+	struct inode *vi = file_inode(file);
+	ntfs_inode *base_ni, *ni = NTFS_I(vi);
+	ntfs_volume *vol = ni->vol;
 
-	/* Set @end to the first byte outside the last page we care about. */
-	end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
-
-	while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
-		;
-}
-
-/**
- * ntfs_fault_in_pages_readable_iovec -
- *
- * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
- */
-static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
-		size_t iov_ofs, int bytes)
-{
-	do {
-		const char __user *buf;
-		unsigned len;
-
-		buf = iov->iov_base + iov_ofs;
-		len = iov->iov_len - iov_ofs;
-		if (len > bytes)
-			len = bytes;
-		ntfs_fault_in_pages_readable(buf, len);
-		bytes -= len;
-		iov++;
-		iov_ofs = 0;
-	} while (bytes);
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+			"0x%llx, count 0x%lx.", vi->i_ino,
+			(unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)*ppos, (unsigned long)*count);
+	/* We can write back this queue in page reclaim. */
+	current->backing_dev_info = inode_to_bdi(vi);
+	err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode));
+	if (unlikely(err))
+		goto out;
+	/*
+	 * All checks have passed.  Before we start doing any writing we want
+	 * to abort any totally illegal writes.
+	 */
+	BUG_ON(NInoMstProtected(ni));
+	BUG_ON(ni->type != AT_DATA);
+	/* If file is encrypted, deny access, just like NT4. */
+	if (NInoEncrypted(ni)) {
+		/* Only $DATA attributes can be encrypted. */
+		/*
+		 * Reminder for later: Encrypted files are _always_
+		 * non-resident so that the content can always be encrypted.
+		 */
+		ntfs_debug("Denying write access to encrypted file.");
+		err = -EACCES;
+		goto out;
+	}
+	if (NInoCompressed(ni)) {
+		/* Only unnamed $DATA attribute can be compressed. */
+		BUG_ON(ni->name_len);
+		/*
+		 * Reminder for later: If resident, the data is not actually
+		 * compressed.  Only on the switch to non-resident does
+		 * compression kick in.  This is in contrast to encrypted files
+		 * (see above).
+		 */
+		ntfs_error(vi->i_sb, "Writing to compressed files is not "
+				"implemented yet.  Sorry.");
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+	if (*count == 0)
+		goto out;
+	base_ni = ni;
+	if (NInoAttr(ni))
+		base_ni = ni->ext.base_ntfs_ino;
+	err = file_remove_suid(file);
+	if (unlikely(err))
+		goto out;
+	/*
+	 * Our ->update_time method always succeeds thus file_update_time()
+	 * cannot fail either so there is no need to check the return code.
+	 */
+	file_update_time(file);
+	pos = *ppos;
+	/* The first byte after the last cluster being written to. */
+	end = (pos + *count + vol->cluster_size_mask) &
+			~(u64)vol->cluster_size_mask;
+	/*
+	 * If the write goes beyond the allocated size, extend the allocation
+	 * to cover the whole of the write, rounded up to the nearest cluster.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	ll = ni->allocated_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (end > ll) {
+		/*
+		 * Extend the allocation without changing the data size.
+		 *
+		 * Note we ensure the allocation is big enough to at least
+		 * write some data but we do not require the allocation to be
+		 * complete, i.e. it may be partial.
+		 */
+		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+		if (likely(ll >= 0)) {
+			BUG_ON(pos >= ll);
+			/* If the extension was partial truncate the write. */
+			if (end > ll) {
+				ntfs_debug("Truncating write to inode 0x%lx, "
+						"attribute type 0x%x, because "
+						"the allocation was only "
+						"partially extended.",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type));
+				*count = ll - pos;
+			}
+		} else {
+			err = ll;
+			read_lock_irqsave(&ni->size_lock, flags);
+			ll = ni->allocated_size;
+			read_unlock_irqrestore(&ni->size_lock, flags);
+			/* Perform a partial write if possible or fail. */
+			if (pos < ll) {
+				ntfs_debug("Truncating write to inode 0x%lx "
+						"attribute type 0x%x, because "
+						"extending the allocation "
+						"failed (error %d).",
+						vi->i_ino, (unsigned)
+						le32_to_cpu(ni->type),
+						(int)-err);
+				*count = ll - pos;
+			} else {
+				if (err != -ENOSPC)
+					ntfs_error(vi->i_sb, "Cannot perform "
+							"write to inode "
+							"0x%lx, attribute "
+							"type 0x%x, because "
+							"extending the "
+							"allocation failed "
+							"(error %ld).",
+							vi->i_ino, (unsigned)
+							le32_to_cpu(ni->type),
+							(long)-err);
+				else
+					ntfs_debug("Cannot perform write to "
+							"inode 0x%lx, "
+							"attribute type 0x%x, "
+							"because there is not "
+							"space left.",
+							vi->i_ino, (unsigned)
+							le32_to_cpu(ni->type));
+				goto out;
+			}
+		}
+	}
+	/*
+	 * If the write starts beyond the initialized size, extend it up to the
+	 * beginning of the write and initialize all non-sparse space between
+	 * the old initialized size and the new one.  This automatically also
+	 * increments the vfs inode->i_size to keep it above or equal to the
+	 * initialized_size.
+	 */
+	read_lock_irqsave(&ni->size_lock, flags);
+	ll = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (pos > ll) {
+		/*
+		 * Wait for ongoing direct i/o to complete before proceeding.
+		 * New direct i/o cannot start as we hold i_mutex.
+		 */
+		inode_dio_wait(vi);
+		err = ntfs_attr_extend_initialized(ni, pos);
+		if (unlikely(err < 0))
+			ntfs_error(vi->i_sb, "Cannot perform write to inode "
+					"0x%lx, attribute type 0x%x, because "
+					"extending the initialized size "
+					"failed (error %d).", vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type),
+					(int)-err);
+	}
+out:
+	return err;
 }
 
 /**
@@ -421,8 +526,8 @@
 					goto err_out;
 				}
 			}
-			err = add_to_page_cache_lru(*cached_page, mapping, index,
-					GFP_KERNEL);
+			err = add_to_page_cache_lru(*cached_page, mapping,
+					index, GFP_KERNEL);
 			if (unlikely(err)) {
 				if (err == -EEXIST)
 					continue;
@@ -1268,180 +1373,6 @@
 	return err;
 }
 
-/*
- * Copy as much as we can into the pages and return the number of bytes which
- * were successfully copied.  If a fault is encountered then clear the pages
- * out to (ofs + bytes) and return the number of bytes which were copied.
- */
-static inline size_t ntfs_copy_from_user(struct page **pages,
-		unsigned nr_pages, unsigned ofs, const char __user *buf,
-		size_t bytes)
-{
-	struct page **last_page = pages + nr_pages;
-	char *addr;
-	size_t total = 0;
-	unsigned len;
-	int left;
-
-	do {
-		len = PAGE_CACHE_SIZE - ofs;
-		if (len > bytes)
-			len = bytes;
-		addr = kmap_atomic(*pages);
-		left = __copy_from_user_inatomic(addr + ofs, buf, len);
-		kunmap_atomic(addr);
-		if (unlikely(left)) {
-			/* Do it the slow way. */
-			addr = kmap(*pages);
-			left = __copy_from_user(addr + ofs, buf, len);
-			kunmap(*pages);
-			if (unlikely(left))
-				goto err_out;
-		}
-		total += len;
-		bytes -= len;
-		if (!bytes)
-			break;
-		buf += len;
-		ofs = 0;
-	} while (++pages < last_page);
-out:
-	return total;
-err_out:
-	total += len - left;
-	/* Zero the rest of the target like __copy_from_user(). */
-	while (++pages < last_page) {
-		bytes -= len;
-		if (!bytes)
-			break;
-		len = PAGE_CACHE_SIZE;
-		if (len > bytes)
-			len = bytes;
-		zero_user(*pages, 0, len);
-	}
-	goto out;
-}
-
-static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
-		const struct iovec *iov, size_t iov_ofs, size_t bytes)
-{
-	size_t total = 0;
-
-	while (1) {
-		const char __user *buf = iov->iov_base + iov_ofs;
-		unsigned len;
-		size_t left;
-
-		len = iov->iov_len - iov_ofs;
-		if (len > bytes)
-			len = bytes;
-		left = __copy_from_user_inatomic(vaddr, buf, len);
-		total += len;
-		bytes -= len;
-		vaddr += len;
-		if (unlikely(left)) {
-			total -= left;
-			break;
-		}
-		if (!bytes)
-			break;
-		iov++;
-		iov_ofs = 0;
-	}
-	return total;
-}
-
-static inline void ntfs_set_next_iovec(const struct iovec **iovp,
-		size_t *iov_ofsp, size_t bytes)
-{
-	const struct iovec *iov = *iovp;
-	size_t iov_ofs = *iov_ofsp;
-
-	while (bytes) {
-		unsigned len;
-
-		len = iov->iov_len - iov_ofs;
-		if (len > bytes)
-			len = bytes;
-		bytes -= len;
-		iov_ofs += len;
-		if (iov->iov_len == iov_ofs) {
-			iov++;
-			iov_ofs = 0;
-		}
-	}
-	*iovp = iov;
-	*iov_ofsp = iov_ofs;
-}
-
-/*
- * This has the same side-effects and return value as ntfs_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
- * single-segment behaviour.
- *
- * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
- * atomic and when not atomic.  This is ok because it calls
- * __copy_from_user_inatomic() and it is ok to call this when non-atomic.  In
- * fact, the only difference between __copy_from_user_inatomic() and
- * __copy_from_user() is that the latter calls might_sleep() and the former
- * should not zero the tail of the buffer on error.  And on many architectures
- * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
- * makes no difference at all on those architectures.
- */
-static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
-		unsigned nr_pages, unsigned ofs, const struct iovec **iov,
-		size_t *iov_ofs, size_t bytes)
-{
-	struct page **last_page = pages + nr_pages;
-	char *addr;
-	size_t copied, len, total = 0;
-
-	do {
-		len = PAGE_CACHE_SIZE - ofs;
-		if (len > bytes)
-			len = bytes;
-		addr = kmap_atomic(*pages);
-		copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
-				*iov, *iov_ofs, len);
-		kunmap_atomic(addr);
-		if (unlikely(copied != len)) {
-			/* Do it the slow way. */
-			addr = kmap(*pages);
-			copied = __ntfs_copy_from_user_iovec_inatomic(addr +
-					ofs, *iov, *iov_ofs, len);
-			if (unlikely(copied != len))
-				goto err_out;
-			kunmap(*pages);
-		}
-		total += len;
-		ntfs_set_next_iovec(iov, iov_ofs, len);
-		bytes -= len;
-		if (!bytes)
-			break;
-		ofs = 0;
-	} while (++pages < last_page);
-out:
-	return total;
-err_out:
-	BUG_ON(copied > len);
-	/* Zero the rest of the target like __copy_from_user(). */
-	memset(addr + ofs + copied, 0, len - copied);
-	kunmap(*pages);
-	total += copied;
-	ntfs_set_next_iovec(iov, iov_ofs, copied);
-	while (++pages < last_page) {
-		bytes -= len;
-		if (!bytes)
-			break;
-		len = PAGE_CACHE_SIZE;
-		if (len > bytes)
-			len = bytes;
-		zero_user(*pages, 0, len);
-	}
-	goto out;
-}
-
 static inline void ntfs_flush_dcache_pages(struct page **pages,
 		unsigned nr_pages)
 {
@@ -1762,86 +1693,83 @@
 	return err;
 }
 
-static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were successfully copied.  If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ */
+static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
+		unsigned ofs, struct iov_iter *i, size_t bytes)
 {
-	struct inode *inode = mapping->host;
+	struct page **last_page = pages + nr_pages;
+	size_t total = 0;
+	struct iov_iter data = *i;
+	unsigned len, copied;
 
-	if (to > inode->i_size) {
-		truncate_pagecache(inode, inode->i_size);
-		ntfs_truncate_vfs(inode);
-	}
+	do {
+		len = PAGE_CACHE_SIZE - ofs;
+		if (len > bytes)
+			len = bytes;
+		copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
+				len);
+		total += copied;
+		bytes -= copied;
+		if (!bytes)
+			break;
+		iov_iter_advance(&data, copied);
+		if (copied < len)
+			goto err;
+		ofs = 0;
+	} while (++pages < last_page);
+out:
+	return total;
+err:
+	/* Zero the rest of the target like __copy_from_user(). */
+	len = PAGE_CACHE_SIZE - copied;
+	do {
+		if (len > bytes)
+			len = bytes;
+		zero_user(*pages, copied, len);
+		bytes -= len;
+		copied = 0;
+		len = PAGE_CACHE_SIZE;
+	} while (++pages < last_page);
+	goto out;
 }
 
 /**
- * ntfs_file_buffered_write -
- *
- * Locking: The vfs is holding ->i_mutex on the inode.
+ * ntfs_perform_write - perform buffered write to a file
+ * @file:	file to write to
+ * @i:		iov_iter with data to write
+ * @pos:	byte offset in file at which to begin writing to
  */
-static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
-		const struct iovec *iov, unsigned long nr_segs,
-		loff_t pos, loff_t *ppos, size_t count)
+static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
+		loff_t pos)
 {
-	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *vi = mapping->host;
 	ntfs_inode *ni = NTFS_I(vi);
 	ntfs_volume *vol = ni->vol;
 	struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
 	struct page *cached_page = NULL;
-	char __user *buf = NULL;
-	s64 end, ll;
 	VCN last_vcn;
 	LCN lcn;
-	unsigned long flags;
-	size_t bytes, iov_ofs = 0;	/* Offset in the current iovec. */
-	ssize_t status, written;
+	size_t bytes;
+	ssize_t status, written = 0;
 	unsigned nr_pages;
-	int err;
 
-	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
-			"pos 0x%llx, count 0x%lx.",
-			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
-			(unsigned long long)pos, (unsigned long)count);
-	if (unlikely(!count))
-		return 0;
-	BUG_ON(NInoMstProtected(ni));
-	/*
-	 * If the attribute is not an index root and it is encrypted or
-	 * compressed, we cannot write to it yet.  Note we need to check for
-	 * AT_INDEX_ALLOCATION since this is the type of both directory and
-	 * index inodes.
-	 */
-	if (ni->type != AT_INDEX_ALLOCATION) {
-		/* If file is encrypted, deny access, just like NT4. */
-		if (NInoEncrypted(ni)) {
-			/*
-			 * Reminder for later: Encrypted files are _always_
-			 * non-resident so that the content can always be
-			 * encrypted.
-			 */
-			ntfs_debug("Denying write access to encrypted file.");
-			return -EACCES;
-		}
-		if (NInoCompressed(ni)) {
-			/* Only unnamed $DATA attribute can be compressed. */
-			BUG_ON(ni->type != AT_DATA);
-			BUG_ON(ni->name_len);
-			/*
-			 * Reminder for later: If resident, the data is not
-			 * actually compressed.  Only on the switch to non-
-			 * resident does compression kick in.  This is in
-			 * contrast to encrypted files (see above).
-			 */
-			ntfs_error(vi->i_sb, "Writing to compressed files is "
-					"not implemented yet.  Sorry.");
-			return -EOPNOTSUPP;
-		}
-	}
+	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+			"0x%llx, count 0x%lx.", vi->i_ino,
+			(unsigned)le32_to_cpu(ni->type),
+			(unsigned long long)pos,
+			(unsigned long)iov_iter_count(i));
 	/*
 	 * If a previous ntfs_truncate() failed, repeat it and abort if it
 	 * fails again.
 	 */
 	if (unlikely(NInoTruncateFailed(ni))) {
+		int err;
+
 		inode_dio_wait(vi);
 		err = ntfs_truncate(vi);
 		if (err || NInoTruncateFailed(ni)) {
@@ -1855,81 +1783,6 @@
 			return err;
 		}
 	}
-	/* The first byte after the write. */
-	end = pos + count;
-	/*
-	 * If the write goes beyond the allocated size, extend the allocation
-	 * to cover the whole of the write, rounded up to the nearest cluster.
-	 */
-	read_lock_irqsave(&ni->size_lock, flags);
-	ll = ni->allocated_size;
-	read_unlock_irqrestore(&ni->size_lock, flags);
-	if (end > ll) {
-		/* Extend the allocation without changing the data size. */
-		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
-		if (likely(ll >= 0)) {
-			BUG_ON(pos >= ll);
-			/* If the extension was partial truncate the write. */
-			if (end > ll) {
-				ntfs_debug("Truncating write to inode 0x%lx, "
-						"attribute type 0x%x, because "
-						"the allocation was only "
-						"partially extended.",
-						vi->i_ino, (unsigned)
-						le32_to_cpu(ni->type));
-				end = ll;
-				count = ll - pos;
-			}
-		} else {
-			err = ll;
-			read_lock_irqsave(&ni->size_lock, flags);
-			ll = ni->allocated_size;
-			read_unlock_irqrestore(&ni->size_lock, flags);
-			/* Perform a partial write if possible or fail. */
-			if (pos < ll) {
-				ntfs_debug("Truncating write to inode 0x%lx, "
-						"attribute type 0x%x, because "
-						"extending the allocation "
-						"failed (error code %i).",
-						vi->i_ino, (unsigned)
-						le32_to_cpu(ni->type), err);
-				end = ll;
-				count = ll - pos;
-			} else {
-				ntfs_error(vol->sb, "Cannot perform write to "
-						"inode 0x%lx, attribute type "
-						"0x%x, because extending the "
-						"allocation failed (error "
-						"code %i).", vi->i_ino,
-						(unsigned)
-						le32_to_cpu(ni->type), err);
-				return err;
-			}
-		}
-	}
-	written = 0;
-	/*
-	 * If the write starts beyond the initialized size, extend it up to the
-	 * beginning of the write and initialize all non-sparse space between
-	 * the old initialized size and the new one.  This automatically also
-	 * increments the vfs inode->i_size to keep it above or equal to the
-	 * initialized_size.
-	 */
-	read_lock_irqsave(&ni->size_lock, flags);
-	ll = ni->initialized_size;
-	read_unlock_irqrestore(&ni->size_lock, flags);
-	if (pos > ll) {
-		err = ntfs_attr_extend_initialized(ni, pos);
-		if (err < 0) {
-			ntfs_error(vol->sb, "Cannot perform write to inode "
-					"0x%lx, attribute type 0x%x, because "
-					"extending the initialized size "
-					"failed (error code %i).", vi->i_ino,
-					(unsigned)le32_to_cpu(ni->type), err);
-			status = err;
-			goto err_out;
-		}
-	}
 	/*
 	 * Determine the number of pages per cluster for non-resident
 	 * attributes.
@@ -1937,10 +1790,7 @@
 	nr_pages = 1;
 	if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
 		nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
-	/* Finally, perform the actual write. */
 	last_vcn = -1;
-	if (likely(nr_segs == 1))
-		buf = iov->iov_base;
 	do {
 		VCN vcn;
 		pgoff_t idx, start_idx;
@@ -1965,10 +1815,10 @@
 						vol->cluster_size_bits, false);
 				up_read(&ni->runlist.lock);
 				if (unlikely(lcn < LCN_HOLE)) {
-					status = -EIO;
 					if (lcn == LCN_ENOMEM)
 						status = -ENOMEM;
-					else
+					else {
+						status = -EIO;
 						ntfs_error(vol->sb, "Cannot "
 							"perform write to "
 							"inode 0x%lx, "
@@ -1977,6 +1827,7 @@
 							"is corrupt.",
 							vi->i_ino, (unsigned)
 							le32_to_cpu(ni->type));
+					}
 					break;
 				}
 				if (lcn == LCN_HOLE) {
@@ -1989,8 +1840,9 @@
 				}
 			}
 		}
-		if (bytes > count)
-			bytes = count;
+		if (bytes > iov_iter_count(i))
+			bytes = iov_iter_count(i);
+again:
 		/*
 		 * Bring in the user page(s) that we will copy from _first_.
 		 * Otherwise there is a nasty deadlock on copying from the same
@@ -1999,10 +1851,10 @@
 		 * pages being swapped out between us bringing them into memory
 		 * and doing the actual copying.
 		 */
-		if (likely(nr_segs == 1))
-			ntfs_fault_in_pages_readable(buf, bytes);
-		else
-			ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+		if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
+			status = -EFAULT;
+			break;
+		}
 		/* Get and lock @do_pages starting at index @start_idx. */
 		status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
 				pages, &cached_page);
@@ -2018,56 +1870,57 @@
 			status = ntfs_prepare_pages_for_non_resident_write(
 					pages, do_pages, pos, bytes);
 			if (unlikely(status)) {
-				loff_t i_size;
-
 				do {
 					unlock_page(pages[--do_pages]);
 					page_cache_release(pages[do_pages]);
 				} while (do_pages);
-				/*
-				 * The write preparation may have instantiated
-				 * allocated space outside i_size.  Trim this
-				 * off again.  We can ignore any errors in this
-				 * case as we will just be waisting a bit of
-				 * allocated space, which is not a disaster.
-				 */
-				i_size = i_size_read(vi);
-				if (pos + bytes > i_size) {
-					ntfs_write_failed(mapping, pos + bytes);
-				}
 				break;
 			}
 		}
 		u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
-		if (likely(nr_segs == 1)) {
-			copied = ntfs_copy_from_user(pages + u, do_pages - u,
-					ofs, buf, bytes);
-			buf += copied;
-		} else
-			copied = ntfs_copy_from_user_iovec(pages + u,
-					do_pages - u, ofs, &iov, &iov_ofs,
-					bytes);
+		copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
+					i, bytes);
 		ntfs_flush_dcache_pages(pages + u, do_pages - u);
-		status = ntfs_commit_pages_after_write(pages, do_pages, pos,
-				bytes);
-		if (likely(!status)) {
-			written += copied;
-			count -= copied;
-			pos += copied;
-			if (unlikely(copied != bytes))
-				status = -EFAULT;
+		status = 0;
+		if (likely(copied == bytes)) {
+			status = ntfs_commit_pages_after_write(pages, do_pages,
+					pos, bytes);
+			if (!status)
+				status = bytes;
 		}
 		do {
 			unlock_page(pages[--do_pages]);
 			page_cache_release(pages[do_pages]);
 		} while (do_pages);
-		if (unlikely(status))
+		if (unlikely(status < 0))
 			break;
-		balance_dirty_pages_ratelimited(mapping);
+		copied = status;
 		cond_resched();
-	} while (count);
-err_out:
-	*ppos = pos;
+		if (unlikely(!copied)) {
+			size_t sc;
+
+			/*
+			 * We failed to copy anything.  Fall back to single
+			 * segment length write.
+			 *
+			 * This is needed to avoid possible livelock in the
+			 * case that all segments in the iov cannot be copied
+			 * at once without a pagefault.
+			 */
+			sc = iov_iter_single_seg_count(i);
+			if (bytes > sc)
+				bytes = sc;
+			goto again;
+		}
+		iov_iter_advance(i, copied);
+		pos += copied;
+		written += copied;
+		balance_dirty_pages_ratelimited(mapping);
+		if (fatal_signal_pending(current)) {
+			status = -EINTR;
+			break;
+		}
+	} while (iov_iter_count(i));
 	if (cached_page)
 		page_cache_release(cached_page);
 	ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
@@ -2077,59 +1930,56 @@
 }
 
 /**
- * ntfs_file_aio_write_nolock -
+ * ntfs_file_write_iter_nolock - write data to a file
+ * @iocb:	IO state structure (file, offset, etc.)
+ * @from:	iov_iter with data to write
+ *
+ * Basically the same as __generic_file_write_iter() except that it ends
+ * up calling ntfs_perform_write() instead of generic_perform_write() and that
+ * O_DIRECT is not implemented.
  */
-static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
-		const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb,
+		struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	loff_t pos;
-	size_t count;		/* after file limit checks */
-	ssize_t written, err;
+	loff_t pos = iocb->ki_pos;
+	ssize_t written = 0;
+	ssize_t err;
+	size_t count = iov_iter_count(from);
 
-	count = iov_length(iov, nr_segs);
-	pos = *ppos;
-	/* We can write back this queue in page reclaim. */
-	current->backing_dev_info = inode_to_bdi(inode);
-	written = 0;
-	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
-	if (err)
-		goto out;
-	if (!count)
-		goto out;
-	err = file_remove_suid(file);
-	if (err)
-		goto out;
-	err = file_update_time(file);
-	if (err)
-		goto out;
-	written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
-			count);
-out:
+	err = ntfs_prepare_file_for_write(file, &pos, &count);
+	if (count && !err) {
+		iov_iter_truncate(from, count);
+		written = ntfs_perform_write(file, from, pos);
+		if (likely(written >= 0))
+			iocb->ki_pos = pos + written;
+	}
 	current->backing_dev_info = NULL;
 	return written ? written : err;
 }
 
 /**
- * ntfs_file_aio_write -
+ * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
+ * @iocb:	IO state structure
+ * @from:	iov_iter with data to write
+ *
+ * Basically the same as generic_file_write_iter() except that it ends up
+ * calling ntfs_file_write_iter_nolock() instead of
+ * __generic_file_write_iter().
  */
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
+	struct inode *vi = file_inode(file);
 	ssize_t ret;
 
-	BUG_ON(iocb->ki_pos != pos);
-
-	mutex_lock(&inode->i_mutex);
-	ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
-	mutex_unlock(&inode->i_mutex);
+	mutex_lock(&vi->i_mutex);
+	ret = ntfs_file_write_iter_nolock(iocb, from);
+	mutex_unlock(&vi->i_mutex);
 	if (ret > 0) {
-		int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+		ssize_t err;
+
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 		if (err < 0)
 			ret = err;
 	}
@@ -2197,37 +2047,17 @@
 #endif /* NTFS_RW */
 
 const struct file_operations ntfs_file_ops = {
-	.llseek		= generic_file_llseek,	 /* Seek inside file. */
-	.read		= new_sync_read,	 /* Read from file. */
-	.read_iter	= generic_file_read_iter, /* Async read from file. */
+	.llseek		= generic_file_llseek,
+	.read		= new_sync_read,
+	.read_iter	= generic_file_read_iter,
 #ifdef NTFS_RW
-	.write		= do_sync_write,	 /* Write to file. */
-	.aio_write	= ntfs_file_aio_write,	 /* Async write to file. */
-	/*.release	= ,*/			 /* Last file is closed.  See
-						    fs/ext2/file.c::
-						    ext2_release_file() for
-						    how to use this to discard
-						    preallocated space for
-						    write opened files. */
-	.fsync		= ntfs_file_fsync,	 /* Sync a file to disk. */
-	/*.aio_fsync	= ,*/			 /* Sync all outstanding async
-						    i/o operations on a
-						    kiocb. */
+	.write		= new_sync_write,
+	.write_iter	= ntfs_file_write_iter,
+	.fsync		= ntfs_file_fsync,
 #endif /* NTFS_RW */
-	/*.ioctl	= ,*/			 /* Perform function on the
-						    mounted filesystem. */
-	.mmap		= generic_file_mmap,	 /* Mmap file. */
-	.open		= ntfs_file_open,	 /* Open file. */
-	.splice_read	= generic_file_splice_read /* Zero-copy data send with
-						    the data source being on
-						    the ntfs partition.  We do
-						    not need to care about the
-						    data destination. */
-	/*.sendpage	= ,*/			 /* Zero-copy data send with
-						    the data destination being
-						    on the ntfs partition.  We
-						    do not need to care about
-						    the data source. */
+	.mmap		= generic_file_mmap,
+	.open		= ntfs_file_open,
+	.splice_read	= generic_file_splice_read,
 };
 
 const struct inode_operations ntfs_file_inode_ops = {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 898b994..1d0c21d 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,7 +28,6 @@
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/log2.h>
-#include <linux/aio.h>
 
 #include "aops.h"
 #include "attrib.h"
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1b0463a..8d2bc84 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
 #include <linux/mpage.h>
 #include <linux/quotaops.h>
 #include <linux/blkdev.h>
+#include <linux/uio.h>
 
 #include <cluster/masklog.h>
 
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 6cae155..dd59599 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,7 +22,7 @@
 #ifndef OCFS2_AOPS_H
 #define OCFS2_AOPS_H
 
-#include <linux/aio.h>
+#include <linux/fs.h>
 
 handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
 							 struct page *page,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ba1790e..91f03ce9 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2280,7 +2280,7 @@
 		file->f_path.dentry->d_name.name,
 		(unsigned int)from->nr_segs);	/* GRRRRR */
 
-	if (iocb->ki_nbytes == 0)
+	if (count == 0)
 		return 0;
 
 	appending = file->f_flags & O_APPEND ? 1 : 0;
@@ -2330,8 +2330,7 @@
 	}
 
 	can_do_direct = direct_io;
-	ret = ocfs2_prepare_inode_for_write(file, ppos,
-					    iocb->ki_nbytes, appending,
+	ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending,
 					    &can_do_direct, &has_refcount);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -2339,8 +2338,7 @@
 	}
 
 	if (direct_io && !is_sync_kiocb(iocb))
-		unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes,
-						      *ppos);
+		unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos);
 
 	/*
 	 * We can't complete the direct I/O as requested, fall back to
diff --git a/fs/open.c b/fs/open.c
index 33f9cbf..6a83c47 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -570,6 +570,7 @@
 	uid = make_kuid(current_user_ns(), user);
 	gid = make_kgid(current_user_ns(), group);
 
+retry_deleg:
 	newattrs.ia_valid =  ATTR_CTIME;
 	if (user != (uid_t) -1) {
 		if (!uid_valid(uid))
@@ -586,7 +587,6 @@
 	if (!S_ISDIR(inode->i_mode))
 		newattrs.ia_valid |=
 			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-retry_deleg:
 	mutex_lock(&inode->i_mutex);
 	error = security_path_chown(path, uid, gid);
 	if (!error)
@@ -988,9 +988,6 @@
 		return ERR_PTR(err);
 	if (flags & O_CREAT)
 		return ERR_PTR(-EINVAL);
-	if (!filename && (flags & O_DIRECTORY))
-		if (!dentry->d_inode->i_op->lookup)
-			return ERR_PTR(-ENOTDIR);
 	return do_file_open_root(dentry, mnt, filename, &op);
 }
 EXPORT_SYMBOL(file_open_root);
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e5..2d084f2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,7 +21,6 @@
 #include <linux/audit.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 39d1373..44a549b 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -539,6 +539,9 @@
 	mem_address = pdata->mem_address;
 	record_size = pdata->record_size;
 	dump_oops = pdata->dump_oops;
+	ramoops_console_size = pdata->console_size;
+	ramoops_pmsg_size = pdata->pmsg_size;
+	ramoops_ftrace_size = pdata->ftrace_size;
 
 	pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
 		cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/read_write.c b/fs/read_write.c
index 8e1b687..69128b3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/uio.h>
-#include <linux/aio.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
 #include <linux/export.h>
@@ -343,13 +342,10 @@
 
 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = iov_iter_count(iter);
 
 	iter->type |= READ;
 	ret = file->f_op->read_iter(&kiocb, iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
-
+	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
 	return ret;
@@ -366,13 +362,10 @@
 
 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = iov_iter_count(iter);
 
 	iter->type |= WRITE;
 	ret = file->f_op->write_iter(&kiocb, iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
-
+	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
 	return ret;
@@ -426,11 +419,9 @@
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 
 	ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -446,12 +437,10 @@
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 	iov_iter_init(&iter, READ, &iov, 1, len);
 
 	ret = filp->f_op->read_iter(&kiocb, &iter);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -510,11 +499,9 @@
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 
 	ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -530,12 +517,10 @@
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 	iov_iter_init(&iter, WRITE, &iov, 1, len);
 
 	ret = filp->f_op->write_iter(&kiocb, &iter);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -710,60 +695,47 @@
 }
 EXPORT_SYMBOL(iov_shorten);
 
-static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
-		unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
+static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
+		loff_t *ppos, iter_fn_t fn)
 {
 	struct kiocb kiocb;
-	struct iov_iter iter;
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 
-	iov_iter_init(&iter, rw, iov, nr_segs, len);
-	ret = fn(&kiocb, &iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
+	ret = fn(&kiocb, iter);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
 
-static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
-		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter,
+		loff_t *ppos, iov_fn_t fn)
 {
 	struct kiocb kiocb;
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 
-	ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
+	ret = fn(&kiocb, iter->iov, iter->nr_segs, kiocb.ki_pos);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
 
 /* Do it by hand, with file-ops */
-static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
-		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
+		loff_t *ppos, io_fn_t fn)
 {
-	struct iovec *vector = iov;
 	ssize_t ret = 0;
 
-	while (nr_segs > 0) {
-		void __user *base;
-		size_t len;
+	while (iov_iter_count(iter)) {
+		struct iovec iovec = iov_iter_iovec(iter);
 		ssize_t nr;
 
-		base = vector->iov_base;
-		len = vector->iov_len;
-		vector++;
-		nr_segs--;
-
-		nr = fn(filp, base, len, ppos);
+		nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
 
 		if (nr < 0) {
 			if (!ret)
@@ -771,8 +743,9 @@
 			break;
 		}
 		ret += nr;
-		if (nr != len)
+		if (nr != iovec.iov_len)
 			break;
+		iov_iter_advance(iter, nr);
 	}
 
 	return ret;
@@ -863,17 +836,20 @@
 	size_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
+	struct iov_iter iter;
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
 	iter_fn_t iter_fn;
 
-	ret = rw_copy_check_uvector(type, uvector, nr_segs,
-				    ARRAY_SIZE(iovstack), iovstack, &iov);
-	if (ret <= 0)
-		goto out;
+	ret = import_iovec(type, uvector, nr_segs,
+			   ARRAY_SIZE(iovstack), &iov, &iter);
+	if (ret < 0)
+		return ret;
 
-	tot_len = ret;
+	tot_len = iov_iter_count(&iter);
+	if (!tot_len)
+		goto out;
 	ret = rw_verify_area(type, file, pos, tot_len);
 	if (ret < 0)
 		goto out;
@@ -891,20 +867,17 @@
 	}
 
 	if (iter_fn)
-		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
-						pos, iter_fn);
+		ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
 	else if (fnv)
-		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-						pos, fnv);
+		ret = do_sync_readv_writev(file, &iter, pos, fnv);
 	else
-		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+		ret = do_loop_readv_writev(file, &iter, pos, fn);
 
 	if (type != READ)
 		file_end_write(file);
 
 out:
-	if (iov != iovstack)
-		kfree(iov);
+	kfree(iov);
 	if ((ret + (type == READ)) > 0) {
 		if (type == READ)
 			fsnotify_access(file);
@@ -1043,17 +1016,20 @@
 	compat_ssize_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
+	struct iov_iter iter;
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
 	iter_fn_t iter_fn;
 
-	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
-					       UIO_FASTIOV, iovstack, &iov);
-	if (ret <= 0)
-		goto out;
+	ret = compat_import_iovec(type, uvector, nr_segs,
+				  UIO_FASTIOV, &iov, &iter);
+	if (ret < 0)
+		return ret;
 
-	tot_len = ret;
+	tot_len = iov_iter_count(&iter);
+	if (!tot_len)
+		goto out;
 	ret = rw_verify_area(type, file, pos, tot_len);
 	if (ret < 0)
 		goto out;
@@ -1071,20 +1047,17 @@
 	}
 
 	if (iter_fn)
-		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
-						pos, iter_fn);
+		ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
 	else if (fnv)
-		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-						pos, fnv);
+		ret = do_sync_readv_writev(file, &iter, pos, fnv);
 	else
-		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+		ret = do_loop_readv_writev(file, &iter, pos, fn);
 
 	if (type != READ)
 		file_end_write(file);
 
 out:
-	if (iov != iovstack)
-		kfree(iov);
+	kfree(iov);
 	if ((ret + (type == READ)) > 0) {
 		if (type == READ)
 			fsnotify_access(file);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index e72401e..9312b78 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,7 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/quotaops.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 int reiserfs_commit_write(struct file *f, struct page *page,
 			  unsigned from, unsigned to);
diff --git a/fs/splice.c b/fs/splice.c
index 7968da9..41cbb16 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,7 +32,6 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include "internal.h"
 
 /*
@@ -1534,34 +1533,29 @@
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
 	struct iov_iter iter;
-	ssize_t count;
 
 	pipe = get_pipe_info(file);
 	if (!pipe)
 		return -EBADF;
 
-	ret = rw_copy_check_uvector(READ, uiov, nr_segs,
-				    ARRAY_SIZE(iovstack), iovstack, &iov);
-	if (ret <= 0)
-		goto out;
+	ret = import_iovec(READ, uiov, nr_segs,
+			   ARRAY_SIZE(iovstack), &iov, &iter);
+	if (ret < 0)
+		return ret;
 
-	count = ret;
-	iov_iter_init(&iter, READ, iov, nr_segs, count);
-
+	sd.total_len = iov_iter_count(&iter);
 	sd.len = 0;
-	sd.total_len = count;
 	sd.flags = flags;
 	sd.u.data = &iter;
 	sd.pos = 0;
 
-	pipe_lock(pipe);
-	ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
-	pipe_unlock(pipe);
+	if (sd.total_len) {
+		pipe_lock(pipe);
+		ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
+		pipe_unlock(pipe);
+	}
 
-out:
-	if (iov != iovstack)
-		kfree(iov);
-
+	kfree(iov);
 	return ret;
 }
 
diff --git a/fs/stat.c b/fs/stat.c
index ae0c3ce..19636af 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -66,7 +66,7 @@
 {
 	int retval;
 
-	retval = security_inode_getattr(path->mnt, path->dentry);
+	retval = security_inode_getattr(path);
 	if (retval)
 		return retval;
 	return vfs_getattr_nosec(path, stat);
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
new file mode 100644
index 0000000..82fa35b
--- /dev/null
+++ b/fs/tracefs/Makefile
@@ -0,0 +1,4 @@
+tracefs-objs	:= inode.o
+
+obj-$(CONFIG_TRACING)	+= tracefs.o
+
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 0000000..d92bdf3
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,650 @@
+/*
+ *  inode.c - part of tracefs, a pseudo file system for activating tracing
+ *
+ * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
+ *
+ *  Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License version
+ *	2 as published by the Free Software Foundation.
+ *
+ * tracefs is the file system that is used by the tracing infrastructure.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/kobject.h>
+#include <linux/namei.h>
+#include <linux/tracefs.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+#include <linux/parser.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+
+#define TRACEFS_DEFAULT_MODE	0700
+
+static struct vfsmount *tracefs_mount;
+static int tracefs_mount_count;
+static bool tracefs_registered;
+
+static ssize_t default_read_file(struct file *file, char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	return 0;
+}
+
+static ssize_t default_write_file(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	return count;
+}
+
+static const struct file_operations tracefs_file_operations = {
+	.read =		default_read_file,
+	.write =	default_write_file,
+	.open =		simple_open,
+	.llseek =	noop_llseek,
+};
+
+static struct tracefs_dir_ops {
+	int (*mkdir)(const char *name);
+	int (*rmdir)(const char *name);
+} tracefs_ops;
+
+static char *get_dname(struct dentry *dentry)
+{
+	const char *dname;
+	char *name;
+	int len = dentry->d_name.len;
+
+	dname = dentry->d_name.name;
+	name = kmalloc(len + 1, GFP_KERNEL);
+	if (!name)
+		return NULL;
+	memcpy(name, dname, len);
+	name[len] = 0;
+	return name;
+}
+
+static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
+{
+	char *name;
+	int ret;
+
+	name = get_dname(dentry);
+	if (!name)
+		return -ENOMEM;
+
+	/*
+	 * The mkdir call can call the generic functions that create
+	 * the files within the tracefs system. It is up to the individual
+	 * mkdir routine to handle races.
+	 */
+	mutex_unlock(&inode->i_mutex);
+	ret = tracefs_ops.mkdir(name);
+	mutex_lock(&inode->i_mutex);
+
+	kfree(name);
+
+	return ret;
+}
+
+static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
+{
+	char *name;
+	int ret;
+
+	name = get_dname(dentry);
+	if (!name)
+		return -ENOMEM;
+
+	/*
+	 * The rmdir call can call the generic functions that create
+	 * the files within the tracefs system. It is up to the individual
+	 * rmdir routine to handle races.
+	 * This time we need to unlock not only the parent (inode) but
+	 * also the directory that is being deleted.
+	 */
+	mutex_unlock(&inode->i_mutex);
+	mutex_unlock(&dentry->d_inode->i_mutex);
+
+	ret = tracefs_ops.rmdir(name);
+
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock(&dentry->d_inode->i_mutex);
+
+	kfree(name);
+
+	return ret;
+}
+
+static const struct inode_operations tracefs_dir_inode_operations = {
+	.lookup		= simple_lookup,
+	.mkdir		= tracefs_syscall_mkdir,
+	.rmdir		= tracefs_syscall_rmdir,
+};
+
+static struct inode *tracefs_get_inode(struct super_block *sb)
+{
+	struct inode *inode = new_inode(sb);
+	if (inode) {
+		inode->i_ino = get_next_ino();
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	}
+	return inode;
+}
+
+struct tracefs_mount_opts {
+	kuid_t uid;
+	kgid_t gid;
+	umode_t mode;
+};
+
+enum {
+	Opt_uid,
+	Opt_gid,
+	Opt_mode,
+	Opt_err
+};
+
+static const match_table_t tokens = {
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_mode, "mode=%o"},
+	{Opt_err, NULL}
+};
+
+struct tracefs_fs_info {
+	struct tracefs_mount_opts mount_opts;
+};
+
+static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+{
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	int token;
+	kuid_t uid;
+	kgid_t gid;
+	char *p;
+
+	opts->mode = TRACEFS_DEFAULT_MODE;
+
+	while ((p = strsep(&data, ",")) != NULL) {
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(uid))
+				return -EINVAL;
+			opts->uid = uid;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(gid))
+				return -EINVAL;
+			opts->gid = gid;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->mode = option & S_IALLUGO;
+			break;
+		/*
+		 * We might like to report bad mount options here;
+		 * but traditionally tracefs has ignored all mount options
+		 */
+		}
+	}
+
+	return 0;
+}
+
+static int tracefs_apply_options(struct super_block *sb)
+{
+	struct tracefs_fs_info *fsi = sb->s_fs_info;
+	struct inode *inode = sb->s_root->d_inode;
+	struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+	inode->i_mode &= ~S_IALLUGO;
+	inode->i_mode |= opts->mode;
+
+	inode->i_uid = opts->uid;
+	inode->i_gid = opts->gid;
+
+	return 0;
+}
+
+static int tracefs_remount(struct super_block *sb, int *flags, char *data)
+{
+	int err;
+	struct tracefs_fs_info *fsi = sb->s_fs_info;
+
+	sync_filesystem(sb);
+	err = tracefs_parse_options(data, &fsi->mount_opts);
+	if (err)
+		goto fail;
+
+	tracefs_apply_options(sb);
+
+fail:
+	return err;
+}
+
+static int tracefs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
+	struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+	if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, opts->uid));
+	if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, opts->gid));
+	if (opts->mode != TRACEFS_DEFAULT_MODE)
+		seq_printf(m, ",mode=%o", opts->mode);
+
+	return 0;
+}
+
+static const struct super_operations tracefs_super_operations = {
+	.statfs		= simple_statfs,
+	.remount_fs	= tracefs_remount,
+	.show_options	= tracefs_show_options,
+};
+
+static int trace_fill_super(struct super_block *sb, void *data, int silent)
+{
+	static struct tree_descr trace_files[] = {{""}};
+	struct tracefs_fs_info *fsi;
+	int err;
+
+	save_mount_options(sb, data);
+
+	fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
+	sb->s_fs_info = fsi;
+	if (!fsi) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	err = tracefs_parse_options(data, &fsi->mount_opts);
+	if (err)
+		goto fail;
+
+	err  =  simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
+	if (err)
+		goto fail;
+
+	sb->s_op = &tracefs_super_operations;
+
+	tracefs_apply_options(sb);
+
+	return 0;
+
+fail:
+	kfree(fsi);
+	sb->s_fs_info = NULL;
+	return err;
+}
+
+static struct dentry *trace_mount(struct file_system_type *fs_type,
+			int flags, const char *dev_name,
+			void *data)
+{
+	return mount_single(fs_type, flags, data, trace_fill_super);
+}
+
+static struct file_system_type trace_fs_type = {
+	.owner =	THIS_MODULE,
+	.name =		"tracefs",
+	.mount =	trace_mount,
+	.kill_sb =	kill_litter_super,
+};
+MODULE_ALIAS_FS("tracefs");
+
+static struct dentry *start_creating(const char *name, struct dentry *parent)
+{
+	struct dentry *dentry;
+	int error;
+
+	pr_debug("tracefs: creating file '%s'\n",name);
+
+	error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+			      &tracefs_mount_count);
+	if (error)
+		return ERR_PTR(error);
+
+	/* If the parent is not specified, we create it in the root.
+	 * We need the root dentry to do this, which is in the super
+	 * block. A pointer to that is in the struct vfsmount that we
+	 * have around.
+	 */
+	if (!parent)
+		parent = tracefs_mount->mnt_root;
+
+	mutex_lock(&parent->d_inode->i_mutex);
+	dentry = lookup_one_len(name, parent, strlen(name));
+	if (!IS_ERR(dentry) && dentry->d_inode) {
+		dput(dentry);
+		dentry = ERR_PTR(-EEXIST);
+	}
+	if (IS_ERR(dentry))
+		mutex_unlock(&parent->d_inode->i_mutex);
+	return dentry;
+}
+
+static struct dentry *failed_creating(struct dentry *dentry)
+{
+	mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+	dput(dentry);
+	simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+	return NULL;
+}
+
+static struct dentry *end_creating(struct dentry *dentry)
+{
+	mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+	return dentry;
+}
+
+/**
+ * tracefs_create_file - create a file in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is NULL, then the
+ *          file will be created in the root of the tracefs filesystem.
+ * @data: a pointer to something that the caller will want to get to later
+ *        on.  The inode.i_private pointer will point to this value on
+ *        the open() call.
+ * @fops: a pointer to a struct file_operations that should be used for
+ *        this file.
+ *
+ * This is the basic "create a file" function for tracefs.  It allows for a
+ * wide range of flexibility in creating a file, or a directory (if you want
+ * to create a directory, the tracefs_create_dir() function is
+ * recommended to be used instead.)
+ *
+ * This function will return a pointer to a dentry if it succeeds.  This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.)  If an error occurs, %NULL will be returned.
+ *
+ * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *tracefs_create_file(const char *name, umode_t mode,
+				   struct dentry *parent, void *data,
+				   const struct file_operations *fops)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	if (!(mode & S_IFMT))
+		mode |= S_IFREG;
+	BUG_ON(!S_ISREG(mode));
+	dentry = start_creating(name, parent);
+
+	if (IS_ERR(dentry))
+		return NULL;
+
+	inode = tracefs_get_inode(dentry->d_sb);
+	if (unlikely(!inode))
+		return failed_creating(dentry);
+
+	inode->i_mode = mode;
+	inode->i_fop = fops ? fops : &tracefs_file_operations;
+	inode->i_private = data;
+	d_instantiate(dentry, inode);
+	fsnotify_create(dentry->d_parent->d_inode, dentry);
+	return end_creating(dentry);
+}
+
+static struct dentry *__create_dir(const char *name, struct dentry *parent,
+				   const struct inode_operations *ops)
+{
+	struct dentry *dentry = start_creating(name, parent);
+	struct inode *inode;
+
+	if (IS_ERR(dentry))
+		return NULL;
+
+	inode = tracefs_get_inode(dentry->d_sb);
+	if (unlikely(!inode))
+		return failed_creating(dentry);
+
+	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+	inode->i_op = ops;
+	inode->i_fop = &simple_dir_operations;
+
+	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+	d_instantiate(dentry, inode);
+	inc_nlink(dentry->d_parent->d_inode);
+	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+	return end_creating(dentry);
+}
+
+/**
+ * tracefs_create_dir - create a directory in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the directory to
+ *        create.
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is NULL, then the
+ *          directory will be created in the root of the tracefs filesystem.
+ *
+ * This function creates a directory in tracefs with the given name.
+ *
+ * This function will return a pointer to a dentry if it succeeds.  This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed. If an error occurs, %NULL will be returned.
+ *
+ * If tracing is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
+{
+	return __create_dir(name, parent, &simple_dir_inode_operations);
+}
+
+/**
+ * tracefs_create_instance_dir - create the tracing instances directory
+ * @name: The name of the instances directory to create
+ * @parent: The parent directory that the instances directory will exist
+ * @mkdir: The function to call when a mkdir is performed.
+ * @rmdir: The function to call when a rmdir is performed.
+ *
+ * Only one instances directory is allowed.
+ *
+ * The instances directory is special as it allows for mkdir and rmdir to
+ * to be done by userspace. When a mkdir or rmdir is performed, the inode
+ * locks are released and the methhods passed in (@mkdir and @rmdir) are
+ * called without locks and with the name of the directory being created
+ * within the instances directory.
+ *
+ * Returns the dentry of the instances directory.
+ */
+struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
+					  int (*mkdir)(const char *name),
+					  int (*rmdir)(const char *name))
+{
+	struct dentry *dentry;
+
+	/* Only allow one instance of the instances directory. */
+	if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
+		return NULL;
+
+	dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
+	if (!dentry)
+		return NULL;
+
+	tracefs_ops.mkdir = mkdir;
+	tracefs_ops.rmdir = rmdir;
+
+	return dentry;
+}
+
+static inline int tracefs_positive(struct dentry *dentry)
+{
+	return dentry->d_inode && !d_unhashed(dentry);
+}
+
+static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
+{
+	int ret = 0;
+
+	if (tracefs_positive(dentry)) {
+		if (dentry->d_inode) {
+			dget(dentry);
+			switch (dentry->d_inode->i_mode & S_IFMT) {
+			case S_IFDIR:
+				ret = simple_rmdir(parent->d_inode, dentry);
+				break;
+			default:
+				simple_unlink(parent->d_inode, dentry);
+				break;
+			}
+			if (!ret)
+				d_delete(dentry);
+			dput(dentry);
+		}
+	}
+	return ret;
+}
+
+/**
+ * tracefs_remove - removes a file or directory from the tracefs filesystem
+ * @dentry: a pointer to a the dentry of the file or directory to be
+ *          removed.
+ *
+ * This function removes a file or directory in tracefs that was previously
+ * created with a call to another tracefs function (like
+ * tracefs_create_file() or variants thereof.)
+ */
+void tracefs_remove(struct dentry *dentry)
+{
+	struct dentry *parent;
+	int ret;
+
+	if (IS_ERR_OR_NULL(dentry))
+		return;
+
+	parent = dentry->d_parent;
+	if (!parent || !parent->d_inode)
+		return;
+
+	mutex_lock(&parent->d_inode->i_mutex);
+	ret = __tracefs_remove(dentry, parent);
+	mutex_unlock(&parent->d_inode->i_mutex);
+	if (!ret)
+		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+}
+
+/**
+ * tracefs_remove_recursive - recursively removes a directory
+ * @dentry: a pointer to a the dentry of the directory to be removed.
+ *
+ * This function recursively removes a directory tree in tracefs that
+ * was previously created with a call to another tracefs function
+ * (like tracefs_create_file() or variants thereof.)
+ */
+void tracefs_remove_recursive(struct dentry *dentry)
+{
+	struct dentry *child, *parent;
+
+	if (IS_ERR_OR_NULL(dentry))
+		return;
+
+	parent = dentry->d_parent;
+	if (!parent || !parent->d_inode)
+		return;
+
+	parent = dentry;
+ down:
+	mutex_lock(&parent->d_inode->i_mutex);
+ loop:
+	/*
+	 * The parent->d_subdirs is protected by the d_lock. Outside that
+	 * lock, the child can be unlinked and set to be freed which can
+	 * use the d_u.d_child as the rcu head and corrupt this list.
+	 */
+	spin_lock(&parent->d_lock);
+	list_for_each_entry(child, &parent->d_subdirs, d_child) {
+		if (!tracefs_positive(child))
+			continue;
+
+		/* perhaps simple_empty(child) makes more sense */
+		if (!list_empty(&child->d_subdirs)) {
+			spin_unlock(&parent->d_lock);
+			mutex_unlock(&parent->d_inode->i_mutex);
+			parent = child;
+			goto down;
+		}
+
+		spin_unlock(&parent->d_lock);
+
+		if (!__tracefs_remove(child, parent))
+			simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+
+		/*
+		 * The parent->d_lock protects agaist child from unlinking
+		 * from d_subdirs. When releasing the parent->d_lock we can
+		 * no longer trust that the next pointer is valid.
+		 * Restart the loop. We'll skip this one with the
+		 * tracefs_positive() check.
+		 */
+		goto loop;
+	}
+	spin_unlock(&parent->d_lock);
+
+	mutex_unlock(&parent->d_inode->i_mutex);
+	child = parent;
+	parent = parent->d_parent;
+	mutex_lock(&parent->d_inode->i_mutex);
+
+	if (child != dentry)
+		/* go up */
+		goto loop;
+
+	if (!__tracefs_remove(child, parent))
+		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+	mutex_unlock(&parent->d_inode->i_mutex);
+}
+
+/**
+ * tracefs_initialized - Tells whether tracefs has been registered
+ */
+bool tracefs_initialized(void)
+{
+	return tracefs_registered;
+}
+
+static struct kobject *trace_kobj;
+
+static int __init tracefs_init(void)
+{
+	int retval;
+
+	trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
+	if (!trace_kobj)
+		return -EINVAL;
+
+	retval = register_filesystem(&trace_fs_type);
+	if (!retval)
+		tracefs_registered = true;
+
+	return retval;
+}
+core_initcall(tracefs_init);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e627c0a..c3d15fe 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,7 +50,6 @@
  */
 
 #include "ubifs.h"
-#include <linux/aio.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/slab.h>
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 08f3555..7f885cc 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,7 +34,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
@@ -122,7 +122,7 @@
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	int err, pos;
-	size_t count = iocb->ki_nbytes;
+	size_t count = iov_iter_count(from);
 	struct udf_inode_info *iinfo = UDF_I(inode);
 
 	mutex_lock(&inode->i_mutex);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index a445d59..9c1fbd2 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -38,7 +38,7 @@
 #include <linux/slab.h>
 #include <linux/crc-itu-t.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1..4f8cdc5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,7 +31,6 @@
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a2e1cb8..f44212f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,7 +38,6 @@
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
 
-#include <linux/aio.h>
 #include <linux/dcache.h>
 #include <linux/falloc.h>
 #include <linux/pagevec.h>
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index ac78910..f8e8b34 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -124,7 +124,10 @@
 #define FTRACE_EVENTS()	. = ALIGN(8);					\
 			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
 			*(_ftrace_events)				\
-			VMLINUX_SYMBOL(__stop_ftrace_events) = .;
+			VMLINUX_SYMBOL(__stop_ftrace_events) = .;	\
+			VMLINUX_SYMBOL(__start_ftrace_enum_maps) = .;	\
+			*(_ftrace_enum_map)				\
+			VMLINUX_SYMBOL(__stop_ftrace_enum_maps) = .;
 #else
 #define FTRACE_EVENTS()
 #endif
diff --git a/include/linux/aio.h b/include/linux/aio.h
index d9c92da..9eb42db 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -1,86 +1,23 @@
 #ifndef __LINUX__AIO_H
 #define __LINUX__AIO_H
 
-#include <linux/list.h>
-#include <linux/workqueue.h>
 #include <linux/aio_abi.h>
-#include <linux/uio.h>
-#include <linux/rcupdate.h>
-
-#include <linux/atomic.h>
 
 struct kioctx;
 struct kiocb;
+struct mm_struct;
 
 #define KIOCB_KEY		0
 
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED		((void *) (~0ULL))
-
 typedef int (kiocb_cancel_fn)(struct kiocb *);
 
-struct kiocb {
-	struct file		*ki_filp;
-	struct kioctx		*ki_ctx;	/* NULL for sync ops */
-	kiocb_cancel_fn		*ki_cancel;
-	void			*private;
-
-	union {
-		void __user		*user;
-		struct task_struct	*tsk;
-	} ki_obj;
-
-	__u64			ki_user_data;	/* user's data for completion */
-	loff_t			ki_pos;
-	size_t			ki_nbytes;	/* copy of iocb->aio_nbytes */
-
-	struct list_head	ki_list;	/* the aio core uses this
-						 * for cancellation */
-
-	/*
-	 * If the aio_resfd field of the userspace iocb is not zero,
-	 * this is the underlying eventfd context to deliver events to.
-	 */
-	struct eventfd_ctx	*ki_eventfd;
-};
-
-static inline bool is_sync_kiocb(struct kiocb *kiocb)
-{
-	return kiocb->ki_ctx == NULL;
-}
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
-	*kiocb = (struct kiocb) {
-			.ki_ctx = NULL,
-			.ki_filp = filp,
-			.ki_obj.tsk = current,
-		};
-}
-
 /* prototypes */
 #ifdef CONFIG_AIO
-extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
-extern void aio_complete(struct kiocb *iocb, long res, long res2);
-struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
 extern long do_io_submit(aio_context_t ctx_id, long nr,
 			 struct iocb __user *__user *iocbpp, bool compat);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
-static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
-static inline void aio_complete(struct kiocb *iocb, long res, long res2) { }
-struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
 static inline long do_io_submit(aio_context_t ctx_id, long nr,
 				struct iocb __user * __user *iocbpp,
@@ -89,11 +26,6 @@
 				       kiocb_cancel_fn *cancel) { }
 #endif /* CONFIG_AIO */
 
-static inline struct kiocb *list_kiocb(struct list_head *h)
-{
-	return list_entry(h, struct kiocb, ki_list);
-}
-
 /* for sysctl: */
 extern unsigned long aio_nr;
 extern unsigned long aio_max_nr;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bbfceb7..c2e2111 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -113,8 +113,6 @@
 	enum bpf_prog_type type;
 };
 
-void bpf_register_prog_type(struct bpf_prog_type_list *tl);
-
 struct bpf_prog;
 
 struct bpf_prog_aux {
@@ -129,11 +127,25 @@
 };
 
 #ifdef CONFIG_BPF_SYSCALL
+void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+
 void bpf_prog_put(struct bpf_prog *prog);
-#else
-static inline void bpf_prog_put(struct bpf_prog *prog) {}
-#endif
 struct bpf_prog *bpf_prog_get(u32 ufd);
+#else
+static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
+{
+}
+
+static inline struct bpf_prog *bpf_prog_get(u32 ufd)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void bpf_prog_put(struct bpf_prog *prog)
+{
+}
+#endif
+
 /* verify correctness of eBPF program */
 int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
 
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 37b81bd..2821838 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -10,6 +10,8 @@
 #ifdef CONFIG_CONTEXT_TRACKING
 extern void context_tracking_cpu_set(int cpu);
 
+extern void context_tracking_enter(enum ctx_state state);
+extern void context_tracking_exit(enum ctx_state state);
 extern void context_tracking_user_enter(void);
 extern void context_tracking_user_exit(void);
 extern void __context_tracking_task_switch(struct task_struct *prev,
@@ -35,7 +37,8 @@
 		return 0;
 
 	prev_ctx = this_cpu_read(context_tracking.state);
-	context_tracking_user_exit();
+	if (prev_ctx != CONTEXT_KERNEL)
+		context_tracking_exit(prev_ctx);
 
 	return prev_ctx;
 }
@@ -43,8 +46,8 @@
 static inline void exception_exit(enum ctx_state prev_ctx)
 {
 	if (context_tracking_is_enabled()) {
-		if (prev_ctx == IN_USER)
-			context_tracking_user_enter();
+		if (prev_ctx != CONTEXT_KERNEL)
+			context_tracking_enter(prev_ctx);
 	}
 }
 
@@ -78,10 +81,16 @@
 		vtime_guest_enter(current);
 	else
 		current->flags |= PF_VCPU;
+
+	if (context_tracking_is_enabled())
+		context_tracking_enter(CONTEXT_GUEST);
 }
 
 static inline void guest_exit(void)
 {
+	if (context_tracking_is_enabled())
+		context_tracking_exit(CONTEXT_GUEST);
+
 	if (vtime_accounting_enabled())
 		vtime_guest_exit(current);
 	else
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 97a8122..6b7b96a 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -13,8 +13,9 @@
 	 */
 	bool active;
 	enum ctx_state {
-		IN_KERNEL = 0,
-		IN_USER,
+		CONTEXT_KERNEL = 0,
+		CONTEXT_USER,
+		CONTEXT_GUEST,
 	} state;
 };
 
@@ -34,11 +35,13 @@
 
 static inline bool context_tracking_in_user(void)
 {
-	return __this_cpu_read(context_tracking.state) == IN_USER;
+	return __this_cpu_read(context_tracking.state) == CONTEXT_USER;
 }
 #else
 static inline bool context_tracking_in_user(void) { return false; }
 static inline bool context_tracking_active(void) { return false; }
+static inline bool context_tracking_is_enabled(void) { return false; }
+static inline bool context_tracking_cpu_is_enabled(void) { return false; }
 #endif /* CONFIG_CONTEXT_TRACKING */
 
 #endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 4260e85..c0fb6b1 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -73,6 +73,7 @@
 	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
 	CPU_PRI_MIGRATION	= 10,
+	CPU_PRI_SMPBOOT		= 9,
 	/* bring up workqueues before normal notifiers and down after */
 	CPU_PRI_WORKQUEUE_UP	= 5,
 	CPU_PRI_WORKQUEUE_DOWN	= -5,
@@ -95,6 +96,10 @@
 					* Called on the new cpu, just before
 					* enabling interrupts. Must not sleep,
 					* must not fail */
+#define CPU_DYING_IDLE		0x000B /* CPU (unsigned)v dying, reached
+					* idle loop. */
+#define CPU_BROKEN		0x000C /* CPU (unsigned)v did not die properly,
+					* perhaps due to preemption. */
 
 /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
  * operation in progress
@@ -161,6 +166,7 @@
 }
 #endif
 
+void smpboot_thread_init(void);
 int cpu_up(unsigned int cpu);
 void notify_cpu_starting(unsigned int cpu);
 extern void cpu_maps_update_begin(void);
@@ -208,6 +214,10 @@
 {
 }
 
+static inline void smpboot_thread_init(void)
+{
+}
+
 #endif /* CONFIG_SMP */
 extern struct bus_type cpu_subsys;
 
@@ -271,4 +281,14 @@
 void arch_cpu_idle_exit(void);
 void arch_cpu_idle_dead(void);
 
+DECLARE_PER_CPU(bool, cpu_dead_idle);
+
+int cpu_report_state(int cpu);
+int cpu_check_up_prepare(int cpu);
+void cpu_set_state_online(int cpu);
+#ifdef CONFIG_HOTPLUG_CPU
+bool cpu_wait_death(unsigned int cpu, int seconds);
+bool cpu_report_death(void);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 52cc449..d502e54 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -314,6 +314,28 @@
 struct address_space;
 struct writeback_control;
 
+#define IOCB_EVENTFD		(1 << 0)
+
+struct kiocb {
+	struct file		*ki_filp;
+	loff_t			ki_pos;
+	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+	void			*private;
+	int			ki_flags;
+};
+
+static inline bool is_sync_kiocb(struct kiocb *kiocb)
+{
+	return kiocb->ki_complete == NULL;
+}
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+	*kiocb = (struct kiocb) {
+		.ki_filp = filp,
+	};
+}
+
 /*
  * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
@@ -2145,7 +2167,7 @@
 	const __user char	*uptr;	/* original userland pointer */
 	struct audit_names	*aname;
 	int			refcnt;
-	bool			separate; /* should "name" be freed? */
+	const char		iname[];
 };
 
 extern long vfs_truncate(struct path *, loff_t);
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c674ee8..46e83c2 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -13,6 +13,7 @@
 struct trace_buffer;
 struct tracer;
 struct dentry;
+struct bpf_prog;
 
 struct trace_print_flags {
 	unsigned long		mask;
@@ -202,7 +203,7 @@
 struct ftrace_event_call;
 
 struct ftrace_event_class {
-	char			*system;
+	const char		*system;
 	void			*probe;
 #ifdef CONFIG_PERF_EVENTS
 	void			*perf_probe;
@@ -252,6 +253,7 @@
 	TRACE_EVENT_FL_WAS_ENABLED_BIT,
 	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
 	TRACE_EVENT_FL_TRACEPOINT_BIT,
+	TRACE_EVENT_FL_KPROBE_BIT,
 };
 
 /*
@@ -265,6 +267,7 @@
  *                     it is best to clear the buffers that used it).
  *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
  *  TRACEPOINT    - Event is a tracepoint
+ *  KPROBE        - Event is a kprobe
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -274,6 +277,7 @@
 	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
 	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
 	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
+	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
 };
 
 struct ftrace_event_call {
@@ -285,7 +289,7 @@
 		struct tracepoint	*tp;
 	};
 	struct trace_event	event;
-	const char		*print_fmt;
+	char			*print_fmt;
 	struct event_filter	*filter;
 	void			*mod;
 	void			*data;
@@ -303,6 +307,7 @@
 #ifdef CONFIG_PERF_EVENTS
 	int				perf_refcount;
 	struct hlist_head __percpu	*perf_events;
+	struct bpf_prog			*prog;
 
 	int	(*perf_perm)(struct ftrace_event_call *,
 			     struct perf_event *);
@@ -548,6 +553,15 @@
 		event_triggers_post_call(file, tt);
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+#else
+static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+	return 1;
+}
+#endif
+
 enum {
 	FILTER_OTHER = 0,
 	FILTER_STATIC_STRING,
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 4173a8f..0408421 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -49,19 +49,43 @@
 };
 
 /**
+ * struct sensor_hub_pending - Synchronous read pending information
+ * @status:		Pending status true/false.
+ * @ready:		Completion synchronization data.
+ * @usage_id:		Usage id for physical device, E.g. Gyro usage id.
+ * @attr_usage_id:	Usage Id of a field, E.g. X-AXIS for a gyro.
+ * @raw_size:		Response size for a read request.
+ * @raw_data:		Place holder for received response.
+ */
+struct sensor_hub_pending {
+	bool status;
+	struct completion ready;
+	u32 usage_id;
+	u32 attr_usage_id;
+	int raw_size;
+	u8  *raw_data;
+};
+
+/**
  * struct hid_sensor_hub_device - Stores the hub instance data
  * @hdev:		Stores the hid instance.
  * @vendor_id:		Vendor id of hub device.
  * @product_id:		Product id of hub device.
+ * @usage:		Usage id for this hub device instance.
  * @start_collection_index: Starting index for a phy type collection
  * @end_collection_index: Last index for a phy type collection
+ * @mutex:		synchronizing mutex.
+ * @pending:		Holds information of pending sync read request.
  */
 struct hid_sensor_hub_device {
 	struct hid_device *hdev;
 	u32 vendor_id;
 	u32 product_id;
+	u32 usage;
 	int start_collection_index;
 	int end_collection_index;
+	struct mutex mutex;
+	struct sensor_hub_pending pending;
 };
 
 /**
@@ -152,40 +176,51 @@
 * @usage_id:	Attribute usage id of parent physical device as per spec
 * @attr_usage_id:	Attribute usage id as per spec
 * @report_id:	Report id to look for
+* @flag:      Synchronous or asynchronous read
 *
-* Issues a synchronous read request for an input attribute. Returns
-* data upto 32 bits. Since client can get events, so this call should
-* not be used for data paths, this will impact performance.
+* Issues a synchronous or asynchronous read request for an input attribute.
+* Returns data upto 32 bits.
 */
 
+enum sensor_hub_read_flags {
+	SENSOR_HUB_SYNC,
+	SENSOR_HUB_ASYNC,
+};
+
 int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
-			u32 usage_id,
-			u32 attr_usage_id, u32 report_id);
+ 					u32 usage_id,
+ 					u32 attr_usage_id, u32 report_id,
+ 					enum sensor_hub_read_flags flag
+);
+
 /**
 * sensor_hub_set_feature() - Feature set request
 * @hsdev:	Hub device instance.
 * @report_id:	Report id to look for
 * @field_index:	Field index inside a report
-* @value:	Value to set
+* @buffer_size: size of the buffer
+* @buffer:	buffer to use in the feature set
 *
 * Used to set a field in feature report. For example this can set polling
 * interval, sensitivity, activate/deactivate state.
 */
 int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
-			u32 field_index, s32 value);
+			   u32 field_index, int buffer_size, void *buffer);
 
 /**
 * sensor_hub_get_feature() - Feature get request
 * @hsdev:	Hub device instance.
 * @report_id:	Report id to look for
 * @field_index:	Field index inside a report
-* @value:	Place holder for return value
+* @buffer_size:	size of the buffer
+* @buffer:	buffer to copy output
 *
 * Used to get a field in feature report. For example this can get polling
-* interval, sensitivity, activate/deactivate state.
+* interval, sensitivity, activate/deactivate state. On success it returns
+* number of bytes copied to buffer. On failure, it returns value < 0.
 */
 int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
-			u32 field_index, s32 *value);
+			   u32 field_index, int buffer_size, void *buffer);
 
 /* hid-sensor-attributes */
 
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 109f0e6..f2ee90a 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -21,6 +21,8 @@
 
 #define HID_MAX_PHY_DEVICES					0xFF
 
+#define HID_USAGE_SENSOR_COLLECTION				0x200001
+
 /* Accel 3D (200073) */
 #define HID_USAGE_SENSOR_ACCEL_3D				0x200073
 #define HID_USAGE_SENSOR_DATA_ACCELERATION			0x200452
diff --git a/include/linux/hid.h b/include/linux/hid.h
index f94cf28..176b436 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -159,6 +159,7 @@
 #define HID_UP_LED		0x00080000
 #define HID_UP_BUTTON		0x00090000
 #define HID_UP_ORDINAL		0x000a0000
+#define HID_UP_TELEPHONY	0x000b0000
 #define HID_UP_CONSUMER		0x000c0000
 #define HID_UP_DIGITIZER	0x000d0000
 #define HID_UP_PID		0x000f0000
@@ -269,6 +270,7 @@
 #define HID_DG_DEVICEINDEX	0x000d0053
 #define HID_DG_CONTACTCOUNT	0x000d0054
 #define HID_DG_CONTACTMAX	0x000d0055
+#define HID_DG_BUTTONTYPE	0x000d0059
 #define HID_DG_BARRELSWITCH2	0x000d005a
 #define HID_DG_TOOLSERIALNUMBER	0x000d005b
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 82af5d0..ad45054 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -779,7 +779,8 @@
 	 * one time slice). Lets treat guest mode as quiescent state, just like
 	 * we do with user-mode execution.
 	 */
-	rcu_virt_note_context_switch(smp_processor_id());
+	if (!context_tracking_cpu_is_enabled())
+		rcu_virt_note_context_switch(smp_processor_id());
 }
 
 static inline void kvm_guest_exit(void)
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 95023fd..ee6dbb3 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -123,10 +123,10 @@
 	enum klp_state state;
 };
 
-extern int klp_register_patch(struct klp_patch *);
-extern int klp_unregister_patch(struct klp_patch *);
-extern int klp_enable_patch(struct klp_patch *);
-extern int klp_disable_patch(struct klp_patch *);
+int klp_register_patch(struct klp_patch *);
+int klp_unregister_patch(struct klp_patch *);
+int klp_enable_patch(struct klp_patch *);
+int klp_disable_patch(struct klp_patch *);
 
 #endif /* CONFIG_LIVEPATCH */
 
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 74ab231..066ba41 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -531,8 +531,13 @@
 # define might_lock_read(lock) do { } while (0)
 #endif
 
-#ifdef CONFIG_PROVE_RCU
+#ifdef CONFIG_LOCKDEP
 void lockdep_rcu_suspicious(const char *file, const int line, const char *s);
+#else
+static inline void
+lockdep_rcu_suspicious(const char *file, const int line, const char *s)
+{
+}
 #endif
 
 #endif /* __LINUX_LOCKDEP_H */
diff --git a/include/linux/module.h b/include/linux/module.h
index b03485b..c883b86 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -338,6 +338,8 @@
 #ifdef CONFIG_EVENT_TRACING
 	struct ftrace_event_call **trace_events;
 	unsigned int num_trace_events;
+	struct trace_enum_map **trace_enums;
+	unsigned int num_trace_enums;
 #endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 	unsigned int num_ftrace_callsites;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2b62198..61992cf 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -53,6 +53,7 @@
 #include <linux/sysfs.h>
 #include <linux/perf_regs.h>
 #include <linux/workqueue.h>
+#include <linux/cgroup.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -118,10 +119,19 @@
 			struct hrtimer	hrtimer;
 		};
 		struct { /* tracepoint */
-			struct task_struct	*tp_target;
 			/* for tp_event->class */
 			struct list_head	tp_list;
 		};
+		struct { /* intel_cqm */
+			int			cqm_state;
+			int			cqm_rmid;
+			struct list_head	cqm_events_entry;
+			struct list_head	cqm_groups_entry;
+			struct list_head	cqm_group_entry;
+		};
+		struct { /* itrace */
+			int			itrace_started;
+		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
 			/*
@@ -129,12 +139,12 @@
 			 * problem hw_breakpoint has with context
 			 * creation and event initalization.
 			 */
-			struct task_struct		*bp_target;
 			struct arch_hw_breakpoint	info;
 			struct list_head		bp_list;
 		};
 #endif
 	};
+	struct task_struct		*target;
 	int				state;
 	local64_t			prev_count;
 	u64				sample_period;
@@ -166,6 +176,11 @@
  * pmu::capabilities flags
  */
 #define PERF_PMU_CAP_NO_INTERRUPT		0x01
+#define PERF_PMU_CAP_NO_NMI			0x02
+#define PERF_PMU_CAP_AUX_NO_SG			0x04
+#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
+#define PERF_PMU_CAP_EXCLUSIVE			0x10
+#define PERF_PMU_CAP_ITRACE			0x20
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -186,6 +201,7 @@
 
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
+	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
 
@@ -262,9 +278,32 @@
 	int (*event_idx)		(struct perf_event *event); /*optional */
 
 	/*
-	 * flush branch stack on context-switches (needed in cpu-wide mode)
+	 * context-switches callback
 	 */
-	void (*flush_branch_stack)	(void);
+	void (*sched_task)		(struct perf_event_context *ctx,
+					bool sched_in);
+	/*
+	 * PMU specific data size
+	 */
+	size_t				task_ctx_size;
+
+
+	/*
+	 * Return the count value for a counter.
+	 */
+	u64 (*count)			(struct perf_event *event); /*optional*/
+
+	/*
+	 * Set up pmu-private data structures for an AUX area
+	 */
+	void *(*setup_aux)		(int cpu, void **pages,
+					 int nr_pages, bool overwrite);
+					/* optional */
+
+	/*
+	 * Free pmu-private AUX data structures
+	 */
+	void (*free_aux)		(void *aux); /* optional */
 };
 
 /**
@@ -300,6 +339,7 @@
 #define PERF_ATTACH_CONTEXT	0x01
 #define PERF_ATTACH_GROUP	0x02
 #define PERF_ATTACH_TASK	0x04
+#define PERF_ATTACH_TASK_DATA	0x08
 
 struct perf_cgroup;
 struct ring_buffer;
@@ -438,6 +478,7 @@
 	struct pid_namespace		*ns;
 	u64				id;
 
+	u64				(*clock)(void);
 	perf_overflow_handler_t		overflow_handler;
 	void				*overflow_handler_context;
 
@@ -504,7 +545,7 @@
 	u64				generation;
 	int				pin_count;
 	int				nr_cgroups;	 /* cgroup evts */
-	int				nr_branch_stack; /* branch_stack evt */
+	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
 
 	struct delayed_work		orphans_remove;
@@ -536,12 +577,52 @@
 	struct ring_buffer		*rb;
 	unsigned long			wakeup;
 	unsigned long			size;
-	void				*addr;
+	union {
+		void			*addr;
+		unsigned long		head;
+	};
 	int				page;
 };
 
+#ifdef CONFIG_CGROUP_PERF
+
+/*
+ * perf_cgroup_info keeps track of time_enabled for a cgroup.
+ * This is a per-cpu dynamically allocated data structure.
+ */
+struct perf_cgroup_info {
+	u64				time;
+	u64				timestamp;
+};
+
+struct perf_cgroup {
+	struct cgroup_subsys_state	css;
+	struct perf_cgroup_info	__percpu *info;
+};
+
+/*
+ * Must ensure cgroup is pinned (css_get) before calling
+ * this function. In other words, we cannot call this function
+ * if there is no cgroup event for the current CPU context.
+ */
+static inline struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task)
+{
+	return container_of(task_css(task, perf_event_cgrp_id),
+			    struct perf_cgroup, css);
+}
+#endif /* CONFIG_CGROUP_PERF */
+
 #ifdef CONFIG_PERF_EVENTS
 
+extern void *perf_aux_output_begin(struct perf_output_handle *handle,
+				   struct perf_event *event);
+extern void perf_aux_output_end(struct perf_output_handle *handle,
+				unsigned long size, bool truncated);
+extern int perf_aux_output_skip(struct perf_output_handle *handle,
+				unsigned long size);
+extern void *perf_get_aux(struct perf_output_handle *handle);
+
 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
 
@@ -558,6 +639,8 @@
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
+extern void perf_sched_cb_dec(struct pmu *pmu);
+extern void perf_sched_cb_inc(struct pmu *pmu);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
 extern int perf_event_refresh(struct perf_event *event, int refresh);
@@ -731,6 +814,11 @@
 		__perf_event_task_sched_out(prev, next);
 }
 
+static inline u64 __perf_event_count(struct perf_event *event)
+{
+	return local64_read(&event->count) + atomic64_read(&event->child_count);
+}
+
 extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -800,6 +888,16 @@
 	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
 }
 
+static inline bool needs_branch_stack(struct perf_event *event)
+{
+	return event->attr.branch_sample_type != 0;
+}
+
+static inline bool has_aux(struct perf_event *event)
+{
+	return event->pmu->setup_aux;
+}
+
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size);
 extern void perf_output_end(struct perf_output_handle *handle);
@@ -815,6 +913,17 @@
 extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
+static inline void *
+perf_aux_output_begin(struct perf_output_handle *handle,
+		      struct perf_event *event)				{ return NULL; }
+static inline void
+perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
+		    bool truncated)					{ }
+static inline int
+perf_aux_output_skip(struct perf_output_handle *handle,
+		     unsigned long size)				{ return -EINVAL; }
+static inline void *
+perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
 static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7809749..573a5af 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -48,6 +48,26 @@
 
 extern int rcu_expedited; /* for sysctl */
 
+#ifdef CONFIG_TINY_RCU
+/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
+static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
+{
+	return false;
+}
+
+static inline void rcu_expedite_gp(void)
+{
+}
+
+static inline void rcu_unexpedite_gp(void)
+{
+}
+#else /* #ifdef CONFIG_TINY_RCU */
+bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
+void rcu_expedite_gp(void);
+void rcu_unexpedite_gp(void);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
 enum rcutorture_type {
 	RCU_FLAVOR,
 	RCU_BH_FLAVOR,
@@ -195,6 +215,15 @@
 
 void synchronize_sched(void);
 
+/*
+ * Structure allowing asynchronous waiting on RCU.
+ */
+struct rcu_synchronize {
+	struct rcu_head head;
+	struct completion completion;
+};
+void wakeme_after_rcu(struct rcu_head *head);
+
 /**
  * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
  * @head: structure to be used for queueing the RCU updates.
@@ -258,6 +287,7 @@
 
 /* Internal to kernel */
 void rcu_init(void);
+void rcu_end_inkernel_boot(void);
 void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
@@ -266,6 +296,8 @@
 void rcu_idle_exit(void);
 void rcu_irq_enter(void);
 void rcu_irq_exit(void);
+int rcu_cpu_notify(struct notifier_block *self,
+		   unsigned long action, void *hcpu);
 
 #ifdef CONFIG_RCU_STALL_COMMON
 void rcu_sysrq_start(void);
@@ -720,7 +752,7 @@
  * annotated as __rcu.
  */
 #define rcu_dereference_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
+	__rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu)
 
 /**
  * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
@@ -730,7 +762,7 @@
  * This is the RCU-bh counterpart to rcu_dereference_check().
  */
 #define rcu_dereference_bh_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
+	__rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu)
 
 /**
  * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
@@ -740,7 +772,7 @@
  * This is the RCU-sched counterpart to rcu_dereference_check().
  */
 #define rcu_dereference_sched_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
+	__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
 				__rcu)
 
 #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
@@ -933,9 +965,9 @@
 {
 	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_unlock() used illegally while idle");
-	rcu_lock_release(&rcu_lock_map);
 	__release(RCU);
 	__rcu_read_unlock();
+	rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */
 }
 
 /**
diff --git a/include/linux/security.h b/include/linux/security.h
index a1b7dbd..4e14e3d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1556,7 +1556,7 @@
 	int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
 	int (*inode_permission) (struct inode *inode, int mask);
 	int (*inode_setattr)	(struct dentry *dentry, struct iattr *attr);
-	int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry);
+	int (*inode_getattr) (const struct path *path);
 	int (*inode_setxattr) (struct dentry *dentry, const char *name,
 			       const void *value, size_t size, int flags);
 	void (*inode_post_setxattr) (struct dentry *dentry, const char *name,
@@ -1843,7 +1843,7 @@
 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
-int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
+int security_inode_getattr(const struct path *path);
 int security_inode_setxattr(struct dentry *dentry, const char *name,
 			    const void *value, size_t size, int flags);
 void security_inode_post_setxattr(struct dentry *dentry, const char *name,
@@ -2259,8 +2259,7 @@
 	return 0;
 }
 
-static inline int security_inode_getattr(struct vfsmount *mnt,
-					  struct dentry *dentry)
+static inline int security_inode_getattr(const struct path *path)
 {
 	return 0;
 }
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index 13e9296..d600afb 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -47,6 +47,5 @@
 
 int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
 void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
-int smpboot_thread_schedule(void);
 
 #endif
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 9cfd962..bdeb456 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -182,7 +182,7 @@
  * lockdep_is_held() calls.
  */
 #define srcu_dereference_check(p, sp, c) \
-	__rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu)
+	__rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu)
 
 /**
  * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 669045a..0a34489 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -7,8 +7,6 @@
 struct pt_regs;
 
 #ifdef CONFIG_STACKTRACE
-struct task_struct;
-
 struct stack_trace {
 	unsigned int nr_entries, max_entries;
 	unsigned long *entries;
diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h
new file mode 100644
index 0000000..5b727a1
--- /dev/null
+++ b/include/linux/tracefs.h
@@ -0,0 +1,45 @@
+/*
+ *  tracefs.h - a pseudo file system for activating tracing
+ *
+ * Based on debugfs by: 2004 Greg Kroah-Hartman <greg@kroah.com>
+ *
+ *  Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License version
+ *	2 as published by the Free Software Foundation.
+ *
+ * tracefs is the file system that is used by the tracing infrastructure.
+ *
+ */
+
+#ifndef _TRACEFS_H_
+#define _TRACEFS_H_
+
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include <linux/types.h>
+
+struct file_operations;
+
+#ifdef CONFIG_TRACING
+
+struct dentry *tracefs_create_file(const char *name, umode_t mode,
+				   struct dentry *parent, void *data,
+				   const struct file_operations *fops);
+
+struct dentry *tracefs_create_dir(const char *name, struct dentry *parent);
+
+void tracefs_remove(struct dentry *dentry);
+void tracefs_remove_recursive(struct dentry *dentry);
+
+struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
+					   int (*mkdir)(const char *name),
+					   int (*rmdir)(const char *name));
+
+bool tracefs_initialized(void);
+
+#endif /* CONFIG_TRACING */
+
+#endif
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c728513..a5f7f3e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -36,6 +36,12 @@
 	struct tracepoint_func __rcu *funcs;
 };
 
+struct trace_enum_map {
+	const char		*system;
+	const char		*enum_string;
+	unsigned long		enum_value;
+};
+
 extern int
 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
 extern int
@@ -87,6 +93,8 @@
 
 #define PARAMS(args...) args
 
+#define TRACE_DEFINE_ENUM(x)
+
 #endif /* _LINUX_TRACEPOINT_H */
 
 /*
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 7188029..15f11fb 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -76,6 +76,7 @@
 		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
+int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
@@ -139,4 +140,18 @@
 size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 
+int import_iovec(int type, const struct iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i);
+
+#ifdef CONFIG_COMPAT
+struct compat_iovec;
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i);
+#endif
+
+int import_single_range(int type, void __user *buf, size_t len,
+		 struct iovec *iov, struct iov_iter *i);
+
 #endif
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 395b70e..a746bf5 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -137,4 +137,12 @@
 extern int watchdog_register_device(struct watchdog_device *);
 extern void watchdog_unregister_device(struct watchdog_device *);
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+void watchdog_nmi_disable_all(void);
+void watchdog_nmi_enable_all(void);
+#else
+static inline void watchdog_nmi_disable_all(void) {}
+static inline void watchdog_nmi_enable_all(void) {}
+#endif
+
 #endif  /* ifndef _LINUX_WATCHDOG_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index e4079c2..81c81ea 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -57,7 +57,6 @@
 #include <linux/page_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/static_key.h>
-#include <linux/aio.h>
 #include <linux/sched.h>
 
 #include <linux/filter.h>
diff --git a/include/trace/events/9p.h b/include/trace/events/9p.h
index a066636..633ee9e 100644
--- a/include/trace/events/9p.h
+++ b/include/trace/events/9p.h
@@ -6,76 +6,95 @@
 
 #include <linux/tracepoint.h>
 
+#define P9_MSG_T							\
+		EM( P9_TLERROR,		"P9_TLERROR" )			\
+		EM( P9_RLERROR,		"P9_RLERROR" )			\
+		EM( P9_TSTATFS,		"P9_TSTATFS" )			\
+		EM( P9_RSTATFS,		"P9_RSTATFS" )			\
+		EM( P9_TLOPEN,		"P9_TLOPEN" )			\
+		EM( P9_RLOPEN,		"P9_RLOPEN" )			\
+		EM( P9_TLCREATE,	"P9_TLCREATE" )			\
+		EM( P9_RLCREATE,	"P9_RLCREATE" )			\
+		EM( P9_TSYMLINK,	"P9_TSYMLINK" )			\
+		EM( P9_RSYMLINK,	"P9_RSYMLINK" )			\
+		EM( P9_TMKNOD,		"P9_TMKNOD" )			\
+		EM( P9_RMKNOD,		"P9_RMKNOD" )			\
+		EM( P9_TRENAME,		"P9_TRENAME" )			\
+		EM( P9_RRENAME,		"P9_RRENAME" )			\
+		EM( P9_TREADLINK,	"P9_TREADLINK" )		\
+		EM( P9_RREADLINK,	"P9_RREADLINK" )		\
+		EM( P9_TGETATTR,	"P9_TGETATTR" )			\
+		EM( P9_RGETATTR,	"P9_RGETATTR" )			\
+		EM( P9_TSETATTR,	"P9_TSETATTR" )			\
+		EM( P9_RSETATTR,	"P9_RSETATTR" )			\
+		EM( P9_TXATTRWALK,	"P9_TXATTRWALK" )		\
+		EM( P9_RXATTRWALK,	"P9_RXATTRWALK" )		\
+		EM( P9_TXATTRCREATE,	"P9_TXATTRCREATE" )		\
+		EM( P9_RXATTRCREATE,	"P9_RXATTRCREATE" )		\
+		EM( P9_TREADDIR,	"P9_TREADDIR" )			\
+		EM( P9_RREADDIR,	"P9_RREADDIR" )			\
+		EM( P9_TFSYNC,		"P9_TFSYNC" )			\
+		EM( P9_RFSYNC,		"P9_RFSYNC" )			\
+		EM( P9_TLOCK,		"P9_TLOCK" )			\
+		EM( P9_RLOCK,		"P9_RLOCK" )			\
+		EM( P9_TGETLOCK,	"P9_TGETLOCK" )			\
+		EM( P9_RGETLOCK,	"P9_RGETLOCK" )			\
+		EM( P9_TLINK,		"P9_TLINK" )			\
+		EM( P9_RLINK,		"P9_RLINK" )			\
+		EM( P9_TMKDIR,		"P9_TMKDIR" )			\
+		EM( P9_RMKDIR,		"P9_RMKDIR" )			\
+		EM( P9_TRENAMEAT,	"P9_TRENAMEAT" )		\
+		EM( P9_RRENAMEAT,	"P9_RRENAMEAT" )		\
+		EM( P9_TUNLINKAT,	"P9_TUNLINKAT" )		\
+		EM( P9_RUNLINKAT,	"P9_RUNLINKAT" )		\
+		EM( P9_TVERSION,	"P9_TVERSION" )			\
+		EM( P9_RVERSION,	"P9_RVERSION" )			\
+		EM( P9_TAUTH,		"P9_TAUTH" )			\
+		EM( P9_RAUTH,		"P9_RAUTH" )			\
+		EM( P9_TATTACH,		"P9_TATTACH" )			\
+		EM( P9_RATTACH,		"P9_RATTACH" )			\
+		EM( P9_TERROR,		"P9_TERROR" )			\
+		EM( P9_RERROR,		"P9_RERROR" )			\
+		EM( P9_TFLUSH,		"P9_TFLUSH" )			\
+		EM( P9_RFLUSH,		"P9_RFLUSH" )			\
+		EM( P9_TWALK,		"P9_TWALK" )			\
+		EM( P9_RWALK,		"P9_RWALK" )			\
+		EM( P9_TOPEN,		"P9_TOPEN" )			\
+		EM( P9_ROPEN,		"P9_ROPEN" )			\
+		EM( P9_TCREATE,		"P9_TCREATE" )			\
+		EM( P9_RCREATE,		"P9_RCREATE" )			\
+		EM( P9_TREAD,		"P9_TREAD" )			\
+		EM( P9_RREAD,		"P9_RREAD" )			\
+		EM( P9_TWRITE,		"P9_TWRITE" )			\
+		EM( P9_RWRITE,		"P9_RWRITE" )			\
+		EM( P9_TCLUNK,		"P9_TCLUNK" )			\
+		EM( P9_RCLUNK,		"P9_RCLUNK" )			\
+		EM( P9_TREMOVE,		"P9_TREMOVE" )			\
+		EM( P9_RREMOVE,		"P9_RREMOVE" )			\
+		EM( P9_TSTAT,		"P9_TSTAT" )			\
+		EM( P9_RSTAT,		"P9_RSTAT" )			\
+		EM( P9_TWSTAT,		"P9_TWSTAT" )			\
+		EMe(P9_RWSTAT,		"P9_RWSTAT" )
+
+/* Define EM() to export the enums to userspace via TRACE_DEFINE_ENUM() */
+#undef EM
+#undef EMe
+#define EM(a, b)	TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)	TRACE_DEFINE_ENUM(a);
+
+P9_MSG_T
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	{ a, b },
+#define EMe(a, b)	{ a, b }
+
 #define show_9p_op(type)						\
-	__print_symbolic(type,						\
-			 { P9_TLERROR,		"P9_TLERROR" },		\
-			 { P9_RLERROR,		"P9_RLERROR" },		\
-			 { P9_TSTATFS,		"P9_TSTATFS" },		\
-			 { P9_RSTATFS,		"P9_RSTATFS" },		\
-			 { P9_TLOPEN,		"P9_TLOPEN" },		\
-			 { P9_RLOPEN,		"P9_RLOPEN" },		\
-			 { P9_TLCREATE,		"P9_TLCREATE" },	\
-			 { P9_RLCREATE,		"P9_RLCREATE" },	\
-			 { P9_TSYMLINK,		"P9_TSYMLINK" },	\
-			 { P9_RSYMLINK,		"P9_RSYMLINK" },	\
-			 { P9_TMKNOD,		"P9_TMKNOD" },		\
-			 { P9_RMKNOD,		"P9_RMKNOD" },		\
-			 { P9_TRENAME,		"P9_TRENAME" },		\
-			 { P9_RRENAME,		"P9_RRENAME" },		\
-			 { P9_TREADLINK,	"P9_TREADLINK" },	\
-			 { P9_RREADLINK,	"P9_RREADLINK" },	\
-			 { P9_TGETATTR,		"P9_TGETATTR" },	\
-			 { P9_RGETATTR,		"P9_RGETATTR" },	\
-			 { P9_TSETATTR,		"P9_TSETATTR" },	\
-			 { P9_RSETATTR,		"P9_RSETATTR" },	\
-			 { P9_TXATTRWALK,	"P9_TXATTRWALK" },	\
-			 { P9_RXATTRWALK,	"P9_RXATTRWALK" },	\
-			 { P9_TXATTRCREATE,	"P9_TXATTRCREATE" },	\
-			 { P9_RXATTRCREATE,	"P9_RXATTRCREATE" },	\
-			 { P9_TREADDIR,		"P9_TREADDIR" },	\
-			 { P9_RREADDIR,		"P9_RREADDIR" },	\
-			 { P9_TFSYNC,		"P9_TFSYNC" },		\
-			 { P9_RFSYNC,		"P9_RFSYNC" },		\
-			 { P9_TLOCK,		"P9_TLOCK" },		\
-			 { P9_RLOCK,		"P9_RLOCK" },		\
-			 { P9_TGETLOCK,		"P9_TGETLOCK" },	\
-			 { P9_RGETLOCK,		"P9_RGETLOCK" },	\
-			 { P9_TLINK,		"P9_TLINK" },		\
-			 { P9_RLINK,		"P9_RLINK" },		\
-			 { P9_TMKDIR,		"P9_TMKDIR" },		\
-			 { P9_RMKDIR,		"P9_RMKDIR" },		\
-			 { P9_TRENAMEAT,	"P9_TRENAMEAT" },	\
-			 { P9_RRENAMEAT,	"P9_RRENAMEAT" },	\
-			 { P9_TUNLINKAT,	"P9_TUNLINKAT" },	\
-			 { P9_RUNLINKAT,	"P9_RUNLINKAT" },	\
-			 { P9_TVERSION,		"P9_TVERSION" },	\
-			 { P9_RVERSION,		"P9_RVERSION" },	\
-			 { P9_TAUTH,		"P9_TAUTH" },		\
-			 { P9_RAUTH,		"P9_RAUTH" },		\
-			 { P9_TATTACH,		"P9_TATTACH" },		\
-			 { P9_RATTACH,		"P9_RATTACH" },		\
-			 { P9_TERROR,		"P9_TERROR" },		\
-			 { P9_RERROR,		"P9_RERROR" },		\
-			 { P9_TFLUSH,		"P9_TFLUSH" },		\
-			 { P9_RFLUSH,		"P9_RFLUSH" },		\
-			 { P9_TWALK,		"P9_TWALK" },		\
-			 { P9_RWALK,		"P9_RWALK" },		\
-			 { P9_TOPEN,		"P9_TOPEN" },		\
-			 { P9_ROPEN,		"P9_ROPEN" },		\
-			 { P9_TCREATE,		"P9_TCREATE" },		\
-			 { P9_RCREATE,		"P9_RCREATE" },		\
-			 { P9_TREAD,		"P9_TREAD" },		\
-			 { P9_RREAD,		"P9_RREAD" },		\
-			 { P9_TWRITE,		"P9_TWRITE" },		\
-			 { P9_RWRITE,		"P9_RWRITE" },		\
-			 { P9_TCLUNK,		"P9_TCLUNK" },		\
-			 { P9_RCLUNK,		"P9_RCLUNK" },		\
-			 { P9_TREMOVE,		"P9_TREMOVE" },		\
-			 { P9_RREMOVE,		"P9_RREMOVE" },		\
-			 { P9_TSTAT,		"P9_TSTAT" },		\
-			 { P9_RSTAT,		"P9_RSTAT" },		\
-			 { P9_TWSTAT,		"P9_TWSTAT" },		\
-			 { P9_RWSTAT,		"P9_RWSTAT" })
+	__print_symbolic(type, P9_MSG_T)
 
 TRACE_EVENT(9p_client_req,
 	    TP_PROTO(struct p9_client *clnt, int8_t type, int tag),
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 1faecea..572e650 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -962,7 +962,7 @@
 		__entry->ip	= IP
 	),
 
-	TP_printk("state=%p; mask = %s; caller = %pF", __entry->state,
+	TP_printk("state=%p; mask = %s; caller = %pS", __entry->state,
 		  show_gfp_flags(__entry->mask), (void *)__entry->ip)
 );
 
@@ -982,7 +982,7 @@
 		__entry->ip = IP
 	),
 
-	TP_printk(" state=%p; caller = %pF", __entry->state,
+	TP_printk(" state=%p; caller = %pS", __entry->state,
 		  (void *)__entry->ip)
 );
 
diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h
index 6797b9d..7f20707 100644
--- a/include/trace/events/ext3.h
+++ b/include/trace/events/ext3.h
@@ -144,7 +144,7 @@
 		__entry->ip	= IP;
 	),
 
-	TP_printk("dev %d,%d ino %lu caller %pF",
+	TP_printk("dev %d,%d ino %lu caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, (void *)__entry->ip)
 );
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 6e5abd6..47fca36 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -240,7 +240,7 @@
 		__entry->ip	= IP;
 	),
 
-	TP_printk("dev %d,%d ino %lu caller %pF",
+	TP_printk("dev %d,%d ino %lu caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, (void *)__entry->ip)
 );
@@ -1762,7 +1762,7 @@
 		__entry->rsv_blocks	 = rsv_blocks;
 	),
 
-	TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pF",
+	TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->blocks, __entry->rsv_blocks, (void *)__entry->ip)
 );
@@ -1784,7 +1784,7 @@
 		__entry->blocks		 = blocks;
 	),
 
-	TP_printk("dev %d,%d blocks, %d caller %pF",
+	TP_printk("dev %d,%d blocks, %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->blocks, (void *)__entry->ip)
 );
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 5422dbf..36f4536 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -9,6 +9,36 @@
 #define show_dev(entry)		MAJOR(entry->dev), MINOR(entry->dev)
 #define show_dev_ino(entry)	show_dev(entry), (unsigned long)entry->ino
 
+TRACE_DEFINE_ENUM(NODE);
+TRACE_DEFINE_ENUM(DATA);
+TRACE_DEFINE_ENUM(META);
+TRACE_DEFINE_ENUM(META_FLUSH);
+TRACE_DEFINE_ENUM(CURSEG_HOT_DATA);
+TRACE_DEFINE_ENUM(CURSEG_WARM_DATA);
+TRACE_DEFINE_ENUM(CURSEG_COLD_DATA);
+TRACE_DEFINE_ENUM(CURSEG_HOT_NODE);
+TRACE_DEFINE_ENUM(CURSEG_WARM_NODE);
+TRACE_DEFINE_ENUM(CURSEG_COLD_NODE);
+TRACE_DEFINE_ENUM(NO_CHECK_TYPE);
+TRACE_DEFINE_ENUM(GC_GREEDY);
+TRACE_DEFINE_ENUM(GC_CB);
+TRACE_DEFINE_ENUM(FG_GC);
+TRACE_DEFINE_ENUM(BG_GC);
+TRACE_DEFINE_ENUM(LFS);
+TRACE_DEFINE_ENUM(SSR);
+TRACE_DEFINE_ENUM(__REQ_RAHEAD);
+TRACE_DEFINE_ENUM(__REQ_WRITE);
+TRACE_DEFINE_ENUM(__REQ_SYNC);
+TRACE_DEFINE_ENUM(__REQ_NOIDLE);
+TRACE_DEFINE_ENUM(__REQ_FLUSH);
+TRACE_DEFINE_ENUM(__REQ_FUA);
+TRACE_DEFINE_ENUM(__REQ_PRIO);
+TRACE_DEFINE_ENUM(__REQ_META);
+TRACE_DEFINE_ENUM(CP_UMOUNT);
+TRACE_DEFINE_ENUM(CP_FASTBOOT);
+TRACE_DEFINE_ENUM(CP_SYNC);
+TRACE_DEFINE_ENUM(CP_DISCARD);
+
 #define show_block_type(type)						\
 	__print_symbolic(type,						\
 		{ NODE,		"NODE" },				\
diff --git a/include/trace/events/intel-sst.h b/include/trace/events/intel-sst.h
index 76c72d3..edc24e6 100644
--- a/include/trace/events/intel-sst.h
+++ b/include/trace/events/intel-sst.h
@@ -1,6 +1,13 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM intel-sst
 
+/*
+ * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a
+ * legitimate C variable. It is not exported to user space.
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR intel_sst
+
 #if !defined(_TRACE_INTEL_SST_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_INTEL_SST_H
 
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 3608beb..ff8f6c0 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -9,19 +9,34 @@
 struct irqaction;
 struct softirq_action;
 
-#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define SOFTIRQ_NAME_LIST				\
+			 softirq_name(HI)		\
+			 softirq_name(TIMER)		\
+			 softirq_name(NET_TX)		\
+			 softirq_name(NET_RX)		\
+			 softirq_name(BLOCK)		\
+			 softirq_name(BLOCK_IOPOLL)	\
+			 softirq_name(TASKLET)		\
+			 softirq_name(SCHED)		\
+			 softirq_name(HRTIMER)		\
+			 softirq_name_end(RCU)
+
+#undef softirq_name
+#undef softirq_name_end
+
+#define softirq_name(sirq) TRACE_DEFINE_ENUM(sirq##_SOFTIRQ);
+#define softirq_name_end(sirq)  TRACE_DEFINE_ENUM(sirq##_SOFTIRQ);
+
+SOFTIRQ_NAME_LIST
+
+#undef softirq_name
+#undef softirq_name_end
+
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq },
+#define softirq_name_end(sirq) { sirq##_SOFTIRQ, #sirq }
+
 #define show_softirq_name(val)				\
-	__print_symbolic(val,				\
-			 softirq_name(HI),		\
-			 softirq_name(TIMER),		\
-			 softirq_name(NET_TX),		\
-			 softirq_name(NET_RX),		\
-			 softirq_name(BLOCK),		\
-			 softirq_name(BLOCK_IOPOLL),	\
-			 softirq_name(TASKLET),		\
-			 softirq_name(SCHED),		\
-			 softirq_name(HRTIMER),		\
-			 softirq_name(RCU))
+	__print_symbolic(val, SOFTIRQ_NAME_LIST)
 
 /**
  * irq_handler_entry - called immediately before the irq action handler
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index dd2b546..539b25a 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -7,18 +7,40 @@
 #include <linux/tracepoint.h>
 
 #define MIGRATE_MODE						\
-	{MIGRATE_ASYNC,		"MIGRATE_ASYNC"},		\
-	{MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT"},		\
-	{MIGRATE_SYNC,		"MIGRATE_SYNC"}		
+	EM( MIGRATE_ASYNC,	"MIGRATE_ASYNC")		\
+	EM( MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT")		\
+	EMe(MIGRATE_SYNC,	"MIGRATE_SYNC")
+
 
 #define MIGRATE_REASON						\
-	{MR_COMPACTION,		"compaction"},			\
-	{MR_MEMORY_FAILURE,	"memory_failure"},		\
-	{MR_MEMORY_HOTPLUG,	"memory_hotplug"},		\
-	{MR_SYSCALL,		"syscall_or_cpuset"},		\
-	{MR_MEMPOLICY_MBIND,	"mempolicy_mbind"},		\
-	{MR_NUMA_MISPLACED,	"numa_misplaced"},		\
-	{MR_CMA,		"cma"}
+	EM( MR_COMPACTION,	"compaction")			\
+	EM( MR_MEMORY_FAILURE,	"memory_failure")		\
+	EM( MR_MEMORY_HOTPLUG,	"memory_hotplug")		\
+	EM( MR_SYSCALL,		"syscall_or_cpuset")		\
+	EM( MR_MEMPOLICY_MBIND,	"mempolicy_mbind")		\
+	EM( MR_NUMA_MISPLACED,	"numa_misplaced")		\
+	EMe(MR_CMA,		"cma")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)	TRACE_DEFINE_ENUM(a);
+
+MIGRATE_MODE
+MIGRATE_REASON
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	{a, b},
+#define EMe(a, b)	{a, b}
 
 TRACE_EVENT(mm_migrate_pages,
 
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 81c4c18..28c4599 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -84,7 +84,7 @@
 		__assign_str(name, mod->name);
 	),
 
-	TP_printk("%s call_site=%pf refcnt=%d",
+	TP_printk("%s call_site=%ps refcnt=%d",
 		  __get_str(name), (void *)__entry->ip, __entry->refcnt)
 );
 
@@ -121,7 +121,7 @@
 		__assign_str(name, name);
 	),
 
-	TP_printk("%s wait=%d call_site=%pf",
+	TP_printk("%s wait=%d call_site=%ps",
 		  __get_str(name), (int)__entry->wait, (void *)__entry->ip)
 );
 
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
index 805af6d..4684de3 100644
--- a/include/trace/events/random.h
+++ b/include/trace/events/random.h
@@ -22,7 +22,7 @@
 		__entry->IP		= IP;
 	),
 
-	TP_printk("bytes %d caller %pF",
+	TP_printk("bytes %d caller %pS",
 		__entry->bytes, (void *)__entry->IP)
 );
 
@@ -43,7 +43,7 @@
 		__entry->IP		= IP;
 	),
 
-	TP_printk("%s pool: bytes %d caller %pF",
+	TP_printk("%s pool: bytes %d caller %pS",
 		  __entry->pool_name, __entry->bytes, (void *)__entry->IP)
 );
 
@@ -82,7 +82,7 @@
 	),
 
 	TP_printk("%s pool: bits %d entropy_count %d entropy_total %d "
-		  "caller %pF", __entry->pool_name, __entry->bits,
+		  "caller %pS", __entry->pool_name, __entry->bits,
 		  __entry->entropy_count, __entry->entropy_total,
 		  (void *)__entry->IP)
 );
@@ -207,7 +207,7 @@
 		__entry->IP		= IP;
 	),
 
-	TP_printk("nbytes %d caller %pF", __entry->nbytes, (void *)__entry->IP)
+	TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP)
 );
 
 DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
@@ -242,7 +242,7 @@
 		__entry->IP		= IP;
 	),
 
-	TP_printk("%s pool: nbytes %d entropy_count %d caller %pF",
+	TP_printk("%s pool: nbytes %d entropy_count %d caller %pS",
 		  __entry->pool_name, __entry->nbytes, __entry->entropy_count,
 		  (void *)__entry->IP)
 );
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index b9c1dc6..fd1a02c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -179,27 +179,53 @@
 
 );
 
+/*
+ * First define the enums in the below macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)	TRACE_DEFINE_ENUM(a);
+
+#define RPC_SHOW_SOCKET				\
+	EM( SS_FREE, "FREE" )			\
+	EM( SS_UNCONNECTED, "UNCONNECTED" )	\
+	EM( SS_CONNECTING, "CONNECTING," )	\
+	EM( SS_CONNECTED, "CONNECTED," )	\
+	EMe(SS_DISCONNECTING, "DISCONNECTING" )
+
 #define rpc_show_socket_state(state) \
-	__print_symbolic(state, \
-		{ SS_FREE, "FREE" }, \
-		{ SS_UNCONNECTED, "UNCONNECTED" }, \
-		{ SS_CONNECTING, "CONNECTING," }, \
-		{ SS_CONNECTED, "CONNECTED," }, \
-		{ SS_DISCONNECTING, "DISCONNECTING" })
+	__print_symbolic(state, RPC_SHOW_SOCKET)
+
+RPC_SHOW_SOCKET
+
+#define RPC_SHOW_SOCK				\
+	EM( TCP_ESTABLISHED, "ESTABLISHED" )	\
+	EM( TCP_SYN_SENT, "SYN_SENT" )		\
+	EM( TCP_SYN_RECV, "SYN_RECV" )		\
+	EM( TCP_FIN_WAIT1, "FIN_WAIT1" )	\
+	EM( TCP_FIN_WAIT2, "FIN_WAIT2" )	\
+	EM( TCP_TIME_WAIT, "TIME_WAIT" )	\
+	EM( TCP_CLOSE, "CLOSE" )		\
+	EM( TCP_CLOSE_WAIT, "CLOSE_WAIT" )	\
+	EM( TCP_LAST_ACK, "LAST_ACK" )		\
+	EM( TCP_LISTEN, "LISTEN" )		\
+	EMe( TCP_CLOSING, "CLOSING" )
 
 #define rpc_show_sock_state(state) \
-	__print_symbolic(state, \
-		{ TCP_ESTABLISHED, "ESTABLISHED" }, \
-		{ TCP_SYN_SENT, "SYN_SENT" }, \
-		{ TCP_SYN_RECV, "SYN_RECV" }, \
-		{ TCP_FIN_WAIT1, "FIN_WAIT1" }, \
-		{ TCP_FIN_WAIT2, "FIN_WAIT2" }, \
-		{ TCP_TIME_WAIT, "TIME_WAIT" }, \
-		{ TCP_CLOSE, "CLOSE" }, \
-		{ TCP_CLOSE_WAIT, "CLOSE_WAIT" }, \
-		{ TCP_LAST_ACK, "LAST_ACK" }, \
-		{ TCP_LISTEN, "LISTEN" }, \
-		{ TCP_CLOSING, "CLOSING" })
+	__print_symbolic(state, RPC_SHOW_SOCK)
+
+RPC_SHOW_SOCK
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	{a, b},
+#define EMe(a, b)	{a, b}
 
 DECLARE_EVENT_CLASS(xs_socket_event,
 
diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h
index 0e76357..4250f36 100644
--- a/include/trace/events/tlb.h
+++ b/include/trace/events/tlb.h
@@ -7,11 +7,31 @@
 #include <linux/mm_types.h>
 #include <linux/tracepoint.h>
 
-#define TLB_FLUSH_REASON	\
-	{ TLB_FLUSH_ON_TASK_SWITCH,	"flush on task switch" },	\
-	{ TLB_REMOTE_SHOOTDOWN,		"remote shootdown" },		\
-	{ TLB_LOCAL_SHOOTDOWN,		"local shootdown" },		\
-	{ TLB_LOCAL_MM_SHOOTDOWN,	"local mm shootdown" }
+#define TLB_FLUSH_REASON						\
+	EM(  TLB_FLUSH_ON_TASK_SWITCH,	"flush on task switch" )	\
+	EM(  TLB_REMOTE_SHOOTDOWN,	"remote shootdown" )		\
+	EM(  TLB_LOCAL_SHOOTDOWN,	"local shootdown" )		\
+	EMe( TLB_LOCAL_MM_SHOOTDOWN,	"local mm shootdown" )
+
+/*
+ * First define the enums in TLB_FLUSH_REASON to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a,b)		TRACE_DEFINE_ENUM(a);
+#define EMe(a,b)	TRACE_DEFINE_ENUM(a);
+
+TLB_FLUSH_REASON
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a,b)		{ a, b },
+#define EMe(a,b)	{ a, b }
 
 TRACE_EVENT_CONDITION(tlb_flush,
 
diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h
index b9bb1f2..2011217 100644
--- a/include/trace/events/v4l2.h
+++ b/include/trace/events/v4l2.h
@@ -6,33 +6,58 @@
 
 #include <linux/tracepoint.h>
 
-#define show_type(type)							       \
-	__print_symbolic(type,						       \
-		{ V4L2_BUF_TYPE_VIDEO_CAPTURE,	      "VIDEO_CAPTURE" },       \
-		{ V4L2_BUF_TYPE_VIDEO_OUTPUT,	      "VIDEO_OUTPUT" },	       \
-		{ V4L2_BUF_TYPE_VIDEO_OVERLAY,	      "VIDEO_OVERLAY" },       \
-		{ V4L2_BUF_TYPE_VBI_CAPTURE,	      "VBI_CAPTURE" },	       \
-		{ V4L2_BUF_TYPE_VBI_OUTPUT,	      "VBI_OUTPUT" },	       \
-		{ V4L2_BUF_TYPE_SLICED_VBI_CAPTURE,   "SLICED_VBI_CAPTURE" },  \
-		{ V4L2_BUF_TYPE_SLICED_VBI_OUTPUT,    "SLICED_VBI_OUTPUT" },   \
-		{ V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY, "VIDEO_OUTPUT_OVERLAY" },\
-		{ V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, "VIDEO_CAPTURE_MPLANE" },\
-		{ V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE,  "VIDEO_OUTPUT_MPLANE" }, \
-		{ V4L2_BUF_TYPE_SDR_CAPTURE,          "SDR_CAPTURE" },         \
-		{ V4L2_BUF_TYPE_PRIVATE,	      "PRIVATE" })
+/* Enums require being exported to userspace, for user tool parsing */
+#undef EM
+#undef EMe
+#define EM(a, b)	TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)	TRACE_DEFINE_ENUM(a);
+
+#define show_type(type)							\
+	__print_symbolic(type, SHOW_TYPE)
+
+#define SHOW_TYPE							\
+	EM( V4L2_BUF_TYPE_VIDEO_CAPTURE,	"VIDEO_CAPTURE" )	\
+	EM( V4L2_BUF_TYPE_VIDEO_OUTPUT,		"VIDEO_OUTPUT" )	\
+	EM( V4L2_BUF_TYPE_VIDEO_OVERLAY,	"VIDEO_OVERLAY" )	\
+	EM( V4L2_BUF_TYPE_VBI_CAPTURE,		"VBI_CAPTURE" )		\
+	EM( V4L2_BUF_TYPE_VBI_OUTPUT,		"VBI_OUTPUT" )		\
+	EM( V4L2_BUF_TYPE_SLICED_VBI_CAPTURE,   "SLICED_VBI_CAPTURE" )	\
+	EM( V4L2_BUF_TYPE_SLICED_VBI_OUTPUT,    "SLICED_VBI_OUTPUT" )	\
+	EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY, "VIDEO_OUTPUT_OVERLAY" ) \
+	EM( V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, "VIDEO_CAPTURE_MPLANE" ) \
+	EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE,  "VIDEO_OUTPUT_MPLANE" )	\
+	EM( V4L2_BUF_TYPE_SDR_CAPTURE,          "SDR_CAPTURE" )		\
+	EMe(V4L2_BUF_TYPE_PRIVATE,		"PRIVATE" )
+
+SHOW_TYPE
 
 #define show_field(field)						\
-	__print_symbolic(field,						\
-		{ V4L2_FIELD_ANY,		"ANY" },		\
-		{ V4L2_FIELD_NONE,		"NONE" },		\
-		{ V4L2_FIELD_TOP,		"TOP" },		\
-		{ V4L2_FIELD_BOTTOM,		"BOTTOM" },		\
-		{ V4L2_FIELD_INTERLACED,	"INTERLACED" },		\
-		{ V4L2_FIELD_SEQ_TB,		"SEQ_TB" },		\
-		{ V4L2_FIELD_SEQ_BT,		"SEQ_BT" },		\
-		{ V4L2_FIELD_ALTERNATE,		"ALTERNATE" },		\
-		{ V4L2_FIELD_INTERLACED_TB,	"INTERLACED_TB" },      \
-		{ V4L2_FIELD_INTERLACED_BT,	"INTERLACED_BT" })
+	__print_symbolic(field, SHOW_FIELD)
+
+#define SHOW_FIELD							\
+	EM( V4L2_FIELD_ANY,		"ANY" )				\
+	EM( V4L2_FIELD_NONE,		"NONE" )			\
+	EM( V4L2_FIELD_TOP,		"TOP" )				\
+	EM( V4L2_FIELD_BOTTOM,		"BOTTOM" )			\
+	EM( V4L2_FIELD_INTERLACED,	"INTERLACED" )			\
+	EM( V4L2_FIELD_SEQ_TB,		"SEQ_TB" )			\
+	EM( V4L2_FIELD_SEQ_BT,		"SEQ_BT" )			\
+	EM( V4L2_FIELD_ALTERNATE,	"ALTERNATE" )			\
+	EM( V4L2_FIELD_INTERLACED_TB,	"INTERLACED_TB" )		\
+	EMe( V4L2_FIELD_INTERLACED_BT,	"INTERLACED_BT" )
+
+SHOW_FIELD
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	{a, b},
+#define EMe(a, b)	{a, b}
+
+/* V4L2_TC_TYPE_* are macros, not defines, they do not need processing */
 
 #define show_timecode_type(type)					\
 	__print_symbolic(type,						\
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 5a14ead..880dd74 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -23,15 +23,32 @@
 		{I_REFERENCED,		"I_REFERENCED"}		\
 	)
 
+/* enums need to be exported to user space */
+#undef EM
+#undef EMe
+#define EM(a,b) 	TRACE_DEFINE_ENUM(a);
+#define EMe(a,b)	TRACE_DEFINE_ENUM(a);
+
 #define WB_WORK_REASON							\
-		{WB_REASON_BACKGROUND,		"background"},		\
-		{WB_REASON_TRY_TO_FREE_PAGES,	"try_to_free_pages"},	\
-		{WB_REASON_SYNC,		"sync"},		\
-		{WB_REASON_PERIODIC,		"periodic"},		\
-		{WB_REASON_LAPTOP_TIMER,	"laptop_timer"},	\
-		{WB_REASON_FREE_MORE_MEM,	"free_more_memory"},	\
-		{WB_REASON_FS_FREE_SPACE,	"fs_free_space"},	\
-		{WB_REASON_FORKER_THREAD,	"forker_thread"}
+	EM( WB_REASON_BACKGROUND,		"background")		\
+	EM( WB_REASON_TRY_TO_FREE_PAGES,	"try_to_free_pages")	\
+	EM( WB_REASON_SYNC,			"sync")			\
+	EM( WB_REASON_PERIODIC,			"periodic")		\
+	EM( WB_REASON_LAPTOP_TIMER,		"laptop_timer")		\
+	EM( WB_REASON_FREE_MORE_MEM,		"free_more_memory")	\
+	EM( WB_REASON_FS_FREE_SPACE,		"fs_free_space")	\
+	EMe(WB_REASON_FORKER_THREAD,		"forker_thread")
+
+WB_WORK_REASON
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a,b)		{ a, b },
+#define EMe(a,b)	{ a, b }
 
 struct wb_writeback_work;
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 41bf65f..37d4b10 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,6 +18,34 @@
 
 #include <linux/ftrace_event.h>
 
+#ifndef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR TRACE_SYSTEM
+#endif
+
+#define __app__(x, y) str__##x##y
+#define __app(x, y) __app__(x, y)
+
+#define TRACE_SYSTEM_STRING __app(TRACE_SYSTEM_VAR,__trace_system_name)
+
+#define TRACE_MAKE_SYSTEM_STR()				\
+	static const char TRACE_SYSTEM_STRING[] =	\
+		__stringify(TRACE_SYSTEM)
+
+TRACE_MAKE_SYSTEM_STR();
+
+#undef TRACE_DEFINE_ENUM
+#define TRACE_DEFINE_ENUM(a)				\
+	static struct trace_enum_map __used __initdata	\
+	__##TRACE_SYSTEM##_##a =			\
+	{						\
+		.system = TRACE_SYSTEM_STRING,		\
+		.enum_string = #a,			\
+		.enum_value = a				\
+	};						\
+	static struct trace_enum_map __used		\
+	__attribute__((section("_ftrace_enum_map")))	\
+	*TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a
+
 /*
  * DECLARE_EVENT_CLASS can be used to add a generic function
  * handlers for events. That is, if all events have the same
@@ -105,7 +133,6 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-
 /*
  * Stage 2 of the trace events.
  *
@@ -122,6 +149,9 @@
  * The size of an array is also encoded, in the higher 16 bits of <item>.
  */
 
+#undef TRACE_DEFINE_ENUM
+#define TRACE_DEFINE_ENUM(a)
+
 #undef __field
 #define __field(type, item)
 
@@ -539,7 +569,7 @@
  *	.trace			= ftrace_raw_output_<call>, <-- stage 2
  * };
  *
- * static const char print_fmt_<call>[] = <TP_printk>;
+ * static char print_fmt_<call>[] = <TP_printk>;
  *
  * static struct ftrace_event_class __used event_class_<template> = {
  *	.system			= "<system>",
@@ -690,9 +720,9 @@
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 _TRACE_PERF_PROTO(call, PARAMS(proto));					\
-static const char print_fmt_##call[] = print;				\
+static char print_fmt_##call[] = print;					\
 static struct ftrace_event_class __used __refdata event_class_##call = { \
-	.system			= __stringify(TRACE_SYSTEM),		\
+	.system			= TRACE_SYSTEM_STRING,			\
 	.define_fields		= ftrace_define_fields_##call,		\
 	.fields			= LIST_HEAD_INIT(event_class_##call.fields),\
 	.raw_init		= trace_event_raw_init,			\
@@ -719,7 +749,7 @@
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
 									\
-static const char print_fmt_##call[] = print;				\
+static char print_fmt_##call[] = print;					\
 									\
 static struct ftrace_event_call __used event_##call = {			\
 	.class			= &event_class_##template,		\
@@ -735,6 +765,7 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+#undef TRACE_SYSTEM_VAR
 
 #ifdef CONFIG_PERF_EVENTS
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 45da7ec..cc47ef4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -118,6 +118,7 @@
 enum bpf_prog_type {
 	BPF_PROG_TYPE_UNSPEC,
 	BPF_PROG_TYPE_SOCKET_FILTER,
+	BPF_PROG_TYPE_KPROBE,
 };
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -151,6 +152,7 @@
 		__u32		log_level;	/* verbosity level of verifier */
 		__u32		log_size;	/* size of user buffer */
 		__aligned_u64	log_buf;	/* user supplied buffer */
+		__u32		kern_version;	/* checked when prog_type=kprobe */
 	};
 } __attribute__((aligned(8)));
 
@@ -162,6 +164,9 @@
 	BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
+	BPF_FUNC_ktime_get_ns,    /* u64 bpf_ktime_get_ns(void) */
+	BPF_FUNC_trace_printk,    /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 2f62ab2..8038e9a 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -702,6 +702,10 @@
 #define KEY_NUMERIC_9		0x209
 #define KEY_NUMERIC_STAR	0x20a
 #define KEY_NUMERIC_POUND	0x20b
+#define KEY_NUMERIC_A		0x20c	/* Phone key A - HUT Telephony 0xb9 */
+#define KEY_NUMERIC_B		0x20d
+#define KEY_NUMERIC_C		0x20e
+#define KEY_NUMERIC_D		0x20f
 
 #define KEY_CAMERA_FOCUS	0x210
 #define KEY_WPS_BUTTON		0x211	/* WiFi Protected Setup key */
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 7d664ea..7b1425a 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -58,6 +58,8 @@
 
 #define STACK_END_MAGIC		0x57AC6E9D
 
+#define TRACEFS_MAGIC          0x74726163
+
 #define V9FS_MAGIC		0x01021997
 
 #define BDEVFS_MAGIC            0x62646576
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9b79abb..309211b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -152,21 +152,42 @@
  * The branch types can be combined, however BRANCH_ANY covers all types
  * of branches and therefore it supersedes all the other types.
  */
+enum perf_branch_sample_type_shift {
+	PERF_SAMPLE_BRANCH_USER_SHIFT		= 0, /* user branches */
+	PERF_SAMPLE_BRANCH_KERNEL_SHIFT		= 1, /* kernel branches */
+	PERF_SAMPLE_BRANCH_HV_SHIFT		= 2, /* hypervisor branches */
+
+	PERF_SAMPLE_BRANCH_ANY_SHIFT		= 3, /* any branch types */
+	PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT	= 4, /* any call branch */
+	PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT	= 5, /* any return branch */
+	PERF_SAMPLE_BRANCH_IND_CALL_SHIFT	= 6, /* indirect calls */
+	PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT	= 7, /* transaction aborts */
+	PERF_SAMPLE_BRANCH_IN_TX_SHIFT		= 8, /* in transaction */
+	PERF_SAMPLE_BRANCH_NO_TX_SHIFT		= 9, /* not in transaction */
+	PERF_SAMPLE_BRANCH_COND_SHIFT		= 10, /* conditional branches */
+
+	PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT	= 11, /* call/ret stack */
+
+	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
+};
+
 enum perf_branch_sample_type {
-	PERF_SAMPLE_BRANCH_USER		= 1U << 0, /* user branches */
-	PERF_SAMPLE_BRANCH_KERNEL	= 1U << 1, /* kernel branches */
-	PERF_SAMPLE_BRANCH_HV		= 1U << 2, /* hypervisor branches */
+	PERF_SAMPLE_BRANCH_USER		= 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+	PERF_SAMPLE_BRANCH_KERNEL	= 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+	PERF_SAMPLE_BRANCH_HV		= 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
 
-	PERF_SAMPLE_BRANCH_ANY		= 1U << 3, /* any branch types */
-	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << 4, /* any call branch */
-	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << 5, /* any return branch */
-	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << 6, /* indirect calls */
-	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << 7, /* transaction aborts */
-	PERF_SAMPLE_BRANCH_IN_TX	= 1U << 8, /* in transaction */
-	PERF_SAMPLE_BRANCH_NO_TX	= 1U << 9, /* not in transaction */
-	PERF_SAMPLE_BRANCH_COND		= 1U << 10, /* conditional branches */
+	PERF_SAMPLE_BRANCH_ANY		= 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+	PERF_SAMPLE_BRANCH_IN_TX	= 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+	PERF_SAMPLE_BRANCH_NO_TX	= 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+	PERF_SAMPLE_BRANCH_COND		= 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
 
-	PERF_SAMPLE_BRANCH_MAX		= 1U << 11, /* non-ABI */
+	PERF_SAMPLE_BRANCH_CALL_STACK	= 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+
+	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
@@ -240,6 +261,7 @@
 #define PERF_ATTR_SIZE_VER3	96	/* add: sample_regs_user */
 					/* add: sample_stack_user */
 #define PERF_ATTR_SIZE_VER4	104	/* add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5	112	/* add: aux_watermark */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -305,7 +327,8 @@
 				exclude_callchain_user   : 1, /* exclude user callchains */
 				mmap2          :  1, /* include mmap with inode data     */
 				comm_exec      :  1, /* flag comm events that are due to an exec */
-				__reserved_1   : 39;
+				use_clockid    :  1, /* use @clockid for time fields */
+				__reserved_1   : 38;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -334,8 +357,7 @@
 	 */
 	__u32	sample_stack_user;
 
-	/* Align to u64. */
-	__u32	__reserved_2;
+	__s32	clockid;
 	/*
 	 * Defines set of regs to dump for each sample
 	 * state captured on:
@@ -345,6 +367,12 @@
 	 * See asm/perf_regs.h for details.
 	 */
 	__u64	sample_regs_intr;
+
+	/*
+	 * Wakeup watermark for AUX area
+	 */
+	__u32	aux_watermark;
+	__u32	__reserved_2;	/* align to __u64 */
 };
 
 #define perf_flags(attr)	(*(&(attr)->read_format + 1))
@@ -360,6 +388,7 @@
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
@@ -500,9 +529,30 @@
 	 * In this case the kernel will not over-write unread data.
 	 *
 	 * See perf_output_put_handle() for the data ordering.
+	 *
+	 * data_{offset,size} indicate the location and size of the perf record
+	 * buffer within the mmapped area.
 	 */
 	__u64   data_head;		/* head in the data section */
 	__u64	data_tail;		/* user-space written tail */
+	__u64	data_offset;		/* where the buffer starts */
+	__u64	data_size;		/* data buffer size */
+
+	/*
+	 * AUX area is defined by aux_{offset,size} fields that should be set
+	 * by the userspace, so that
+	 *
+	 *   aux_offset >= data_offset + data_size
+	 *
+	 * prior to mmap()ing it. Size of the mmap()ed area should be aux_size.
+	 *
+	 * Ring buffer pointers aux_{head,tail} have the same semantics as
+	 * data_{head,tail} and same ordering rules apply.
+	 */
+	__u64	aux_head;
+	__u64	aux_tail;
+	__u64	aux_offset;
+	__u64	aux_size;
 };
 
 #define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
@@ -725,6 +775,31 @@
 	 */
 	PERF_RECORD_MMAP2			= 10,
 
+	/*
+	 * Records that new data landed in the AUX buffer part.
+	 *
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 *
+	 * 	u64				aux_offset;
+	 * 	u64				aux_size;
+	 *	u64				flags;
+	 * 	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_AUX				= 11,
+
+	/*
+	 * Indicates that instruction trace has started
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid;
+	 *	u32				tid;
+	 * };
+	 */
+	PERF_RECORD_ITRACE_START		= 12,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -742,6 +817,12 @@
 	PERF_CONTEXT_MAX		= (__u64)-4095,
 };
 
+/**
+ * PERF_RECORD_AUX::flags bits
+ */
+#define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
+
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
 #define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
diff --git a/init/Kconfig b/init/Kconfig
index f5dbc6d..a905b73 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -791,6 +791,19 @@
 
 endchoice
 
+config RCU_EXPEDITE_BOOT
+	bool
+	default n
+	help
+	  This option enables expedited grace periods at boot time,
+	  as if rcu_expedite_gp() had been invoked early in boot.
+	  The corresponding rcu_unexpedite_gp() is invoked from
+	  rcu_end_inkernel_boot(), which is intended to be invoked
+	  at the end of the kernel-only boot sequence, just before
+	  init is exec'ed.
+
+	  Accept the default if unsure.
+
 endmenu # "RCU Subsystem"
 
 config BUILD_BIN2C
@@ -1513,7 +1526,7 @@
 
 # syscall, maps, verifier
 config BPF_SYSCALL
-	bool "Enable bpf() system call" if EXPERT
+	bool "Enable bpf() system call"
 	select ANON_INODES
 	select BPF
 	default n
diff --git a/init/main.c b/init/main.c
index f6dd8fe..a7e969d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -144,7 +144,7 @@
  * rely on the BIOS and skip the reset operation.
  *
  * This is useful if kernel is booting in an unreliable environment.
- * For ex. kdump situaiton where previous kernel has crashed, BIOS has been
+ * For ex. kdump situation where previous kernel has crashed, BIOS has been
  * skipped and devices will be in unknown state.
  */
 unsigned int reset_devices;
@@ -385,6 +385,7 @@
 	int pid;
 
 	rcu_scheduler_starting();
+	smpboot_thread_init();
 	/*
 	 * We need to spawn init first so that it obtains pid 1, however
 	 * the init task will end up wanting to create kthreads, which, if
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 536edc2..504c10b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -16,6 +16,7 @@
 #include <linux/file.h>
 #include <linux/license.h>
 #include <linux/filter.h>
+#include <linux/version.h>
 
 static LIST_HEAD(bpf_map_types);
 
@@ -467,7 +468,7 @@
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD log_buf
+#define	BPF_PROG_LOAD_LAST_FIELD kern_version
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
@@ -492,6 +493,10 @@
 	if (attr->insn_cnt >= BPF_MAXINSNS)
 		return -EINVAL;
 
+	if (type == BPF_PROG_TYPE_KPROBE &&
+	    attr->kern_version != LINUX_VERSION_CODE)
+		return -EINVAL;
+
 	/* plain bpf_prog allocation */
 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 	if (!prog)
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 937ecdf..72d59a1 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -39,15 +39,15 @@
 }
 
 /**
- * context_tracking_user_enter - Inform the context tracking that the CPU is going to
- *                               enter userspace mode.
+ * context_tracking_enter - Inform the context tracking that the CPU is going
+ *                          enter user or guest space mode.
  *
  * This function must be called right before we switch from the kernel
- * to userspace, when it's guaranteed the remaining kernel instructions
- * to execute won't use any RCU read side critical section because this
- * function sets RCU in extended quiescent state.
+ * to user or guest space, when it's guaranteed the remaining kernel
+ * instructions to execute won't use any RCU read side critical section
+ * because this function sets RCU in extended quiescent state.
  */
-void context_tracking_user_enter(void)
+void context_tracking_enter(enum ctx_state state)
 {
 	unsigned long flags;
 
@@ -75,9 +75,8 @@
 	WARN_ON_ONCE(!current->mm);
 
 	local_irq_save(flags);
-	if ( __this_cpu_read(context_tracking.state) != IN_USER) {
+	if ( __this_cpu_read(context_tracking.state) != state) {
 		if (__this_cpu_read(context_tracking.active)) {
-			trace_user_enter(0);
 			/*
 			 * At this stage, only low level arch entry code remains and
 			 * then we'll run in userspace. We can assume there won't be
@@ -85,7 +84,10 @@
 			 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
 			 * on the tick.
 			 */
-			vtime_user_enter(current);
+			if (state == CONTEXT_USER) {
+				trace_user_enter(0);
+				vtime_user_enter(current);
+			}
 			rcu_user_enter();
 		}
 		/*
@@ -101,24 +103,32 @@
 		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
 		 * is false because we know that CPU is not tickless.
 		 */
-		__this_cpu_write(context_tracking.state, IN_USER);
+		__this_cpu_write(context_tracking.state, state);
 	}
 	local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(context_tracking_enter);
+EXPORT_SYMBOL_GPL(context_tracking_enter);
+
+void context_tracking_user_enter(void)
+{
+	context_tracking_enter(CONTEXT_USER);
+}
 NOKPROBE_SYMBOL(context_tracking_user_enter);
 
 /**
- * context_tracking_user_exit - Inform the context tracking that the CPU is
- *                              exiting userspace mode and entering the kernel.
+ * context_tracking_exit - Inform the context tracking that the CPU is
+ *                         exiting user or guest mode and entering the kernel.
  *
- * This function must be called after we entered the kernel from userspace
- * before any use of RCU read side critical section. This potentially include
- * any high level kernel code like syscalls, exceptions, signal handling, etc...
+ * This function must be called after we entered the kernel from user or
+ * guest space before any use of RCU read side critical section. This
+ * potentially include any high level kernel code like syscalls, exceptions,
+ * signal handling, etc...
  *
  * This call supports re-entrancy. This way it can be called from any exception
  * handler without needing to know if we came from userspace or not.
  */
-void context_tracking_user_exit(void)
+void context_tracking_exit(enum ctx_state state)
 {
 	unsigned long flags;
 
@@ -129,20 +139,29 @@
 		return;
 
 	local_irq_save(flags);
-	if (__this_cpu_read(context_tracking.state) == IN_USER) {
+	if (__this_cpu_read(context_tracking.state) == state) {
 		if (__this_cpu_read(context_tracking.active)) {
 			/*
 			 * We are going to run code that may use RCU. Inform
 			 * RCU core about that (ie: we may need the tick again).
 			 */
 			rcu_user_exit();
-			vtime_user_exit(current);
-			trace_user_exit(0);
+			if (state == CONTEXT_USER) {
+				vtime_user_exit(current);
+				trace_user_exit(0);
+			}
 		}
-		__this_cpu_write(context_tracking.state, IN_KERNEL);
+		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
 	}
 	local_irq_restore(flags);
 }
+NOKPROBE_SYMBOL(context_tracking_exit);
+EXPORT_SYMBOL_GPL(context_tracking_exit);
+
+void context_tracking_user_exit(void)
+{
+	context_tracking_exit(CONTEXT_USER);
+}
 NOKPROBE_SYMBOL(context_tracking_user_exit);
 
 /**
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 82eea9c..94bbe46 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -411,8 +411,10 @@
 	 *
 	 * Wait for the stop thread to go away.
 	 */
-	while (!idle_cpu(cpu))
+	while (!per_cpu(cpu_dead_idle, cpu))
 		cpu_relax();
+	smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
+	per_cpu(cpu_dead_idle, cpu) = false;
 
 	hotplug_cpu__broadcast_tick_pull(cpu);
 	/* This actually kills the CPU. */
@@ -451,6 +453,37 @@
 EXPORT_SYMBOL(cpu_down);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
+/*
+ * Unpark per-CPU smpboot kthreads at CPU-online time.
+ */
+static int smpboot_thread_call(struct notifier_block *nfb,
+			       unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+
+	case CPU_ONLINE:
+		smpboot_unpark_threads(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block smpboot_thread_notifier = {
+	.notifier_call = smpboot_thread_call,
+	.priority = CPU_PRI_SMPBOOT,
+};
+
+void __cpuinit smpboot_thread_init(void)
+{
+	register_cpu_notifier(&smpboot_thread_notifier);
+}
+
 /* Requires cpu_add_remove_lock to be held */
 static int _cpu_up(unsigned int cpu, int tasks_frozen)
 {
@@ -490,9 +523,6 @@
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
 
-	/* Wake the per cpu threads */
-	smpboot_unpark_threads(cpu);
-
 	/* Now call notifier in preparation. */
 	cpu_notify(CPU_ONLINE | mod, hcpu);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2fabc06..06917d5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -34,14 +34,16 @@
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
+#include <linux/cgroup.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
-#include <linux/cgroup.h>
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/compat.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
 
 #include "internal.h"
 
@@ -153,7 +155,7 @@
  */
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
-static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -327,6 +329,11 @@
 	return local_clock();
 }
 
+static inline u64 perf_event_clock(struct perf_event *event)
+{
+	return event->clock();
+}
+
 static inline struct perf_cpu_context *
 __get_cpu_context(struct perf_event_context *ctx)
 {
@@ -351,32 +358,6 @@
 
 #ifdef CONFIG_CGROUP_PERF
 
-/*
- * perf_cgroup_info keeps track of time_enabled for a cgroup.
- * This is a per-cpu dynamically allocated data structure.
- */
-struct perf_cgroup_info {
-	u64				time;
-	u64				timestamp;
-};
-
-struct perf_cgroup {
-	struct cgroup_subsys_state	css;
-	struct perf_cgroup_info	__percpu *info;
-};
-
-/*
- * Must ensure cgroup is pinned (css_get) before calling
- * this function. In other words, we cannot call this function
- * if there is no cgroup event for the current CPU context.
- */
-static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
-{
-	return container_of(task_css(task, perf_event_cgrp_id),
-			    struct perf_cgroup, css);
-}
-
 static inline bool
 perf_cgroup_match(struct perf_event *event)
 {
@@ -905,6 +886,15 @@
 	WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
 }
 
+static void free_ctx(struct rcu_head *head)
+{
+	struct perf_event_context *ctx;
+
+	ctx = container_of(head, struct perf_event_context, rcu_head);
+	kfree(ctx->task_ctx_data);
+	kfree(ctx);
+}
+
 static void put_ctx(struct perf_event_context *ctx)
 {
 	if (atomic_dec_and_test(&ctx->refcount)) {
@@ -912,7 +902,7 @@
 			put_ctx(ctx->parent_ctx);
 		if (ctx->task)
 			put_task_struct(ctx->task);
-		kfree_rcu(ctx, rcu_head);
+		call_rcu(&ctx->rcu_head, free_ctx);
 	}
 }
 
@@ -1239,9 +1229,6 @@
 	if (is_cgroup_event(event))
 		ctx->nr_cgroups++;
 
-	if (has_branch_stack(event))
-		ctx->nr_branch_stack++;
-
 	list_add_rcu(&event->event_entry, &ctx->event_list);
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
@@ -1408,9 +1395,6 @@
 			cpuctx->cgrp = NULL;
 	}
 
-	if (has_branch_stack(event))
-		ctx->nr_branch_stack--;
-
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat--;
@@ -1847,6 +1831,7 @@
 #define MAX_INTERRUPTS (~0ULL)
 
 static void perf_log_throttle(struct perf_event *event, int enable);
+static void perf_log_itrace_start(struct perf_event *event);
 
 static int
 event_sched_in(struct perf_event *event,
@@ -1881,6 +1866,12 @@
 
 	perf_pmu_disable(event->pmu);
 
+	event->tstamp_running += tstamp - event->tstamp_stopped;
+
+	perf_set_shadow_time(event, ctx, tstamp);
+
+	perf_log_itrace_start(event);
+
 	if (event->pmu->add(event, PERF_EF_START)) {
 		event->state = PERF_EVENT_STATE_INACTIVE;
 		event->oncpu = -1;
@@ -1888,10 +1879,6 @@
 		goto out;
 	}
 
-	event->tstamp_running += tstamp - event->tstamp_stopped;
-
-	perf_set_shadow_time(event, ctx, tstamp);
-
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
 	if (!ctx->nr_active++)
@@ -2559,6 +2546,9 @@
 			next->perf_event_ctxp[ctxn] = ctx;
 			ctx->task = next;
 			next_ctx->task = task;
+
+			swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
+
 			do_switch = 0;
 
 			perf_event_sync_stat(ctx, next_ctx);
@@ -2577,6 +2567,56 @@
 	}
 }
 
+void perf_sched_cb_dec(struct pmu *pmu)
+{
+	this_cpu_dec(perf_sched_cb_usages);
+}
+
+void perf_sched_cb_inc(struct pmu *pmu)
+{
+	this_cpu_inc(perf_sched_cb_usages);
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+				struct task_struct *next,
+				bool sched_in)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	if (prev == next)
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (pmu->sched_task) {
+			cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+			perf_pmu_disable(pmu);
+
+			pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+			perf_pmu_enable(pmu);
+
+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+		}
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
 #define for_each_task_context_nr(ctxn)					\
 	for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
@@ -2596,6 +2636,9 @@
 {
 	int ctxn;
 
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(task, next, false);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 
@@ -2755,64 +2798,6 @@
 }
 
 /*
- * When sampling the branck stack in system-wide, it may be necessary
- * to flush the stack on context switch. This happens when the branch
- * stack does not tag its entries with the pid of the current task.
- * Otherwise it becomes impossible to associate a branch entry with a
- * task. This ambiguity is more likely to appear when the branch stack
- * supports priv level filtering and the user sets it to monitor only
- * at the user level (which could be a useful measurement in system-wide
- * mode). In that case, the risk is high of having a branch stack with
- * branch from multiple tasks. Flushing may mean dropping the existing
- * entries or stashing them somewhere in the PMU specific code layer.
- *
- * This function provides the context switch callback to the lower code
- * layer. It is invoked ONLY when there is at least one system-wide context
- * with at least one active event using taken branch sampling.
- */
-static void perf_branch_stack_sched_in(struct task_struct *prev,
-				       struct task_struct *task)
-{
-	struct perf_cpu_context *cpuctx;
-	struct pmu *pmu;
-	unsigned long flags;
-
-	/* no need to flush branch stack if not changing task */
-	if (prev == task)
-		return;
-
-	local_irq_save(flags);
-
-	rcu_read_lock();
-
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-		/*
-		 * check if the context has at least one
-		 * event using PERF_SAMPLE_BRANCH_STACK
-		 */
-		if (cpuctx->ctx.nr_branch_stack > 0
-		    && pmu->flush_branch_stack) {
-
-			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-
-			perf_pmu_disable(pmu);
-
-			pmu->flush_branch_stack();
-
-			perf_pmu_enable(pmu);
-
-			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-		}
-	}
-
-	rcu_read_unlock();
-
-	local_irq_restore(flags);
-}
-
-/*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
  *
@@ -2844,9 +2829,8 @@
 	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
 		perf_cgroup_sched_in(prev, task);
 
-	/* check for system-wide branch_stack events */
-	if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
-		perf_branch_stack_sched_in(prev, task);
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -3220,7 +3204,10 @@
 
 static inline u64 perf_event_count(struct perf_event *event)
 {
-	return local64_read(&event->count) + atomic64_read(&event->child_count);
+	if (event->pmu->count)
+		return event->pmu->count(event);
+
+	return __perf_event_count(event);
 }
 
 static u64 perf_event_read(struct perf_event *event)
@@ -3321,12 +3308,15 @@
  * Returns a matching context with refcount and pincount.
  */
 static struct perf_event_context *
-find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
+find_get_context(struct pmu *pmu, struct task_struct *task,
+		struct perf_event *event)
 {
 	struct perf_event_context *ctx, *clone_ctx = NULL;
 	struct perf_cpu_context *cpuctx;
+	void *task_ctx_data = NULL;
 	unsigned long flags;
 	int ctxn, err;
+	int cpu = event->cpu;
 
 	if (!task) {
 		/* Must be root to operate on a CPU event: */
@@ -3354,11 +3344,24 @@
 	if (ctxn < 0)
 		goto errout;
 
+	if (event->attach_state & PERF_ATTACH_TASK_DATA) {
+		task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL);
+		if (!task_ctx_data) {
+			err = -ENOMEM;
+			goto errout;
+		}
+	}
+
 retry:
 	ctx = perf_lock_task_context(task, ctxn, &flags);
 	if (ctx) {
 		clone_ctx = unclone_ctx(ctx);
 		++ctx->pin_count;
+
+		if (task_ctx_data && !ctx->task_ctx_data) {
+			ctx->task_ctx_data = task_ctx_data;
+			task_ctx_data = NULL;
+		}
 		raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
 		if (clone_ctx)
@@ -3369,6 +3372,11 @@
 		if (!ctx)
 			goto errout;
 
+		if (task_ctx_data) {
+			ctx->task_ctx_data = task_ctx_data;
+			task_ctx_data = NULL;
+		}
+
 		err = 0;
 		mutex_lock(&task->perf_event_mutex);
 		/*
@@ -3395,13 +3403,16 @@
 		}
 	}
 
+	kfree(task_ctx_data);
 	return ctx;
 
 errout:
+	kfree(task_ctx_data);
 	return ERR_PTR(err);
 }
 
 static void perf_event_free_filter(struct perf_event *event);
+static void perf_event_free_bpf_prog(struct perf_event *event);
 
 static void free_event_rcu(struct rcu_head *head)
 {
@@ -3411,10 +3422,10 @@
 	if (event->ns)
 		put_pid_ns(event->ns);
 	perf_event_free_filter(event);
+	perf_event_free_bpf_prog(event);
 	kfree(event);
 }
 
-static void ring_buffer_put(struct ring_buffer *rb);
 static void ring_buffer_attach(struct perf_event *event,
 			       struct ring_buffer *rb);
 
@@ -3423,10 +3434,6 @@
 	if (event->parent)
 		return;
 
-	if (has_branch_stack(event)) {
-		if (!(event->attach_state & PERF_ATTACH_TASK))
-			atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
-	}
 	if (is_cgroup_event(event))
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
@@ -3454,6 +3461,91 @@
 	unaccount_event_cpu(event, event->cpu);
 }
 
+/*
+ * The following implement mutual exclusion of events on "exclusive" pmus
+ * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
+ * at a time, so we disallow creating events that might conflict, namely:
+ *
+ *  1) cpu-wide events in the presence of per-task events,
+ *  2) per-task events in the presence of cpu-wide events,
+ *  3) two matching events on the same context.
+ *
+ * The former two cases are handled in the allocation path (perf_event_alloc(),
+ * __free_event()), the latter -- before the first perf_install_in_context().
+ */
+static int exclusive_event_init(struct perf_event *event)
+{
+	struct pmu *pmu = event->pmu;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return 0;
+
+	/*
+	 * Prevent co-existence of per-task and cpu-wide events on the
+	 * same exclusive pmu.
+	 *
+	 * Negative pmu::exclusive_cnt means there are cpu-wide
+	 * events on this "exclusive" pmu, positive means there are
+	 * per-task events.
+	 *
+	 * Since this is called in perf_event_alloc() path, event::ctx
+	 * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK
+	 * to mean "per-task event", because unlike other attach states it
+	 * never gets cleared.
+	 */
+	if (event->attach_state & PERF_ATTACH_TASK) {
+		if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
+			return -EBUSY;
+	} else {
+		if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void exclusive_event_destroy(struct perf_event *event)
+{
+	struct pmu *pmu = event->pmu;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return;
+
+	/* see comment in exclusive_event_init() */
+	if (event->attach_state & PERF_ATTACH_TASK)
+		atomic_dec(&pmu->exclusive_cnt);
+	else
+		atomic_inc(&pmu->exclusive_cnt);
+}
+
+static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
+{
+	if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+	    (e1->cpu == e2->cpu ||
+	     e1->cpu == -1 ||
+	     e2->cpu == -1))
+		return true;
+	return false;
+}
+
+/* Called under the same ctx::mutex as perf_install_in_context() */
+static bool exclusive_event_installable(struct perf_event *event,
+					struct perf_event_context *ctx)
+{
+	struct perf_event *iter_event;
+	struct pmu *pmu = event->pmu;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return true;
+
+	list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
+		if (exclusive_event_match(iter_event, event))
+			return false;
+	}
+
+	return true;
+}
+
 static void __free_event(struct perf_event *event)
 {
 	if (!event->parent) {
@@ -3467,8 +3559,10 @@
 	if (event->ctx)
 		put_ctx(event->ctx);
 
-	if (event->pmu)
+	if (event->pmu) {
+		exclusive_event_destroy(event);
 		module_put(event->pmu->module);
+	}
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
@@ -3927,6 +4021,7 @@
 static int perf_event_set_output(struct perf_event *event,
 				 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
 
 static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 {
@@ -3980,6 +4075,9 @@
 	case PERF_EVENT_IOC_SET_FILTER:
 		return perf_event_set_filter(event, (void __user *)arg);
 
+	case PERF_EVENT_IOC_SET_BPF:
+		return perf_event_set_bpf_prog(event, arg);
+
 	default:
 		return -ENOTTY;
 	}
@@ -4096,6 +4194,8 @@
 	/* Allow new userspace to detect that bit 0 is deprecated */
 	userpg->cap_bit0_is_deprecated = 1;
 	userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
+	userpg->data_offset = PAGE_SIZE;
+	userpg->data_size = perf_data_size(rb);
 
 unlock:
 	rcu_read_unlock();
@@ -4263,7 +4363,7 @@
 	rb_free(rb);
 }
 
-static struct ring_buffer *ring_buffer_get(struct perf_event *event)
+struct ring_buffer *ring_buffer_get(struct perf_event *event)
 {
 	struct ring_buffer *rb;
 
@@ -4278,7 +4378,7 @@
 	return rb;
 }
 
-static void ring_buffer_put(struct ring_buffer *rb)
+void ring_buffer_put(struct ring_buffer *rb)
 {
 	if (!atomic_dec_and_test(&rb->refcount))
 		return;
@@ -4295,6 +4395,9 @@
 	atomic_inc(&event->mmap_count);
 	atomic_inc(&event->rb->mmap_count);
 
+	if (vma->vm_pgoff)
+		atomic_inc(&event->rb->aux_mmap_count);
+
 	if (event->pmu->event_mapped)
 		event->pmu->event_mapped(event);
 }
@@ -4319,6 +4422,20 @@
 	if (event->pmu->event_unmapped)
 		event->pmu->event_unmapped(event);
 
+	/*
+	 * rb->aux_mmap_count will always drop before rb->mmap_count and
+	 * event->mmap_count, so it is ok to use event->mmap_mutex to
+	 * serialize with perf_mmap here.
+	 */
+	if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
+	    atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+		atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
+		vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+
+		rb_free_aux(rb);
+		mutex_unlock(&event->mmap_mutex);
+	}
+
 	atomic_dec(&rb->mmap_count);
 
 	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
@@ -4392,7 +4509,7 @@
 
 static const struct vm_operations_struct perf_mmap_vmops = {
 	.open		= perf_mmap_open,
-	.close		= perf_mmap_close,
+	.close		= perf_mmap_close, /* non mergable */
 	.fault		= perf_mmap_fault,
 	.page_mkwrite	= perf_mmap_fault,
 };
@@ -4403,10 +4520,10 @@
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
 	unsigned long locked, lock_limit;
-	struct ring_buffer *rb;
+	struct ring_buffer *rb = NULL;
 	unsigned long vma_size;
 	unsigned long nr_pages;
-	long user_extra, extra;
+	long user_extra = 0, extra = 0;
 	int ret = 0, flags = 0;
 
 	/*
@@ -4421,7 +4538,66 @@
 		return -EINVAL;
 
 	vma_size = vma->vm_end - vma->vm_start;
-	nr_pages = (vma_size / PAGE_SIZE) - 1;
+
+	if (vma->vm_pgoff == 0) {
+		nr_pages = (vma_size / PAGE_SIZE) - 1;
+	} else {
+		/*
+		 * AUX area mapping: if rb->aux_nr_pages != 0, it's already
+		 * mapped, all subsequent mappings should have the same size
+		 * and offset. Must be above the normal perf buffer.
+		 */
+		u64 aux_offset, aux_size;
+
+		if (!event->rb)
+			return -EINVAL;
+
+		nr_pages = vma_size / PAGE_SIZE;
+
+		mutex_lock(&event->mmap_mutex);
+		ret = -EINVAL;
+
+		rb = event->rb;
+		if (!rb)
+			goto aux_unlock;
+
+		aux_offset = ACCESS_ONCE(rb->user_page->aux_offset);
+		aux_size = ACCESS_ONCE(rb->user_page->aux_size);
+
+		if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
+			goto aux_unlock;
+
+		if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
+			goto aux_unlock;
+
+		/* already mapped with a different offset */
+		if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
+			goto aux_unlock;
+
+		if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE)
+			goto aux_unlock;
+
+		/* already mapped with a different size */
+		if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
+			goto aux_unlock;
+
+		if (!is_power_of_2(nr_pages))
+			goto aux_unlock;
+
+		if (!atomic_inc_not_zero(&rb->mmap_count))
+			goto aux_unlock;
+
+		if (rb_has_aux(rb)) {
+			atomic_inc(&rb->aux_mmap_count);
+			ret = 0;
+			goto unlock;
+		}
+
+		atomic_set(&rb->aux_mmap_count, 1);
+		user_extra = nr_pages;
+
+		goto accounting;
+	}
 
 	/*
 	 * If we have rb pages ensure they're a power-of-two number, so we
@@ -4433,9 +4609,6 @@
 	if (vma_size != PAGE_SIZE * (1 + nr_pages))
 		return -EINVAL;
 
-	if (vma->vm_pgoff != 0)
-		return -EINVAL;
-
 	WARN_ON_ONCE(event->ctx->parent_ctx);
 again:
 	mutex_lock(&event->mmap_mutex);
@@ -4459,6 +4632,8 @@
 	}
 
 	user_extra = nr_pages + 1;
+
+accounting:
 	user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
 
 	/*
@@ -4468,7 +4643,6 @@
 
 	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
-	extra = 0;
 	if (user_locked > user_lock_limit)
 		extra = user_locked - user_lock_limit;
 
@@ -4482,35 +4656,46 @@
 		goto unlock;
 	}
 
-	WARN_ON(event->rb);
+	WARN_ON(!rb && event->rb);
 
 	if (vma->vm_flags & VM_WRITE)
 		flags |= RING_BUFFER_WRITABLE;
 
-	rb = rb_alloc(nr_pages, 
-		event->attr.watermark ? event->attr.wakeup_watermark : 0,
-		event->cpu, flags);
-
 	if (!rb) {
-		ret = -ENOMEM;
-		goto unlock;
+		rb = rb_alloc(nr_pages,
+			      event->attr.watermark ? event->attr.wakeup_watermark : 0,
+			      event->cpu, flags);
+
+		if (!rb) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+
+		atomic_set(&rb->mmap_count, 1);
+		rb->mmap_user = get_current_user();
+		rb->mmap_locked = extra;
+
+		ring_buffer_attach(event, rb);
+
+		perf_event_init_userpage(event);
+		perf_event_update_userpage(event);
+	} else {
+		ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
+				   event->attr.aux_watermark, flags);
+		if (!ret)
+			rb->aux_mmap_locked = extra;
 	}
 
-	atomic_set(&rb->mmap_count, 1);
-	rb->mmap_locked = extra;
-	rb->mmap_user = get_current_user();
-
-	atomic_long_add(user_extra, &user->locked_vm);
-	vma->vm_mm->pinned_vm += extra;
-
-	ring_buffer_attach(event, rb);
-
-	perf_event_init_userpage(event);
-	perf_event_update_userpage(event);
-
 unlock:
-	if (!ret)
+	if (!ret) {
+		atomic_long_add(user_extra, &user->locked_vm);
+		vma->vm_mm->pinned_vm += extra;
+
 		atomic_inc(&event->mmap_count);
+	} else if (rb) {
+		atomic_dec(&rb->mmap_count);
+	}
+aux_unlock:
 	mutex_unlock(&event->mmap_mutex);
 
 	/*
@@ -4766,7 +4951,7 @@
 	}
 
 	if (sample_type & PERF_SAMPLE_TIME)
-		data->time = perf_clock();
+		data->time = perf_event_clock(event);
 
 	if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
 		data->id = primary_event_id(event);
@@ -5344,6 +5529,8 @@
 	task_event->event_id.tid = perf_event_tid(event, task);
 	task_event->event_id.ptid = perf_event_tid(event, current);
 
+	task_event->event_id.time = perf_event_clock(event);
+
 	perf_output_put(&handle, task_event->event_id);
 
 	perf_event__output_id_sample(event, &handle, &sample);
@@ -5377,7 +5564,7 @@
 			/* .ppid */
 			/* .tid  */
 			/* .ptid */
-			.time = perf_clock(),
+			/* .time */
 		},
 	};
 
@@ -5732,6 +5919,40 @@
 	perf_event_mmap_event(&mmap_event);
 }
 
+void perf_event_aux_event(struct perf_event *event, unsigned long head,
+			  unsigned long size, u64 flags)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	struct perf_aux_event {
+		struct perf_event_header	header;
+		u64				offset;
+		u64				size;
+		u64				flags;
+	} rec = {
+		.header = {
+			.type = PERF_RECORD_AUX,
+			.misc = 0,
+			.size = sizeof(rec),
+		},
+		.offset		= head,
+		.size		= size,
+		.flags		= flags,
+	};
+	int ret;
+
+	perf_event_header__init_id(&rec.header, &sample, event);
+	ret = perf_output_begin(&handle, event, rec.header.size);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, rec);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
 /*
  * IRQ throttle logging
  */
@@ -5753,7 +5974,7 @@
 			.misc = 0,
 			.size = sizeof(throttle_event),
 		},
-		.time		= perf_clock(),
+		.time		= perf_event_clock(event),
 		.id		= primary_event_id(event),
 		.stream_id	= event->id,
 	};
@@ -5773,6 +5994,44 @@
 	perf_output_end(&handle);
 }
 
+static void perf_log_itrace_start(struct perf_event *event)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	struct perf_aux_event {
+		struct perf_event_header        header;
+		u32				pid;
+		u32				tid;
+	} rec;
+	int ret;
+
+	if (event->parent)
+		event = event->parent;
+
+	if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) ||
+	    event->hw.itrace_started)
+		return;
+
+	event->hw.itrace_started = 1;
+
+	rec.header.type	= PERF_RECORD_ITRACE_START;
+	rec.header.misc	= 0;
+	rec.header.size	= sizeof(rec);
+	rec.pid	= perf_event_pid(event, current);
+	rec.tid	= perf_event_tid(event, current);
+
+	perf_event_header__init_id(&rec.header, &sample, event);
+	ret = perf_output_begin(&handle, event, rec.header.size);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, rec);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -6133,6 +6392,7 @@
 	}
 
 	hlist_add_head_rcu(&event->hlist_entry, head);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
@@ -6296,6 +6556,8 @@
 static struct pmu perf_swevent = {
 	.task_ctx_nr	= perf_sw_context,
 
+	.capabilities	= PERF_PMU_CAP_NO_NMI,
+
 	.event_init	= perf_swevent_init,
 	.add		= perf_swevent_add,
 	.del		= perf_swevent_del,
@@ -6449,6 +6711,49 @@
 	ftrace_profile_free_filter(event);
 }
 
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+{
+	struct bpf_prog *prog;
+
+	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+		return -EINVAL;
+
+	if (event->tp_event->prog)
+		return -EEXIST;
+
+	if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
+		/* bpf programs can only be attached to kprobes */
+		return -EINVAL;
+
+	prog = bpf_prog_get(prog_fd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	if (prog->aux->prog_type != BPF_PROG_TYPE_KPROBE) {
+		/* valid fd, but invalid bpf program type */
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	event->tp_event->prog = prog;
+
+	return 0;
+}
+
+static void perf_event_free_bpf_prog(struct perf_event *event)
+{
+	struct bpf_prog *prog;
+
+	if (!event->tp_event)
+		return;
+
+	prog = event->tp_event->prog;
+	if (prog) {
+		event->tp_event->prog = NULL;
+		bpf_prog_put(prog);
+	}
+}
+
 #else
 
 static inline void perf_tp_register(void)
@@ -6464,6 +6769,14 @@
 {
 }
 
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+{
+	return -ENOENT;
+}
+
+static void perf_event_free_bpf_prog(struct perf_event *event)
+{
+}
 #endif /* CONFIG_EVENT_TRACING */
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -6602,6 +6915,7 @@
 {
 	if (flags & PERF_EF_START)
 		cpu_clock_event_start(event, flags);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
@@ -6638,6 +6952,8 @@
 static struct pmu perf_cpu_clock = {
 	.task_ctx_nr	= perf_sw_context,
 
+	.capabilities	= PERF_PMU_CAP_NO_NMI,
+
 	.event_init	= cpu_clock_event_init,
 	.add		= cpu_clock_event_add,
 	.del		= cpu_clock_event_del,
@@ -6676,6 +6992,7 @@
 {
 	if (flags & PERF_EF_START)
 		task_clock_event_start(event, flags);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
@@ -6716,6 +7033,8 @@
 static struct pmu perf_task_clock = {
 	.task_ctx_nr	= perf_sw_context,
 
+	.capabilities	= PERF_PMU_CAP_NO_NMI,
+
 	.event_init	= task_clock_event_init,
 	.add		= task_clock_event_add,
 	.del		= task_clock_event_del,
@@ -6993,6 +7312,7 @@
 		pmu->event_idx = perf_event_idx_default;
 
 	list_add_rcu(&pmu->entry, &pmus);
+	atomic_set(&pmu->exclusive_cnt, 0);
 	ret = 0;
 unlock:
 	mutex_unlock(&pmus_lock);
@@ -7037,12 +7357,23 @@
 
 static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 {
+	struct perf_event_context *ctx = NULL;
 	int ret;
 
 	if (!try_module_get(pmu->module))
 		return -ENODEV;
+
+	if (event->group_leader != event) {
+		ctx = perf_event_ctx_lock(event->group_leader);
+		BUG_ON(!ctx);
+	}
+
 	event->pmu = pmu;
 	ret = pmu->event_init(event);
+
+	if (ctx)
+		perf_event_ctx_unlock(event->group_leader, ctx);
+
 	if (ret)
 		module_put(pmu->module);
 
@@ -7089,10 +7420,6 @@
 	if (event->parent)
 		return;
 
-	if (has_branch_stack(event)) {
-		if (!(event->attach_state & PERF_ATTACH_TASK))
-			atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
-	}
 	if (is_cgroup_event(event))
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
@@ -7131,7 +7458,7 @@
 		 struct perf_event *group_leader,
 		 struct perf_event *parent_event,
 		 perf_overflow_handler_t overflow_handler,
-		 void *context)
+		 void *context, int cgroup_fd)
 {
 	struct pmu *pmu;
 	struct perf_event *event;
@@ -7186,18 +7513,18 @@
 
 	if (task) {
 		event->attach_state = PERF_ATTACH_TASK;
-
-		if (attr->type == PERF_TYPE_TRACEPOINT)
-			event->hw.tp_target = task;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
 		/*
-		 * hw_breakpoint is a bit difficult here..
+		 * XXX pmu::event_init needs to know what task to account to
+		 * and we cannot use the ctx information because we need the
+		 * pmu before we get a ctx.
 		 */
-		else if (attr->type == PERF_TYPE_BREAKPOINT)
-			event->hw.bp_target = task;
-#endif
+		event->hw.target = task;
 	}
 
+	event->clock = &local_clock;
+	if (parent_event)
+		event->clock = parent_event->clock;
+
 	if (!overflow_handler && parent_event) {
 		overflow_handler = parent_event->overflow_handler;
 		context = parent_event->overflow_handler_context;
@@ -7224,6 +7551,15 @@
 	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
 		goto err_ns;
 
+	if (!has_branch_stack(event))
+		event->attr.branch_sample_type = 0;
+
+	if (cgroup_fd != -1) {
+		err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
+		if (err)
+			goto err_ns;
+	}
+
 	pmu = perf_init_event(event);
 	if (!pmu)
 		goto err_ns;
@@ -7232,21 +7568,30 @@
 		goto err_ns;
 	}
 
+	err = exclusive_event_init(event);
+	if (err)
+		goto err_pmu;
+
 	if (!event->parent) {
 		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 			err = get_callchain_buffers();
 			if (err)
-				goto err_pmu;
+				goto err_per_task;
 		}
 	}
 
 	return event;
 
+err_per_task:
+	exclusive_event_destroy(event);
+
 err_pmu:
 	if (event->destroy)
 		event->destroy(event);
 	module_put(pmu->module);
 err_ns:
+	if (is_cgroup_event(event))
+		perf_detach_cgroup(event);
 	if (event->ns)
 		put_pid_ns(event->ns);
 	kfree(event);
@@ -7409,6 +7754,19 @@
 	if (output_event->cpu == -1 && output_event->ctx != event->ctx)
 		goto out;
 
+	/*
+	 * Mixing clocks in the same buffer is trouble you don't need.
+	 */
+	if (output_event->clock != event->clock)
+		goto out;
+
+	/*
+	 * If both events generate aux data, they must be on the same PMU
+	 */
+	if (has_aux(event) && has_aux(output_event) &&
+	    event->pmu != output_event->pmu)
+		goto out;
+
 set:
 	mutex_lock(&event->mmap_mutex);
 	/* Can't redirect output if we've got an active mmap() */
@@ -7441,6 +7799,43 @@
 	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
 }
 
+static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
+{
+	bool nmi_safe = false;
+
+	switch (clk_id) {
+	case CLOCK_MONOTONIC:
+		event->clock = &ktime_get_mono_fast_ns;
+		nmi_safe = true;
+		break;
+
+	case CLOCK_MONOTONIC_RAW:
+		event->clock = &ktime_get_raw_fast_ns;
+		nmi_safe = true;
+		break;
+
+	case CLOCK_REALTIME:
+		event->clock = &ktime_get_real_ns;
+		break;
+
+	case CLOCK_BOOTTIME:
+		event->clock = &ktime_get_boot_ns;
+		break;
+
+	case CLOCK_TAI:
+		event->clock = &ktime_get_tai_ns;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -7465,6 +7860,7 @@
 	int move_group = 0;
 	int err;
 	int f_flags = O_RDWR;
+	int cgroup_fd = -1;
 
 	/* for future expandability... */
 	if (flags & ~PERF_FLAG_ALL)
@@ -7530,21 +7926,16 @@
 
 	get_online_cpus();
 
+	if (flags & PERF_FLAG_PID_CGROUP)
+		cgroup_fd = pid;
+
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
-				 NULL, NULL);
+				 NULL, NULL, cgroup_fd);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
 		goto err_cpus;
 	}
 
-	if (flags & PERF_FLAG_PID_CGROUP) {
-		err = perf_cgroup_connect(pid, event, &attr, group_leader);
-		if (err) {
-			__free_event(event);
-			goto err_cpus;
-		}
-	}
-
 	if (is_sampling_event(event)) {
 		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
 			err = -ENOTSUPP;
@@ -7560,6 +7951,12 @@
 	 */
 	pmu = event->pmu;
 
+	if (attr.use_clockid) {
+		err = perf_event_set_clock(event, attr.clockid);
+		if (err)
+			goto err_alloc;
+	}
+
 	if (group_leader &&
 	    (is_software_event(event) != is_software_event(group_leader))) {
 		if (is_software_event(event)) {
@@ -7586,12 +7983,17 @@
 	/*
 	 * Get the target context (task or percpu):
 	 */
-	ctx = find_get_context(pmu, task, event->cpu);
+	ctx = find_get_context(pmu, task, event);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err_alloc;
 	}
 
+	if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) {
+		err = -EBUSY;
+		goto err_context;
+	}
+
 	if (task) {
 		put_task_struct(task);
 		task = NULL;
@@ -7609,6 +8011,11 @@
 		 */
 		if (group_leader->group_leader != group_leader)
 			goto err_context;
+
+		/* All events in a group should have the same clock */
+		if (group_leader->clock != event->clock)
+			goto err_context;
+
 		/*
 		 * Do not allow to attach to a group in a different
 		 * task or CPU context:
@@ -7709,6 +8116,13 @@
 		get_ctx(ctx);
 	}
 
+	if (!exclusive_event_installable(event, ctx)) {
+		err = -EBUSY;
+		mutex_unlock(&ctx->mutex);
+		fput(event_file);
+		goto err_context;
+	}
+
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
 
@@ -7781,7 +8195,7 @@
 	 */
 
 	event = perf_event_alloc(attr, cpu, task, NULL, NULL,
-				 overflow_handler, context);
+				 overflow_handler, context, -1);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
 		goto err;
@@ -7792,7 +8206,7 @@
 
 	account_event(event);
 
-	ctx = find_get_context(event->pmu, task, cpu);
+	ctx = find_get_context(event->pmu, task, event);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err_free;
@@ -7800,6 +8214,14 @@
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
+	if (!exclusive_event_installable(event, ctx)) {
+		mutex_unlock(&ctx->mutex);
+		perf_unpin_context(ctx);
+		put_ctx(ctx);
+		err = -EBUSY;
+		goto err_free;
+	}
+
 	perf_install_in_context(ctx, event, cpu);
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
@@ -8142,7 +8564,7 @@
 					   parent_event->cpu,
 					   child,
 					   group_leader, parent_event,
-				           NULL, NULL);
+					   NULL, NULL, -1);
 	if (IS_ERR(child_event))
 		return child_event;
 
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 9803a66..92ce5f4 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -116,12 +116,12 @@
  */
 static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 {
-	struct task_struct *tsk = bp->hw.bp_target;
+	struct task_struct *tsk = bp->hw.target;
 	struct perf_event *iter;
 	int count = 0;
 
 	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
-		if (iter->hw.bp_target == tsk &&
+		if (iter->hw.target == tsk &&
 		    find_slot_idx(iter) == type &&
 		    (iter->cpu < 0 || cpu == iter->cpu))
 			count += hw_breakpoint_weight(iter);
@@ -153,7 +153,7 @@
 		int nr;
 
 		nr = info->cpu_pinned;
-		if (!bp->hw.bp_target)
+		if (!bp->hw.target)
 			nr += max_task_bp_pinned(cpu, type);
 		else
 			nr += task_bp_pinned(cpu, bp, type);
@@ -210,7 +210,7 @@
 		weight = -weight;
 
 	/* Pinned counter cpu profiling */
-	if (!bp->hw.bp_target) {
+	if (!bp->hw.target) {
 		get_bp_info(bp->cpu, type)->cpu_pinned += weight;
 		return;
 	}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 569b2187..9f6ce9b 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -27,6 +27,7 @@
 	local_t				lost;		/* nr records lost   */
 
 	long				watermark;	/* wakeup watermark  */
+	long				aux_watermark;
 	/* poll crap */
 	spinlock_t			event_lock;
 	struct list_head		event_list;
@@ -35,6 +36,20 @@
 	unsigned long			mmap_locked;
 	struct user_struct		*mmap_user;
 
+	/* AUX area */
+	local_t				aux_head;
+	local_t				aux_nest;
+	local_t				aux_wakeup;
+	unsigned long			aux_pgoff;
+	int				aux_nr_pages;
+	int				aux_overwrite;
+	atomic_t			aux_mmap_count;
+	unsigned long			aux_mmap_locked;
+	void				(*free_aux)(void *);
+	atomic_t			aux_refcount;
+	void				**aux_pages;
+	void				*aux_priv;
+
 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
 };
@@ -43,6 +58,19 @@
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
+extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
+			pgoff_t pgoff, int nr_pages, long watermark, int flags);
+extern void rb_free_aux(struct ring_buffer *rb);
+extern struct ring_buffer *ring_buffer_get(struct perf_event *event);
+extern void ring_buffer_put(struct ring_buffer *rb);
+
+static inline bool rb_has_aux(struct ring_buffer *rb)
+{
+	return !!rb->aux_nr_pages;
+}
+
+void perf_event_aux_event(struct perf_event *event, unsigned long head,
+			  unsigned long size, u64 flags);
 
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
@@ -81,6 +109,11 @@
 	return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
 }
 
+static inline unsigned long perf_aux_size(struct ring_buffer *rb)
+{
+	return rb->aux_nr_pages << PAGE_SHIFT;
+}
+
 #define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
 static inline unsigned long						\
 func_name(struct perf_output_handle *handle,				\
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index eadb95c..232f00f 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -243,14 +243,317 @@
 	spin_lock_init(&rb->event_lock);
 }
 
+/*
+ * This is called before hardware starts writing to the AUX area to
+ * obtain an output handle and make sure there's room in the buffer.
+ * When the capture completes, call perf_aux_output_end() to commit
+ * the recorded data to the buffer.
+ *
+ * The ordering is similar to that of perf_output_{begin,end}, with
+ * the exception of (B), which should be taken care of by the pmu
+ * driver, since ordering rules will differ depending on hardware.
+ */
+void *perf_aux_output_begin(struct perf_output_handle *handle,
+			    struct perf_event *event)
+{
+	struct perf_event *output_event = event;
+	unsigned long aux_head, aux_tail;
+	struct ring_buffer *rb;
+
+	if (output_event->parent)
+		output_event = output_event->parent;
+
+	/*
+	 * Since this will typically be open across pmu::add/pmu::del, we
+	 * grab ring_buffer's refcount instead of holding rcu read lock
+	 * to make sure it doesn't disappear under us.
+	 */
+	rb = ring_buffer_get(output_event);
+	if (!rb)
+		return NULL;
+
+	if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
+		goto err;
+
+	/*
+	 * Nesting is not supported for AUX area, make sure nested
+	 * writers are caught early
+	 */
+	if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
+		goto err_put;
+
+	aux_head = local_read(&rb->aux_head);
+
+	handle->rb = rb;
+	handle->event = event;
+	handle->head = aux_head;
+	handle->size = 0;
+
+	/*
+	 * In overwrite mode, AUX data stores do not depend on aux_tail,
+	 * therefore (A) control dependency barrier does not exist. The
+	 * (B) <-> (C) ordering is still observed by the pmu driver.
+	 */
+	if (!rb->aux_overwrite) {
+		aux_tail = ACCESS_ONCE(rb->user_page->aux_tail);
+		handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark;
+		if (aux_head - aux_tail < perf_aux_size(rb))
+			handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb));
+
+		/*
+		 * handle->size computation depends on aux_tail load; this forms a
+		 * control dependency barrier separating aux_tail load from aux data
+		 * store that will be enabled on successful return
+		 */
+		if (!handle->size) { /* A, matches D */
+			event->pending_disable = 1;
+			perf_output_wakeup(handle);
+			local_set(&rb->aux_nest, 0);
+			goto err_put;
+		}
+	}
+
+	return handle->rb->aux_priv;
+
+err_put:
+	rb_free_aux(rb);
+
+err:
+	ring_buffer_put(rb);
+	handle->event = NULL;
+
+	return NULL;
+}
+
+/*
+ * Commit the data written by hardware into the ring buffer by adjusting
+ * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
+ * pmu driver's responsibility to observe ordering rules of the hardware,
+ * so that all the data is externally visible before this is called.
+ */
+void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
+			 bool truncated)
+{
+	struct ring_buffer *rb = handle->rb;
+	unsigned long aux_head;
+	u64 flags = 0;
+
+	if (truncated)
+		flags |= PERF_AUX_FLAG_TRUNCATED;
+
+	/* in overwrite mode, driver provides aux_head via handle */
+	if (rb->aux_overwrite) {
+		flags |= PERF_AUX_FLAG_OVERWRITE;
+
+		aux_head = handle->head;
+		local_set(&rb->aux_head, aux_head);
+	} else {
+		aux_head = local_read(&rb->aux_head);
+		local_add(size, &rb->aux_head);
+	}
+
+	if (size || flags) {
+		/*
+		 * Only send RECORD_AUX if we have something useful to communicate
+		 */
+
+		perf_event_aux_event(handle->event, aux_head, size, flags);
+	}
+
+	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
+
+	if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
+		perf_output_wakeup(handle);
+		local_add(rb->aux_watermark, &rb->aux_wakeup);
+	}
+	handle->event = NULL;
+
+	local_set(&rb->aux_nest, 0);
+	rb_free_aux(rb);
+	ring_buffer_put(rb);
+}
+
+/*
+ * Skip over a given number of bytes in the AUX buffer, due to, for example,
+ * hardware's alignment constraints.
+ */
+int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
+{
+	struct ring_buffer *rb = handle->rb;
+	unsigned long aux_head;
+
+	if (size > handle->size)
+		return -ENOSPC;
+
+	local_add(size, &rb->aux_head);
+
+	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
+	if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
+		perf_output_wakeup(handle);
+		local_add(rb->aux_watermark, &rb->aux_wakeup);
+		handle->wakeup = local_read(&rb->aux_wakeup) +
+				 rb->aux_watermark;
+	}
+
+	handle->head = aux_head;
+	handle->size -= size;
+
+	return 0;
+}
+
+void *perf_get_aux(struct perf_output_handle *handle)
+{
+	/* this is only valid between perf_aux_output_begin and *_end */
+	if (!handle->event)
+		return NULL;
+
+	return handle->rb->aux_priv;
+}
+
+#define PERF_AUX_GFP	(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
+
+static struct page *rb_alloc_aux_page(int node, int order)
+{
+	struct page *page;
+
+	if (order > MAX_ORDER)
+		order = MAX_ORDER;
+
+	do {
+		page = alloc_pages_node(node, PERF_AUX_GFP, order);
+	} while (!page && order--);
+
+	if (page && order) {
+		/*
+		 * Communicate the allocation size to the driver
+		 */
+		split_page(page, order);
+		SetPagePrivate(page);
+		set_page_private(page, order);
+	}
+
+	return page;
+}
+
+static void rb_free_aux_page(struct ring_buffer *rb, int idx)
+{
+	struct page *page = virt_to_page(rb->aux_pages[idx]);
+
+	ClearPagePrivate(page);
+	page->mapping = NULL;
+	__free_page(page);
+}
+
+int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
+		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
+{
+	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
+	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
+	int ret = -ENOMEM, max_order = 0;
+
+	if (!has_aux(event))
+		return -ENOTSUPP;
+
+	if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
+		/*
+		 * We need to start with the max_order that fits in nr_pages,
+		 * not the other way around, hence ilog2() and not get_order.
+		 */
+		max_order = ilog2(nr_pages);
+
+		/*
+		 * PMU requests more than one contiguous chunks of memory
+		 * for SW double buffering
+		 */
+		if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) &&
+		    !overwrite) {
+			if (!max_order)
+				return -EINVAL;
+
+			max_order--;
+		}
+	}
+
+	rb->aux_pages = kzalloc_node(nr_pages * sizeof(void *), GFP_KERNEL, node);
+	if (!rb->aux_pages)
+		return -ENOMEM;
+
+	rb->free_aux = event->pmu->free_aux;
+	for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) {
+		struct page *page;
+		int last, order;
+
+		order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages));
+		page = rb_alloc_aux_page(node, order);
+		if (!page)
+			goto out;
+
+		for (last = rb->aux_nr_pages + (1 << page_private(page));
+		     last > rb->aux_nr_pages; rb->aux_nr_pages++)
+			rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
+	}
+
+	rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
+					     overwrite);
+	if (!rb->aux_priv)
+		goto out;
+
+	ret = 0;
+
+	/*
+	 * aux_pages (and pmu driver's private data, aux_priv) will be
+	 * referenced in both producer's and consumer's contexts, thus
+	 * we keep a refcount here to make sure either of the two can
+	 * reference them safely.
+	 */
+	atomic_set(&rb->aux_refcount, 1);
+
+	rb->aux_overwrite = overwrite;
+	rb->aux_watermark = watermark;
+
+	if (!rb->aux_watermark && !rb->aux_overwrite)
+		rb->aux_watermark = nr_pages << (PAGE_SHIFT - 1);
+
+out:
+	if (!ret)
+		rb->aux_pgoff = pgoff;
+	else
+		rb_free_aux(rb);
+
+	return ret;
+}
+
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+	int pg;
+
+	if (rb->aux_priv) {
+		rb->free_aux(rb->aux_priv);
+		rb->free_aux = NULL;
+		rb->aux_priv = NULL;
+	}
+
+	for (pg = 0; pg < rb->aux_nr_pages; pg++)
+		rb_free_aux_page(rb, pg);
+
+	kfree(rb->aux_pages);
+	rb->aux_nr_pages = 0;
+}
+
+void rb_free_aux(struct ring_buffer *rb)
+{
+	if (atomic_dec_and_test(&rb->aux_refcount))
+		__rb_free_aux(rb);
+}
+
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
  * Back perf_mmap() with regular GFP_KERNEL-0 pages.
  */
 
-struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+static struct page *
+__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
 	if (pgoff > rb->nr_pages)
 		return NULL;
@@ -340,8 +643,8 @@
 	return rb->nr_pages << page_order(rb);
 }
 
-struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+static struct page *
+__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
 	/* The '>' counts in the user page. */
 	if (pgoff > data_page_nr(rb))
@@ -416,3 +719,19 @@
 }
 
 #endif
+
+struct page *
+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+{
+	if (rb->aux_nr_pages) {
+		/* above AUX space */
+		if (pgoff > rb->aux_pgoff + rb->aux_nr_pages)
+			return NULL;
+
+		/* AUX space */
+		if (pgoff >= rb->aux_pgoff)
+			return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
+	}
+
+	return __perf_mmap_to_page(rb, pgoff);
+}
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 3f9f1d6..284e269 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -335,32 +335,20 @@
 	rcu_read_unlock();
 }
 
-static int klp_disable_func(struct klp_func *func)
+static void klp_disable_func(struct klp_func *func)
 {
 	struct klp_ops *ops;
-	int ret;
 
-	if (WARN_ON(func->state != KLP_ENABLED))
-		return -EINVAL;
-
-	if (WARN_ON(!func->old_addr))
-		return -EINVAL;
+	WARN_ON(func->state != KLP_ENABLED);
+	WARN_ON(!func->old_addr);
 
 	ops = klp_find_ops(func->old_addr);
 	if (WARN_ON(!ops))
-		return -EINVAL;
+		return;
 
 	if (list_is_singular(&ops->func_stack)) {
-		ret = unregister_ftrace_function(&ops->fops);
-		if (ret) {
-			pr_err("failed to unregister ftrace handler for function '%s' (%d)\n",
-			       func->old_name, ret);
-			return ret;
-		}
-
-		ret = ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0);
-		if (ret)
-			pr_warn("function unregister succeeded but failed to clear the filter\n");
+		WARN_ON(unregister_ftrace_function(&ops->fops));
+		WARN_ON(ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0));
 
 		list_del_rcu(&func->stack_node);
 		list_del(&ops->node);
@@ -370,8 +358,6 @@
 	}
 
 	func->state = KLP_DISABLED;
-
-	return 0;
 }
 
 static int klp_enable_func(struct klp_func *func)
@@ -432,23 +418,15 @@
 	return ret;
 }
 
-static int klp_disable_object(struct klp_object *obj)
+static void klp_disable_object(struct klp_object *obj)
 {
 	struct klp_func *func;
-	int ret;
 
-	for (func = obj->funcs; func->old_name; func++) {
-		if (func->state != KLP_ENABLED)
-			continue;
-
-		ret = klp_disable_func(func);
-		if (ret)
-			return ret;
-	}
+	for (func = obj->funcs; func->old_name; func++)
+		if (func->state == KLP_ENABLED)
+			klp_disable_func(func);
 
 	obj->state = KLP_DISABLED;
-
-	return 0;
 }
 
 static int klp_enable_object(struct klp_object *obj)
@@ -464,22 +442,19 @@
 
 	for (func = obj->funcs; func->old_name; func++) {
 		ret = klp_enable_func(func);
-		if (ret)
-			goto unregister;
+		if (ret) {
+			klp_disable_object(obj);
+			return ret;
+		}
 	}
 	obj->state = KLP_ENABLED;
 
 	return 0;
-
-unregister:
-	WARN_ON(klp_disable_object(obj));
-	return ret;
 }
 
 static int __klp_disable_patch(struct klp_patch *patch)
 {
 	struct klp_object *obj;
-	int ret;
 
 	/* enforce stacking: only the last enabled patch can be disabled */
 	if (!list_is_last(&patch->list, &klp_patches) &&
@@ -489,12 +464,8 @@
 	pr_notice("disabling patch '%s'\n", patch->mod->name);
 
 	for (obj = patch->objs; obj->funcs; obj++) {
-		if (obj->state != KLP_ENABLED)
-			continue;
-
-		ret = klp_disable_object(obj);
-		if (ret)
-			return ret;
+		if (obj->state == KLP_ENABLED)
+			klp_disable_object(obj);
 	}
 
 	patch->state = KLP_DISABLED;
@@ -553,8 +524,6 @@
 	pr_notice("enabling patch '%s'\n", patch->mod->name);
 
 	for (obj = patch->objs; obj->funcs; obj++) {
-		klp_find_object_module(obj);
-
 		if (!klp_is_object_loaded(obj))
 			continue;
 
@@ -945,7 +914,6 @@
 {
 	struct module *pmod = patch->mod;
 	struct module *mod = obj->mod;
-	int ret;
 
 	if (patch->state == KLP_DISABLED)
 		goto disabled;
@@ -953,10 +921,7 @@
 	pr_notice("reverting patch '%s' on unloading module '%s'\n",
 		  pmod->name, mod->name);
 
-	ret = klp_disable_object(obj);
-	if (ret)
-		pr_warn("failed to revert patch '%s' on module '%s' (%d)\n",
-			pmod->name, mod->name, ret);
+	klp_disable_object(obj);
 
 disabled:
 	klp_free_object_loaded(obj);
diff --git a/kernel/module.c b/kernel/module.c
index ec53f59..650b038 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2770,6 +2770,9 @@
 	mod->trace_events = section_objs(info, "_ftrace_events",
 					 sizeof(*mod->trace_events),
 					 &mod->num_trace_events);
+	mod->trace_enums = section_objs(info, "_ftrace_enum_map",
+					sizeof(*mod->trace_enums),
+					&mod->num_trace_enums);
 #endif
 #ifdef CONFIG_TRACING
 	mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index bb0635b..879edfc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -32,7 +32,6 @@
 #include <linux/security.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/aio.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
 #include <linux/kdb.h>
@@ -46,6 +45,7 @@
 #include <linux/irq_work.h>
 #include <linux/utsname.h>
 #include <linux/ctype.h>
+#include <linux/uio.h>
 
 #include <asm/uaccess.h>
 
@@ -521,7 +521,7 @@
 	int i;
 	int level = default_message_loglevel;
 	int facility = 1;	/* LOG_USER */
-	size_t len = iocb->ki_nbytes;
+	size_t len = iov_iter_count(from);
 	ssize_t ret = len;
 
 	if (len > LOG_LINE_MAX)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 30d42aa..8dbe276 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -853,6 +853,8 @@
 static int
 rcu_torture_writer(void *arg)
 {
+	bool can_expedite = !rcu_gp_is_expedited();
+	int expediting = 0;
 	unsigned long gp_snap;
 	bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
 	bool gp_sync1 = gp_sync;
@@ -865,9 +867,15 @@
 	int nsynctypes = 0;
 
 	VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
+	pr_alert("%s" TORTURE_FLAG
+		 " Grace periods expedited from boot/sysfs for %s,\n",
+		 torture_type, cur_ops->name);
+	pr_alert("%s" TORTURE_FLAG
+		 " Testing of dynamic grace-period expediting diabled.\n",
+		 torture_type);
 
 	/* Initialize synctype[] array.  If none set, take default. */
-	if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync)
+	if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
 		gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
 	if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
 		synctype[nsynctypes++] = RTWS_COND_GET;
@@ -949,9 +957,26 @@
 			}
 		}
 		rcutorture_record_progress(++rcu_torture_current_version);
+		/* Cycle through nesting levels of rcu_expedite_gp() calls. */
+		if (can_expedite &&
+		    !(torture_random(&rand) & 0xff & (!!expediting - 1))) {
+			WARN_ON_ONCE(expediting == 0 && rcu_gp_is_expedited());
+			if (expediting >= 0)
+				rcu_expedite_gp();
+			else
+				rcu_unexpedite_gp();
+			if (++expediting > 3)
+				expediting = -expediting;
+		}
 		rcu_torture_writer_state = RTWS_STUTTER;
 		stutter_wait("rcu_torture_writer");
 	} while (!torture_must_stop());
+	/* Reset expediting back to unexpedited. */
+	if (expediting > 0)
+		expediting = -expediting;
+	while (can_expedite && expediting++ < 0)
+		rcu_unexpedite_gp();
+	WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited());
 	rcu_torture_writer_state = RTWS_STOPPING;
 	torture_kthread_stopping("rcu_torture_writer");
 	return 0;
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 445bf8f..cad76e7 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -402,23 +402,6 @@
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
-struct rcu_synchronize {
-	struct rcu_head head;
-	struct completion completion;
-};
-
-/*
- * Awaken the corresponding synchronize_srcu() instance now that a
- * grace period has elapsed.
- */
-static void wakeme_after_rcu(struct rcu_head *head)
-{
-	struct rcu_synchronize *rcu;
-
-	rcu = container_of(head, struct rcu_synchronize, head);
-	complete(&rcu->completion);
-}
-
 static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
 static void srcu_reschedule(struct srcu_struct *sp);
 
@@ -507,7 +490,7 @@
  */
 void synchronize_srcu(struct srcu_struct *sp)
 {
-	__synchronize_srcu(sp, rcu_expedited
+	__synchronize_srcu(sp, rcu_gp_is_expedited()
 			   ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
 			   : SYNCHRONIZE_SRCU_TRYCOUNT);
 }
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index cc9ceca..069742d 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -103,8 +103,7 @@
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
 	RCU_TRACE(reset_cpu_stall_ticks(rcp));
-	if (rcp->rcucblist != NULL &&
-	    rcp->donetail != rcp->curtail) {
+	if (rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
 		return 1;
 	}
@@ -169,17 +168,6 @@
 	unsigned long flags;
 	RCU_TRACE(int cb_count = 0);
 
-	/* If no RCU callbacks ready to invoke, just return. */
-	if (&rcp->rcucblist == rcp->donetail) {
-		RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
-		RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
-					      !!ACCESS_ONCE(rcp->rcucblist),
-					      need_resched(),
-					      is_idle_task(current),
-					      false));
-		return;
-	}
-
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
 	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 48d640c..233165d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -91,8 +91,10 @@
 
 #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
 DEFINE_RCU_TPS(sname) \
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
 struct rcu_state sname##_state = { \
 	.level = { &sname##_state.node[0] }, \
+	.rda = &sname##_data, \
 	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = 0UL - 300UL, \
@@ -101,11 +103,9 @@
 	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
 	.orphan_donetail = &sname##_state.orphan_donelist, \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
-	.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
-}; \
-DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data)
+}
 
 RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
 RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
@@ -152,6 +152,8 @@
  */
 static int rcu_scheduler_fully_active __read_mostly;
 
+static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
+static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
 static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
@@ -160,6 +162,12 @@
 static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
 module_param(kthread_prio, int, 0644);
 
+/* Delay in jiffies for grace-period initialization delays. */
+static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT)
+				? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
+				: 0;
+module_param(gp_init_delay, int, 0644);
+
 /*
  * Track the rcutorture test sequence number and the update version
  * number within a given test.  The rcutorture_testseq is incremented
@@ -173,6 +181,17 @@
 unsigned long rcutorture_vernum;
 
 /*
+ * Compute the mask of online CPUs for the specified rcu_node structure.
+ * This will not be stable unless the rcu_node structure's ->lock is
+ * held, but the bit corresponding to the current CPU will be stable
+ * in most contexts.
+ */
+unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
+{
+	return ACCESS_ONCE(rnp->qsmaskinitnext);
+}
+
+/*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
  * structure's ->lock, but of course results can be subject to change.
@@ -292,10 +311,10 @@
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
- * Register a quiesecent state for all RCU flavors.  If there is an
+ * Register a quiescent state for all RCU flavors.  If there is an
  * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
  * dyntick-idle quiescent state visible to other CPUs (but only for those
- * RCU flavors in desparate need of a quiescent state, which will normally
+ * RCU flavors in desperate need of a quiescent state, which will normally
  * be none of them).  Either way, do a lightweight quiescent state for
  * all RCU flavors.
  */
@@ -410,6 +429,15 @@
 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
 
 /*
+ * Force a quiescent state for RCU-sched.
+ */
+void rcu_sched_force_quiescent_state(void)
+{
+	force_quiescent_state(&rcu_sched_state);
+}
+EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
+
+/*
  * Show the state of the grace-period kthreads.
  */
 void show_rcu_gp_kthreads(void)
@@ -483,15 +511,6 @@
 EXPORT_SYMBOL_GPL(rcutorture_record_progress);
 
 /*
- * Force a quiescent state for RCU-sched.
- */
-void rcu_sched_force_quiescent_state(void)
-{
-	force_quiescent_state(&rcu_sched_state);
-}
-EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
-
-/*
  * Does the CPU have callbacks ready to be invoked?
  */
 static int
@@ -954,7 +973,7 @@
 	preempt_disable();
 	rdp = this_cpu_ptr(&rcu_sched_data);
 	rnp = rdp->mynode;
-	ret = (rdp->grpmask & rnp->qsmaskinit) ||
+	ret = (rdp->grpmask & rcu_rnp_online_cpus(rnp)) ||
 	      !rcu_scheduler_fully_active;
 	preempt_enable();
 	return ret;
@@ -1196,9 +1215,10 @@
 		} else {
 			j = jiffies;
 			gpa = ACCESS_ONCE(rsp->gp_activity);
-			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n",
+			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
 			       rsp->name, j - gpa, j, gpa,
-			       jiffies_till_next_fqs);
+			       jiffies_till_next_fqs,
+			       rcu_get_root(rsp)->qsmask);
 			/* In this case, the current CPU might be at fault. */
 			sched_show_task(current);
 		}
@@ -1328,17 +1348,27 @@
 }
 
 /*
+ * Initialize the specified rcu_data structure's default callback list
+ * to empty.  The default callback list is the one that is not used by
+ * no-callbacks CPUs.
+ */
+static void init_default_callback_list(struct rcu_data *rdp)
+{
+	int i;
+
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+}
+
+/*
  * Initialize the specified rcu_data structure's callback list to empty.
  */
 static void init_callback_list(struct rcu_data *rdp)
 {
-	int i;
-
 	if (init_nocb_callback_list(rdp))
 		return;
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_default_callback_list(rdp);
 }
 
 /*
@@ -1703,11 +1733,11 @@
  */
 static int rcu_gp_init(struct rcu_state *rsp)
 {
+	unsigned long oldmask;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	ACCESS_ONCE(rsp->gp_activity) = jiffies;
-	rcu_bind_gp_kthread();
 	raw_spin_lock_irq(&rnp->lock);
 	smp_mb__after_unlock_lock();
 	if (!ACCESS_ONCE(rsp->gp_flags)) {
@@ -1733,9 +1763,54 @@
 	trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
 	raw_spin_unlock_irq(&rnp->lock);
 
-	/* Exclude any concurrent CPU-hotplug operations. */
-	mutex_lock(&rsp->onoff_mutex);
-	smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */
+	/*
+	 * Apply per-leaf buffered online and offline operations to the
+	 * rcu_node tree.  Note that this new grace period need not wait
+	 * for subsequent online CPUs, and that quiescent-state forcing
+	 * will handle subsequent offline CPUs.
+	 */
+	rcu_for_each_leaf_node(rsp, rnp) {
+		raw_spin_lock_irq(&rnp->lock);
+		smp_mb__after_unlock_lock();
+		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
+		    !rnp->wait_blkd_tasks) {
+			/* Nothing to do on this leaf rcu_node structure. */
+			raw_spin_unlock_irq(&rnp->lock);
+			continue;
+		}
+
+		/* Record old state, apply changes to ->qsmaskinit field. */
+		oldmask = rnp->qsmaskinit;
+		rnp->qsmaskinit = rnp->qsmaskinitnext;
+
+		/* If zero-ness of ->qsmaskinit changed, propagate up tree. */
+		if (!oldmask != !rnp->qsmaskinit) {
+			if (!oldmask) /* First online CPU for this rcu_node. */
+				rcu_init_new_rnp(rnp);
+			else if (rcu_preempt_has_tasks(rnp)) /* blocked tasks */
+				rnp->wait_blkd_tasks = true;
+			else /* Last offline CPU and can propagate. */
+				rcu_cleanup_dead_rnp(rnp);
+		}
+
+		/*
+		 * If all waited-on tasks from prior grace period are
+		 * done, and if all this rcu_node structure's CPUs are
+		 * still offline, propagate up the rcu_node tree and
+		 * clear ->wait_blkd_tasks.  Otherwise, if one of this
+		 * rcu_node structure's CPUs has since come back online,
+		 * simply clear ->wait_blkd_tasks (but rcu_cleanup_dead_rnp()
+		 * checks for this, so just call it unconditionally).
+		 */
+		if (rnp->wait_blkd_tasks &&
+		    (!rcu_preempt_has_tasks(rnp) ||
+		     rnp->qsmaskinit)) {
+			rnp->wait_blkd_tasks = false;
+			rcu_cleanup_dead_rnp(rnp);
+		}
+
+		raw_spin_unlock_irq(&rnp->lock);
+	}
 
 	/*
 	 * Set the quiescent-state-needed bits in all the rcu_node
@@ -1757,8 +1832,8 @@
 		rcu_preempt_check_blocked_tasks(rnp);
 		rnp->qsmask = rnp->qsmaskinit;
 		ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
-		WARN_ON_ONCE(rnp->completed != rsp->completed);
-		ACCESS_ONCE(rnp->completed) = rsp->completed;
+		if (WARN_ON_ONCE(rnp->completed != rsp->completed))
+			ACCESS_ONCE(rnp->completed) = rsp->completed;
 		if (rnp == rdp->mynode)
 			(void)__note_gp_changes(rsp, rnp, rdp);
 		rcu_preempt_boost_start_gp(rnp);
@@ -1768,9 +1843,12 @@
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched_rcu_qs();
 		ACCESS_ONCE(rsp->gp_activity) = jiffies;
+		if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) &&
+		    gp_init_delay > 0 &&
+		    !(rsp->gpnum % (rcu_num_nodes * 10)))
+			schedule_timeout_uninterruptible(gp_init_delay);
 	}
 
-	mutex_unlock(&rsp->onoff_mutex);
 	return 1;
 }
 
@@ -1798,7 +1876,7 @@
 		fqs_state = RCU_FORCE_QS;
 	} else {
 		/* Handle dyntick-idle and offline CPUs. */
-		isidle = false;
+		isidle = true;
 		force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
 	}
 	/* Clear flag to prevent immediate re-entry. */
@@ -1852,6 +1930,8 @@
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		raw_spin_lock_irq(&rnp->lock);
 		smp_mb__after_unlock_lock();
+		WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
+		WARN_ON_ONCE(rnp->qsmask);
 		ACCESS_ONCE(rnp->completed) = rsp->gpnum;
 		rdp = this_cpu_ptr(rsp->rda);
 		if (rnp == rdp->mynode)
@@ -1895,6 +1975,7 @@
 	struct rcu_state *rsp = arg;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	rcu_bind_gp_kthread();
 	for (;;) {
 
 		/* Handle grace-period start. */
@@ -2062,25 +2143,32 @@
  * Similar to rcu_report_qs_rdp(), for which it is a helper function.
  * Allows quiescent states for a group of CPUs to be reported at one go
  * to the specified rcu_node structure, though all the CPUs in the group
- * must be represented by the same rcu_node structure (which need not be
- * a leaf rcu_node structure, though it often will be).  That structure's
- * lock must be held upon entry, and it is released before return.
+ * must be represented by the same rcu_node structure (which need not be a
+ * leaf rcu_node structure, though it often will be).  The gps parameter
+ * is the grace-period snapshot, which means that the quiescent states
+ * are valid only if rnp->gpnum is equal to gps.  That structure's lock
+ * must be held upon entry, and it is released before return.
  */
 static void
 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
-		  struct rcu_node *rnp, unsigned long flags)
+		  struct rcu_node *rnp, unsigned long gps, unsigned long flags)
 	__releases(rnp->lock)
 {
+	unsigned long oldmask = 0;
 	struct rcu_node *rnp_c;
 
 	/* Walk up the rcu_node hierarchy. */
 	for (;;) {
-		if (!(rnp->qsmask & mask)) {
+		if (!(rnp->qsmask & mask) || rnp->gpnum != gps) {
 
-			/* Our bit has already been cleared, so done. */
+			/*
+			 * Our bit has already been cleared, or the
+			 * relevant grace period is already over, so done.
+			 */
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			return;
 		}
+		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
 		rnp->qsmask &= ~mask;
 		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
 						 mask, rnp->qsmask, rnp->level,
@@ -2104,7 +2192,7 @@
 		rnp = rnp->parent;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		smp_mb__after_unlock_lock();
-		WARN_ON_ONCE(rnp_c->qsmask);
+		oldmask = rnp_c->qsmask;
 	}
 
 	/*
@@ -2116,6 +2204,46 @@
 }
 
 /*
+ * Record a quiescent state for all tasks that were previously queued
+ * on the specified rcu_node structure and that were blocking the current
+ * RCU grace period.  The caller must hold the specified rnp->lock with
+ * irqs disabled, and this lock is released upon return, but irqs remain
+ * disabled.
+ */
+static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
+				      struct rcu_node *rnp, unsigned long flags)
+	__releases(rnp->lock)
+{
+	unsigned long gps;
+	unsigned long mask;
+	struct rcu_node *rnp_p;
+
+	if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
+	    rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;  /* Still need more quiescent states! */
+	}
+
+	rnp_p = rnp->parent;
+	if (rnp_p == NULL) {
+		/*
+		 * Only one rcu_node structure in the tree, so don't
+		 * try to report up to its nonexistent parent!
+		 */
+		rcu_report_qs_rsp(rsp, flags);
+		return;
+	}
+
+	/* Report up the rest of the hierarchy, tracking current ->gpnum. */
+	gps = rnp->gpnum;
+	mask = rnp->grpmask;
+	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
+	raw_spin_lock(&rnp_p->lock);	/* irqs already disabled. */
+	smp_mb__after_unlock_lock();
+	rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
+}
+
+/*
  * Record a quiescent state for the specified CPU to that CPU's rcu_data
  * structure.  This must be either called from the specified CPU, or
  * called when the specified CPU is known to be offline (and when it is
@@ -2163,7 +2291,8 @@
 		 */
 		needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
 
-		rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
+		rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags);
+		/* ^^^ Released rnp->lock */
 		if (needwake)
 			rcu_gp_kthread_wake(rsp);
 	}
@@ -2256,8 +2385,12 @@
 		rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
 	}
 
-	/* Finally, initialize the rcu_data structure's list to empty.  */
+	/*
+	 * Finally, initialize the rcu_data structure's list to empty and
+	 * disallow further callbacks on this CPU.
+	 */
 	init_callback_list(rdp);
+	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
 }
 
 /*
@@ -2355,6 +2488,7 @@
 		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
 		smp_mb__after_unlock_lock(); /* GP memory ordering. */
 		rnp->qsmaskinit &= ~mask;
+		rnp->qsmask &= ~mask;
 		if (rnp->qsmaskinit) {
 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 			return;
@@ -2364,6 +2498,26 @@
 }
 
 /*
+ * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
+ * function.  We now remove it from the rcu_node tree's ->qsmaskinit
+ * bit masks.
+ */
+static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
+{
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
+
+	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
+	mask = rdp->grpmask;
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	smp_mb__after_unlock_lock();	/* Enforce GP memory-order guarantee. */
+	rnp->qsmaskinitnext &= ~mask;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
  * The CPU has been completely removed, and some other CPU is reporting
  * this fact from process context.  Do the remainder of the cleanup,
  * including orphaning the outgoing CPU's RCU callbacks, and also
@@ -2379,29 +2533,15 @@
 	/* Adjust any no-longer-needed kthreads. */
 	rcu_boost_kthread_setaffinity(rnp, -1);
 
-	/* Exclude any attempts to start a new grace period. */
-	mutex_lock(&rsp->onoff_mutex);
-	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
-
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
+	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
 	rcu_adopt_orphan_cbs(rsp, flags);
 	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
 
-	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	smp_mb__after_unlock_lock();	/* Enforce GP memory-order guarantee. */
-	rnp->qsmaskinit &= ~rdp->grpmask;
-	if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp))
-		rcu_cleanup_dead_rnp(rnp);
-	rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Rlses rnp->lock. */
 	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
 		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
 		  cpu, rdp->qlen, rdp->nxtlist);
-	init_callback_list(rdp);
-	/* Disallow further callbacks on this CPU. */
-	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
-	mutex_unlock(&rsp->onoff_mutex);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -2414,6 +2554,10 @@
 {
 }
 
+static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
+{
+}
+
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 }
@@ -2589,26 +2733,47 @@
 			return;
 		}
 		if (rnp->qsmask == 0) {
-			rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
-			continue;
+			if (rcu_state_p == &rcu_sched_state ||
+			    rsp != rcu_state_p ||
+			    rcu_preempt_blocked_readers_cgp(rnp)) {
+				/*
+				 * No point in scanning bits because they
+				 * are all zero.  But we might need to
+				 * priority-boost blocked readers.
+				 */
+				rcu_initiate_boost(rnp, flags);
+				/* rcu_initiate_boost() releases rnp->lock */
+				continue;
+			}
+			if (rnp->parent &&
+			    (rnp->parent->qsmask & rnp->grpmask)) {
+				/*
+				 * Race between grace-period
+				 * initialization and task exiting RCU
+				 * read-side critical section: Report.
+				 */
+				rcu_report_unblock_qs_rnp(rsp, rnp, flags);
+				/* rcu_report_unblock_qs_rnp() rlses ->lock */
+				continue;
+			}
 		}
 		cpu = rnp->grplo;
 		bit = 1;
 		for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
 			if ((rnp->qsmask & bit) != 0) {
-				if ((rnp->qsmaskinit & bit) != 0)
-					*isidle = false;
+				if ((rnp->qsmaskinit & bit) == 0)
+					*isidle = false; /* Pending hotplug. */
 				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
 					mask |= bit;
 			}
 		}
 		if (mask != 0) {
-
-			/* rcu_report_qs_rnp() releases rnp->lock. */
-			rcu_report_qs_rnp(mask, rsp, rnp, flags);
-			continue;
+			/* Idle/offline CPUs, report (releases rnp->lock. */
+			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags);
+		} else {
+			/* Nothing to do here, so just drop the lock. */
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 }
 
@@ -2741,7 +2906,7 @@
 	 * If called from an extended quiescent state, invoke the RCU
 	 * core in order to force a re-evaluation of RCU's idleness.
 	 */
-	if (!rcu_is_watching() && cpu_online(smp_processor_id()))
+	if (!rcu_is_watching())
 		invoke_rcu_core();
 
 	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
@@ -2827,11 +2992,22 @@
 
 		if (cpu != -1)
 			rdp = per_cpu_ptr(rsp->rda, cpu);
-		offline = !__call_rcu_nocb(rdp, head, lazy, flags);
-		WARN_ON_ONCE(offline);
-		/* _call_rcu() is illegal on offline CPU; leak the callback. */
-		local_irq_restore(flags);
-		return;
+		if (likely(rdp->mynode)) {
+			/* Post-boot, so this should be for a no-CBs CPU. */
+			offline = !__call_rcu_nocb(rdp, head, lazy, flags);
+			WARN_ON_ONCE(offline);
+			/* Offline CPU, _call_rcu() illegal, leak callback.  */
+			local_irq_restore(flags);
+			return;
+		}
+		/*
+		 * Very early boot, before rcu_init().  Initialize if needed
+		 * and then drop through to queue the callback.
+		 */
+		BUG_ON(cpu != -1);
+		WARN_ON_ONCE(!rcu_is_watching());
+		if (!likely(rdp->nxtlist))
+			init_default_callback_list(rdp);
 	}
 	ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1;
 	if (lazy)
@@ -2954,7 +3130,7 @@
 			   "Illegal synchronize_sched() in RCU-sched read-side critical section");
 	if (rcu_blocking_is_gp())
 		return;
-	if (rcu_expedited)
+	if (rcu_gp_is_expedited())
 		synchronize_sched_expedited();
 	else
 		wait_rcu_gp(call_rcu_sched);
@@ -2981,7 +3157,7 @@
 			   "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
 	if (rcu_blocking_is_gp())
 		return;
-	if (rcu_expedited)
+	if (rcu_gp_is_expedited())
 		synchronize_rcu_bh_expedited();
 	else
 		wait_rcu_gp(call_rcu_bh);
@@ -3518,6 +3694,28 @@
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
 /*
+ * Propagate ->qsinitmask bits up the rcu_node tree to account for the
+ * first CPU in a given leaf rcu_node structure coming online.  The caller
+ * must hold the corresponding leaf rcu_node ->lock with interrrupts
+ * disabled.
+ */
+static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
+{
+	long mask;
+	struct rcu_node *rnp = rnp_leaf;
+
+	for (;;) {
+		mask = rnp->grpmask;
+		rnp = rnp->parent;
+		if (rnp == NULL)
+			return;
+		raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */
+		rnp->qsmaskinit |= mask;
+		raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
+	}
+}
+
+/*
  * Do boot-time initialization of a CPU's per-CPU RCU data.
  */
 static void __init
@@ -3553,49 +3751,37 @@
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
-	/* Exclude new grace periods. */
-	mutex_lock(&rsp->onoff_mutex);
-
 	/* Set up local state, ensuring consistent view of global state. */
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rdp->beenonline = 1;	 /* We have now been online. */
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
-	init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
+	if (!rdp->nxtlist)
+		init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
 	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
 
-	/* Add CPU to rcu_node bitmasks. */
+	/*
+	 * Add CPU to leaf rcu_node pending-online bitmask.  Any needed
+	 * propagation up the rcu_node tree will happen at the beginning
+	 * of the next grace period.
+	 */
 	rnp = rdp->mynode;
 	mask = rdp->grpmask;
-	do {
-		/* Exclude any attempts to start a new GP on small systems. */
-		raw_spin_lock(&rnp->lock);	/* irqs already disabled. */
-		rnp->qsmaskinit |= mask;
-		mask = rnp->grpmask;
-		if (rnp == rdp->mynode) {
-			/*
-			 * If there is a grace period in progress, we will
-			 * set up to wait for it next time we run the
-			 * RCU core code.
-			 */
-			rdp->gpnum = rnp->completed;
-			rdp->completed = rnp->completed;
-			rdp->passed_quiesce = 0;
-			rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
-			rdp->qs_pending = 0;
-			trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
-		}
-		raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
-		rnp = rnp->parent;
-	} while (rnp != NULL && !(rnp->qsmaskinit & mask));
-	local_irq_restore(flags);
-
-	mutex_unlock(&rsp->onoff_mutex);
+	raw_spin_lock(&rnp->lock);		/* irqs already disabled. */
+	smp_mb__after_unlock_lock();
+	rnp->qsmaskinitnext |= mask;
+	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
+	rdp->completed = rnp->completed;
+	rdp->passed_quiesce = false;
+	rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+	rdp->qs_pending = false;
+	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 static void rcu_prepare_cpu(int cpu)
@@ -3609,15 +3795,14 @@
 /*
  * Handle CPU online/offline notification events.
  */
-static int rcu_cpu_notify(struct notifier_block *self,
-				    unsigned long action, void *hcpu)
+int rcu_cpu_notify(struct notifier_block *self,
+		   unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
 	struct rcu_state *rsp;
 
-	trace_rcu_utilization(TPS("Start CPU hotplug"));
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
@@ -3637,6 +3822,11 @@
 		for_each_rcu_flavor(rsp)
 			rcu_cleanup_dying_cpu(rsp);
 		break;
+	case CPU_DYING_IDLE:
+		for_each_rcu_flavor(rsp) {
+			rcu_cleanup_dying_idle_cpu(cpu, rsp);
+		}
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	case CPU_UP_CANCELED:
@@ -3649,7 +3839,6 @@
 	default:
 		break;
 	}
-	trace_rcu_utilization(TPS("End CPU hotplug"));
 	return NOTIFY_OK;
 }
 
@@ -3660,11 +3849,12 @@
 	case PM_HIBERNATION_PREPARE:
 	case PM_SUSPEND_PREPARE:
 		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
-			rcu_expedited = 1;
+			rcu_expedite_gp();
 		break;
 	case PM_POST_HIBERNATION:
 	case PM_POST_SUSPEND:
-		rcu_expedited = 0;
+		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
+			rcu_unexpedite_gp();
 		break;
 	default:
 		break;
@@ -3734,30 +3924,26 @@
  * Compute the per-level fanout, either using the exact fanout specified
  * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
  */
-#ifdef CONFIG_RCU_FANOUT_EXACT
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
 	int i;
 
-	rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
-	for (i = rcu_num_lvls - 2; i >= 0; i--)
-		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-}
-#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
-static void __init rcu_init_levelspread(struct rcu_state *rsp)
-{
-	int ccur;
-	int cprv;
-	int i;
+	if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) {
+		rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
+		for (i = rcu_num_lvls - 2; i >= 0; i--)
+			rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+	} else {
+		int ccur;
+		int cprv;
 
-	cprv = nr_cpu_ids;
-	for (i = rcu_num_lvls - 1; i >= 0; i--) {
-		ccur = rsp->levelcnt[i];
-		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
-		cprv = ccur;
+		cprv = nr_cpu_ids;
+		for (i = rcu_num_lvls - 1; i >= 0; i--) {
+			ccur = rsp->levelcnt[i];
+			rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
+			cprv = ccur;
+		}
 	}
 }
-#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
 
 /*
  * Helper function for rcu_init() that initializes one rcu_state structure.
@@ -3833,7 +4019,6 @@
 		}
 	}
 
-	rsp->rda = rda;
 	init_waitqueue_head(&rsp->gp_wq);
 	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
@@ -3926,6 +4111,8 @@
 {
 	int cpu;
 
+	rcu_early_boot_tests();
+
 	rcu_bootup_announce();
 	rcu_init_geometry();
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
@@ -3942,8 +4129,6 @@
 	pm_notifier(rcu_pm_notify, 0);
 	for_each_online_cpu(cpu)
 		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
-
-	rcu_early_boot_tests();
 }
 
 #include "tree_plugin.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 119de39..a69d3da 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -141,12 +141,20 @@
 				/*  complete (only for PREEMPT_RCU). */
 	unsigned long qsmaskinit;
 				/* Per-GP initial value for qsmask & expmask. */
+				/*  Initialized from ->qsmaskinitnext at the */
+				/*  beginning of each grace period. */
+	unsigned long qsmaskinitnext;
+				/* Online CPUs for next grace period. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
 				/*  Only one bit will be set in this mask. */
 	int	grplo;		/* lowest-numbered CPU or group here. */
 	int	grphi;		/* highest-numbered CPU or group here. */
 	u8	grpnum;		/* CPU/group number for next level up. */
 	u8	level;		/* root is at level 0. */
+	bool	wait_blkd_tasks;/* Necessary to wait for blocked tasks to */
+				/*  exit RCU read-side critical sections */
+				/*  before propagating offline up the */
+				/*  rcu_node tree? */
 	struct rcu_node *parent;
 	struct list_head blkd_tasks;
 				/* Tasks blocked in RCU read-side critical */
@@ -448,8 +456,6 @@
 	long qlen;				/* Total number of callbacks. */
 	/* End of fields guarded by orphan_lock. */
 
-	struct mutex onoff_mutex;		/* Coordinate hotplug & GPs. */
-
 	struct mutex barrier_mutex;		/* Guards barrier fields. */
 	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
 	struct completion barrier_completion;	/* Wake at barrier end. */
@@ -559,6 +565,7 @@
 static void rcu_cleanup_after_idle(void);
 static void rcu_prepare_for_idle(void);
 static void rcu_idle_count_callbacks_posted(void);
+static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
 static void print_cpu_stall_info_begin(void);
 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
 static void print_cpu_stall_info_end(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0a571e9..8c0ec0f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -58,38 +58,33 @@
  */
 static void __init rcu_bootup_announce_oddness(void)
 {
-#ifdef CONFIG_RCU_TRACE
-	pr_info("\tRCU debugfs-based tracing is enabled.\n");
-#endif
-#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
-	pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
-	       CONFIG_RCU_FANOUT);
-#endif
-#ifdef CONFIG_RCU_FANOUT_EXACT
-	pr_info("\tHierarchical RCU autobalancing is disabled.\n");
-#endif
-#ifdef CONFIG_RCU_FAST_NO_HZ
-	pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
-#endif
-#ifdef CONFIG_PROVE_RCU
-	pr_info("\tRCU lockdep checking is enabled.\n");
-#endif
-#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
-	pr_info("\tRCU torture testing starts during boot.\n");
-#endif
-#if defined(CONFIG_RCU_CPU_STALL_INFO)
-	pr_info("\tAdditional per-CPU info printed with stalls.\n");
-#endif
-#if NUM_RCU_LVL_4 != 0
-	pr_info("\tFour-level hierarchy is enabled.\n");
-#endif
+	if (IS_ENABLED(CONFIG_RCU_TRACE))
+		pr_info("\tRCU debugfs-based tracing is enabled.\n");
+	if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) ||
+	    (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32))
+		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
+		       CONFIG_RCU_FANOUT);
+	if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT))
+		pr_info("\tHierarchical RCU autobalancing is disabled.\n");
+	if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
+		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
+	if (IS_ENABLED(CONFIG_PROVE_RCU))
+		pr_info("\tRCU lockdep checking is enabled.\n");
+	if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE))
+		pr_info("\tRCU torture testing starts during boot.\n");
+	if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO))
+		pr_info("\tAdditional per-CPU info printed with stalls.\n");
+	if (NUM_RCU_LVL_4 != 0)
+		pr_info("\tFour-level hierarchy is enabled.\n");
+	if (CONFIG_RCU_FANOUT_LEAF != 16)
+		pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
+			CONFIG_RCU_FANOUT_LEAF);
 	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
 		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
 	if (nr_cpu_ids != NR_CPUS)
 		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
-#ifdef CONFIG_RCU_BOOST
-	pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
-#endif
+	if (IS_ENABLED(CONFIG_RCU_BOOST))
+		pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
 }
 
 #ifdef CONFIG_PREEMPT_RCU
@@ -180,7 +175,7 @@
 		 * But first, note that the current CPU must still be
 		 * on line!
 		 */
-		WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
+		WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 		if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
 			list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
@@ -233,43 +228,6 @@
 }
 
 /*
- * Record a quiescent state for all tasks that were previously queued
- * on the specified rcu_node structure and that were blocking the current
- * RCU grace period.  The caller must hold the specified rnp->lock with
- * irqs disabled, and this lock is released upon return, but irqs remain
- * disabled.
- */
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
-	__releases(rnp->lock)
-{
-	unsigned long mask;
-	struct rcu_node *rnp_p;
-
-	if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;  /* Still need more quiescent states! */
-	}
-
-	rnp_p = rnp->parent;
-	if (rnp_p == NULL) {
-		/*
-		 * Either there is only one rcu_node in the tree,
-		 * or tasks were kicked up to root rcu_node due to
-		 * CPUs going offline.
-		 */
-		rcu_report_qs_rsp(&rcu_preempt_state, flags);
-		return;
-	}
-
-	/* Report up the rest of the hierarchy. */
-	mask = rnp->grpmask;
-	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
-	raw_spin_lock(&rnp_p->lock);	/* irqs already disabled. */
-	smp_mb__after_unlock_lock();
-	rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
-}
-
-/*
  * Advance a ->blkd_tasks-list pointer to the next entry, instead
  * returning NULL if at the end of the list.
  */
@@ -300,7 +258,6 @@
  */
 void rcu_read_unlock_special(struct task_struct *t)
 {
-	bool empty;
 	bool empty_exp;
 	bool empty_norm;
 	bool empty_exp_now;
@@ -334,7 +291,13 @@
 	}
 
 	/* Hardware IRQ handlers cannot block, complain if they get here. */
-	if (WARN_ON_ONCE(in_irq() || in_serving_softirq())) {
+	if (in_irq() || in_serving_softirq()) {
+		lockdep_rcu_suspicious(__FILE__, __LINE__,
+				       "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
+		pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n",
+			 t->rcu_read_unlock_special.s,
+			 t->rcu_read_unlock_special.b.blocked,
+			 t->rcu_read_unlock_special.b.need_qs);
 		local_irq_restore(flags);
 		return;
 	}
@@ -356,7 +319,6 @@
 				break;
 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		}
-		empty = !rcu_preempt_has_tasks(rnp);
 		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
 		empty_exp = !rcu_preempted_readers_exp(rnp);
 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -377,14 +339,6 @@
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
 		/*
-		 * If this was the last task on the list, go see if we
-		 * need to propagate ->qsmaskinit bit clearing up the
-		 * rcu_node tree.
-		 */
-		if (!empty && !rcu_preempt_has_tasks(rnp))
-			rcu_cleanup_dead_rnp(rnp);
-
-		/*
 		 * If this was the last task on the current list, and if
 		 * we aren't waiting on any CPUs, report the quiescent state.
 		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
@@ -399,7 +353,8 @@
 							 rnp->grplo,
 							 rnp->grphi,
 							 !!rnp->gp_tasks);
-			rcu_report_unblock_qs_rnp(rnp, flags);
+			rcu_report_unblock_qs_rnp(&rcu_preempt_state,
+						  rnp, flags);
 		} else {
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		}
@@ -520,10 +475,6 @@
 	WARN_ON_ONCE(rnp->qsmask);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Check for a quiescent state from the current CPU.  When a task blocks,
  * the task is recorded in the corresponding CPU's rcu_node structure,
@@ -585,7 +536,7 @@
 			   "Illegal synchronize_rcu() in RCU read-side critical section");
 	if (!rcu_scheduler_active)
 		return;
-	if (rcu_expedited)
+	if (rcu_gp_is_expedited())
 		synchronize_rcu_expedited();
 	else
 		wait_rcu_gp(call_rcu);
@@ -630,9 +581,6 @@
  * recursively up the tree.  (Calm down, calm down, we do the recursion
  * iteratively!)
  *
- * Most callers will set the "wake" flag, but the task initiating the
- * expedited grace period need not wake itself.
- *
  * Caller must hold sync_rcu_preempt_exp_mutex.
  */
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
@@ -667,29 +615,85 @@
 
 /*
  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure.  If there are no such
- * tasks, report it up the rcu_node hierarchy.
+ * grace period for the specified rcu_node structure, phase 1.  If there
+ * are such tasks, set the ->expmask bits up the rcu_node tree and also
+ * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
+ * that work is needed here.
  *
- * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
- * CPU hotplug operations.
+ * Caller must hold sync_rcu_preempt_exp_mutex.
  */
 static void
-sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
 {
 	unsigned long flags;
-	int must_wait = 0;
+	unsigned long mask;
+	struct rcu_node *rnp_up;
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	smp_mb__after_unlock_lock();
+	WARN_ON_ONCE(rnp->expmask);
+	WARN_ON_ONCE(rnp->exp_tasks);
 	if (!rcu_preempt_has_tasks(rnp)) {
+		/* No blocked tasks, nothing to do. */
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	} else {
+		return;
+	}
+	/* Call for Phase 2 and propagate ->expmask bits up the tree. */
+	rnp->expmask = 1;
+	rnp_up = rnp;
+	while (rnp_up->parent) {
+		mask = rnp_up->grpmask;
+		rnp_up = rnp_up->parent;
+		if (rnp_up->expmask & mask)
+			break;
+		raw_spin_lock(&rnp_up->lock); /* irqs already off */
+		smp_mb__after_unlock_lock();
+		rnp_up->expmask |= mask;
+		raw_spin_unlock(&rnp_up->lock); /* irqs still off */
+	}
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure, phase 2.  If the
+ * leaf rcu_node structure has its ->expmask field set, check for tasks.
+ * If there are some, clear ->expmask and set ->exp_tasks accordingly,
+ * then initiate RCU priority boosting.  Otherwise, clear ->expmask and
+ * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
+ * enabling rcu_read_unlock_special() to do the bit-clearing.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void
+sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	smp_mb__after_unlock_lock();
+	if (!rnp->expmask) {
+		/* Phase 1 didn't do anything, so Phase 2 doesn't either. */
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+
+	/* Phase 1 is over. */
+	rnp->expmask = 0;
+
+	/*
+	 * If there are still blocked tasks, set up ->exp_tasks so that
+	 * rcu_read_unlock_special() will wake us and then boost them.
+	 */
+	if (rcu_preempt_has_tasks(rnp)) {
 		rnp->exp_tasks = rnp->blkd_tasks.next;
 		rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
-		must_wait = 1;
+		return;
 	}
-	if (!must_wait)
-		rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
+
+	/* No longer any blocked tasks, so undo bit setting. */
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	rcu_report_exp_rnp(rsp, rnp, false);
 }
 
 /**
@@ -706,7 +710,6 @@
  */
 void synchronize_rcu_expedited(void)
 {
-	unsigned long flags;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp = &rcu_preempt_state;
 	unsigned long snap;
@@ -757,19 +760,16 @@
 	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
-	/* Initialize ->expmask for all non-leaf rcu_node structures. */
-	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		smp_mb__after_unlock_lock();
-		rnp->expmask = rnp->qsmaskinit;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	}
-
-	/* Snapshot current state of ->blkd_tasks lists. */
+	/*
+	 * Snapshot current state of ->blkd_tasks lists into ->expmask.
+	 * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special()
+	 * to start clearing them.  Doing this in one phase leads to
+	 * strange races between setting and clearing bits, so just say "no"!
+	 */
 	rcu_for_each_leaf_node(rsp, rnp)
-		sync_rcu_preempt_exp_init(rsp, rnp);
-	if (NUM_RCU_NODES > 1)
-		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+		sync_rcu_preempt_exp_init1(rsp, rnp);
+	rcu_for_each_leaf_node(rsp, rnp)
+		sync_rcu_preempt_exp_init2(rsp, rnp);
 
 	put_online_cpus();
 
@@ -859,8 +859,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
 /*
  * Because there is no preemptible RCU, there can be no readers blocked.
  */
@@ -869,8 +867,6 @@
 	return false;
 }
 
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, we never have to check for
  * tasks blocked within RCU read-side critical sections.
@@ -1170,7 +1166,7 @@
  * Returns zero if all is well, a negated errno otherwise.
  */
 static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
-						 struct rcu_node *rnp)
+				       struct rcu_node *rnp)
 {
 	int rnp_index = rnp - &rsp->node[0];
 	unsigned long flags;
@@ -1180,7 +1176,7 @@
 	if (&rcu_preempt_state != rsp)
 		return 0;
 
-	if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
+	if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
 		return 0;
 
 	rsp->boost = 1;
@@ -1273,7 +1269,7 @@
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 {
 	struct task_struct *t = rnp->boost_kthread_task;
-	unsigned long mask = rnp->qsmaskinit;
+	unsigned long mask = rcu_rnp_online_cpus(rnp);
 	cpumask_var_t cm;
 	int cpu;
 
@@ -1945,7 +1941,8 @@
 		rhp = ACCESS_ONCE(rdp->nocb_follower_head);
 
 	/* Having no rcuo kthread but CBs after scheduler starts is bad! */
-	if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) {
+	if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp &&
+	    rcu_scheduler_fully_active) {
 		/* RCU callback enqueued before CPU first came online??? */
 		pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
 		       cpu, rhp->func);
@@ -2392,18 +2389,8 @@
 		pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
 
 	for_each_rcu_flavor(rsp) {
-		for_each_cpu(cpu, rcu_nocb_mask) {
-			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-
-			/*
-			 * If there are early callbacks, they will need
-			 * to be moved to the nocb lists.
-			 */
-			WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] !=
-				     &rdp->nxtlist &&
-				     rdp->nxttail[RCU_NEXT_TAIL] != NULL);
-			init_nocb_callback_list(rdp);
-		}
+		for_each_cpu(cpu, rcu_nocb_mask)
+			init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu));
 		rcu_organize_nocb_kthreads(rsp);
 	}
 }
@@ -2540,6 +2527,16 @@
 	if (!rcu_is_nocb_cpu(rdp->cpu))
 		return false;
 
+	/* If there are early-boot callbacks, move them to nocb lists. */
+	if (rdp->nxtlist) {
+		rdp->nocb_head = rdp->nxtlist;
+		rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL];
+		atomic_long_set(&rdp->nocb_q_count, rdp->qlen);
+		atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy);
+		rdp->nxtlist = NULL;
+		rdp->qlen = 0;
+		rdp->qlen_lazy = 0;
+	}
 	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
 	return true;
 }
@@ -2763,7 +2760,8 @@
 
 /*
  * Check to see if the current CPU is idle.  Note that usermode execution
- * does not count as idle.  The caller must have disabled interrupts.
+ * does not count as idle.  The caller must have disabled interrupts,
+ * and must be running on tick_do_timer_cpu.
  */
 static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
 				  unsigned long *maxj)
@@ -2784,8 +2782,8 @@
 	if (!*isidle || rdp->rsp != rcu_state_p ||
 	    cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
 		return;
-	if (rcu_gp_in_progress(rdp->rsp))
-		WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
+	/* Verify affinity of current kthread. */
+	WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
 
 	/* Pick up current idle and NMI-nesting counter and check. */
 	cur = atomic_read(&rdtp->dynticks_idle);
@@ -3068,11 +3066,10 @@
 		return;
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	cpu = tick_do_timer_cpu;
-	if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu)
+	if (cpu >= 0 && cpu < nr_cpu_ids)
 		set_cpus_allowed_ptr(current, cpumask_of(cpu));
 #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-	if (!is_housekeeping_cpu(raw_smp_processor_id()))
-		housekeeping_affine(current);
+	housekeeping_affine(current);
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
 
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index fbb6240..f92361e 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -283,8 +283,8 @@
 			seq_puts(m, "\n");
 			level = rnp->level;
 		}
-		seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d    ",
-			   rnp->qsmask, rnp->qsmaskinit,
+		seq_printf(m, "%lx/%lx->%lx %c%c>%c %d:%d ^%d    ",
+			   rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext,
 			   ".G"[rnp->gp_tasks != NULL],
 			   ".E"[rnp->exp_tasks != NULL],
 			   ".T"[!list_empty(&rnp->blkd_tasks)],
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index e0d31a345..1f13335 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -62,6 +62,63 @@
 
 module_param(rcu_expedited, int, 0);
 
+#ifndef CONFIG_TINY_RCU
+
+static atomic_t rcu_expedited_nesting =
+	ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
+
+/*
+ * Should normal grace-period primitives be expedited?  Intended for
+ * use within RCU.  Note that this function takes the rcu_expedited
+ * sysfs/boot variable into account as well as the rcu_expedite_gp()
+ * nesting.  So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited()
+ * returns false is a -really- bad idea.
+ */
+bool rcu_gp_is_expedited(void)
+{
+	return rcu_expedited || atomic_read(&rcu_expedited_nesting);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_is_expedited);
+
+/**
+ * rcu_expedite_gp - Expedite future RCU grace periods
+ *
+ * After a call to this function, future calls to synchronize_rcu() and
+ * friends act as the corresponding synchronize_rcu_expedited() function
+ * had instead been called.
+ */
+void rcu_expedite_gp(void)
+{
+	atomic_inc(&rcu_expedited_nesting);
+}
+EXPORT_SYMBOL_GPL(rcu_expedite_gp);
+
+/**
+ * rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation
+ *
+ * Undo a prior call to rcu_expedite_gp().  If all prior calls to
+ * rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(),
+ * and if the rcu_expedited sysfs/boot parameter is not set, then all
+ * subsequent calls to synchronize_rcu() and friends will return to
+ * their normal non-expedited behavior.
+ */
+void rcu_unexpedite_gp(void)
+{
+	atomic_dec(&rcu_expedited_nesting);
+}
+EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
+
+#endif /* #ifndef CONFIG_TINY_RCU */
+
+/*
+ * Inform RCU of the end of the in-kernel boot sequence.
+ */
+void rcu_end_inkernel_boot(void)
+{
+	if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
+		rcu_unexpedite_gp();
+}
+
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -199,16 +256,13 @@
 
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-struct rcu_synchronize {
-	struct rcu_head head;
-	struct completion completion;
-};
-
-/*
- * Awaken the corresponding synchronize_rcu() instance now that a
- * grace period has elapsed.
+/**
+ * wakeme_after_rcu() - Callback function to awaken a task after grace period
+ * @head: Pointer to rcu_head member within rcu_synchronize structure
+ *
+ * Awaken the corresponding task now that a grace period has elapsed.
  */
-static void wakeme_after_rcu(struct rcu_head  *head)
+void wakeme_after_rcu(struct rcu_head *head)
 {
 	struct rcu_synchronize *rcu;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2f7937e..f9123a8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2853,7 +2853,7 @@
 	 * we find a better solution.
 	 *
 	 * NB: There are buggy callers of this function.  Ideally we
-	 * should warn if prev_state != IN_USER, but that will trigger
+	 * should warn if prev_state != CONTEXT_USER, but that will trigger
 	 * too frequently to make sense yet.
 	 */
 	enum ctx_state prev_state = exception_enter();
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 4d207d2..deef1ca 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -209,6 +209,8 @@
 	goto exit_idle;
 }
 
+DEFINE_PER_CPU(bool, cpu_dead_idle);
+
 /*
  * Generic idle loop implementation
  *
@@ -233,8 +235,13 @@
 			check_pgt_cache();
 			rmb();
 
-			if (cpu_is_offline(smp_processor_id()))
+			if (cpu_is_offline(smp_processor_id())) {
+				rcu_cpu_notify(NULL, CPU_DYING_IDLE,
+					       (void *)(long)smp_processor_id());
+				smp_mb(); /* all activity before dead. */
+				this_cpu_write(cpu_dead_idle, true);
 				arch_cpu_idle_dead();
+			}
 
 			local_irq_disable();
 			arch_cpu_idle_enter();
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 40190f2..c697f73 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -4,6 +4,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/smp.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -314,3 +315,158 @@
 	put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
+
+static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
+
+/*
+ * Called to poll specified CPU's state, for example, when waiting for
+ * a CPU to come online.
+ */
+int cpu_report_state(int cpu)
+{
+	return atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+}
+
+/*
+ * If CPU has died properly, set its state to CPU_UP_PREPARE and
+ * return success.  Otherwise, return -EBUSY if the CPU died after
+ * cpu_wait_death() timed out.  And yet otherwise again, return -EAGAIN
+ * if cpu_wait_death() timed out and the CPU still hasn't gotten around
+ * to dying.  In the latter two cases, the CPU might not be set up
+ * properly, but it is up to the arch-specific code to decide.
+ * Finally, -EIO indicates an unanticipated problem.
+ *
+ * Note that it is permissible to omit this call entirely, as is
+ * done in architectures that do no CPU-hotplug error checking.
+ */
+int cpu_check_up_prepare(int cpu)
+{
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
+		return 0;
+	}
+
+	switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) {
+
+	case CPU_POST_DEAD:
+
+		/* The CPU died properly, so just start it up again. */
+		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
+		return 0;
+
+	case CPU_DEAD_FROZEN:
+
+		/*
+		 * Timeout during CPU death, so let caller know.
+		 * The outgoing CPU completed its processing, but after
+		 * cpu_wait_death() timed out and reported the error. The
+		 * caller is free to proceed, in which case the state
+		 * will be reset properly by cpu_set_state_online().
+		 * Proceeding despite this -EBUSY return makes sense
+		 * for systems where the outgoing CPUs take themselves
+		 * offline, with no post-death manipulation required from
+		 * a surviving CPU.
+		 */
+		return -EBUSY;
+
+	case CPU_BROKEN:
+
+		/*
+		 * The most likely reason we got here is that there was
+		 * a timeout during CPU death, and the outgoing CPU never
+		 * did complete its processing.  This could happen on
+		 * a virtualized system if the outgoing VCPU gets preempted
+		 * for more than five seconds, and the user attempts to
+		 * immediately online that same CPU.  Trying again later
+		 * might return -EBUSY above, hence -EAGAIN.
+		 */
+		return -EAGAIN;
+
+	default:
+
+		/* Should not happen.  Famous last words. */
+		return -EIO;
+	}
+}
+
+/*
+ * Mark the specified CPU online.
+ *
+ * Note that it is permissible to omit this call entirely, as is
+ * done in architectures that do no CPU-hotplug error checking.
+ */
+void cpu_set_state_online(int cpu)
+{
+	(void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Wait for the specified CPU to exit the idle loop and die.
+ */
+bool cpu_wait_death(unsigned int cpu, int seconds)
+{
+	int jf_left = seconds * HZ;
+	int oldstate;
+	bool ret = true;
+	int sleep_jf = 1;
+
+	might_sleep();
+
+	/* The outgoing CPU will normally get done quite quickly. */
+	if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD)
+		goto update_state;
+	udelay(5);
+
+	/* But if the outgoing CPU dawdles, wait increasingly long times. */
+	while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) {
+		schedule_timeout_uninterruptible(sleep_jf);
+		jf_left -= sleep_jf;
+		if (jf_left <= 0)
+			break;
+		sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10);
+	}
+update_state:
+	oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+	if (oldstate == CPU_DEAD) {
+		/* Outgoing CPU died normally, update state. */
+		smp_mb(); /* atomic_read() before update. */
+		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD);
+	} else {
+		/* Outgoing CPU still hasn't died, set state accordingly. */
+		if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
+				   oldstate, CPU_BROKEN) != oldstate)
+			goto update_state;
+		ret = false;
+	}
+	return ret;
+}
+
+/*
+ * Called by the outgoing CPU to report its successful death.  Return
+ * false if this report follows the surviving CPU's timing out.
+ *
+ * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU
+ * timed out.  This approach allows architectures to omit calls to
+ * cpu_check_up_prepare() and cpu_set_state_online() without defeating
+ * the next cpu_wait_death()'s polling loop.
+ */
+bool cpu_report_death(void)
+{
+	int oldstate;
+	int newstate;
+	int cpu = smp_processor_id();
+
+	do {
+		oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+		if (oldstate != CPU_BROKEN)
+			newstate = CPU_DEAD;
+		else
+			newstate = CPU_DEAD_FROZEN;
+	} while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
+				oldstate, newstate) != oldstate);
+	return newstate == CPU_DEAD;
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 245e7dc..8c0eabd 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/aio.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a5da09c..3b9a48a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -432,6 +432,14 @@
 	  This option is required if you plan to use perf-probe subcommand
 	  of perf tools on user space applications.
 
+config BPF_EVENTS
+	depends on BPF_SYSCALL
+	depends on KPROBE_EVENT
+	bool
+	default y
+	help
+	  This allows the user to attach BPF programs to kprobe events.
+
 config PROBE_EVENTS
 	def_bool n
 
@@ -599,6 +607,34 @@
 
 	 If unsure, say N
 
+config TRACE_ENUM_MAP_FILE
+       bool "Show enum mappings for trace events"
+       depends on TRACING
+       help
+        The "print fmt" of the trace events will show the enum names instead
+	of their values. This can cause problems for user space tools that
+	use this string to parse the raw data as user space does not know
+	how to convert the string to its value.
+
+	To fix this, there's a special macro in the kernel that can be used
+	to convert the enum into its value. If this macro is used, then the
+	print fmt strings will have the enums converted to their values.
+
+	If something does not get converted properly, this option can be
+	used to show what enums the kernel tried to convert.
+
+	This option is for debugging the enum conversions. A file is created
+	in the tracing directory called "enum_map" that will show the enum
+	names matched with their values and what trace event system they
+	belong too.
+
+	Normally, the mapping of the strings to values will be freed after
+	boot up or module load. With this option, they will not be freed, as
+	they are needed for the "enum_map" file. Enabling this option will
+	increase the memory footprint of the running kernel.
+
+	If unsure, say N
+
 endif # FTRACE
 
 endif # TRACING_SUPPORT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 98f2658..9b1044e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,7 @@
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
+obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
new file mode 100644
index 0000000..2d56ce5
--- /dev/null
+++ b/kernel/trace/bpf_trace.c
@@ -0,0 +1,222 @@
+/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/uaccess.h>
+#include <linux/ctype.h>
+#include "trace.h"
+
+static DEFINE_PER_CPU(int, bpf_prog_active);
+
+/**
+ * trace_call_bpf - invoke BPF program
+ * @prog: BPF program
+ * @ctx: opaque context pointer
+ *
+ * kprobe handlers execute BPF programs via this helper.
+ * Can be used from static tracepoints in the future.
+ *
+ * Return: BPF programs always return an integer which is interpreted by
+ * kprobe handler as:
+ * 0 - return from kprobe (event is filtered out)
+ * 1 - store kprobe event into ring buffer
+ * Other values are reserved and currently alias to 1
+ */
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+	unsigned int ret;
+
+	if (in_nmi()) /* not supported yet */
+		return 1;
+
+	preempt_disable();
+
+	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
+		/*
+		 * since some bpf program is already running on this cpu,
+		 * don't call into another bpf program (same or different)
+		 * and don't send kprobe event into ring-buffer,
+		 * so return zero here
+		 */
+		ret = 0;
+		goto out;
+	}
+
+	rcu_read_lock();
+	ret = BPF_PROG_RUN(prog, ctx);
+	rcu_read_unlock();
+
+ out:
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(trace_call_bpf);
+
+static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	void *dst = (void *) (long) r1;
+	int size = (int) r2;
+	void *unsafe_ptr = (void *) (long) r3;
+
+	return probe_kernel_read(dst, unsafe_ptr, size);
+}
+
+static const struct bpf_func_proto bpf_probe_read_proto = {
+	.func		= bpf_probe_read,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	/* NMI safe access to clock monotonic */
+	return ktime_get_mono_fast_ns();
+}
+
+static const struct bpf_func_proto bpf_ktime_get_ns_proto = {
+	.func		= bpf_ktime_get_ns,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+};
+
+/*
+ * limited trace_printk()
+ * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
+ */
+static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+{
+	char *fmt = (char *) (long) r1;
+	int mod[3] = {};
+	int fmt_cnt = 0;
+	int i;
+
+	/*
+	 * bpf_check()->check_func_arg()->check_stack_boundary()
+	 * guarantees that fmt points to bpf program stack,
+	 * fmt_size bytes of it were initialized and fmt_size > 0
+	 */
+	if (fmt[--fmt_size] != 0)
+		return -EINVAL;
+
+	/* check format string for allowed specifiers */
+	for (i = 0; i < fmt_size; i++) {
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
+			return -EINVAL;
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt_cnt >= 3)
+			return -EINVAL;
+
+		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
+		i++;
+		if (fmt[i] == 'l') {
+			mod[fmt_cnt]++;
+			i++;
+		} else if (fmt[i] == 'p') {
+			mod[fmt_cnt]++;
+			i++;
+			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
+				return -EINVAL;
+			fmt_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'l') {
+			mod[fmt_cnt]++;
+			i++;
+		}
+
+		if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
+			return -EINVAL;
+		fmt_cnt++;
+	}
+
+	return __trace_printk(1/* fake ip will not be printed */, fmt,
+			      mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
+			      mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
+			      mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+}
+
+static const struct bpf_func_proto bpf_trace_printk_proto = {
+	.func		= bpf_trace_printk,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+};
+
+static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_probe_read:
+		return &bpf_probe_read_proto;
+	case BPF_FUNC_ktime_get_ns:
+		return &bpf_ktime_get_ns_proto;
+
+	case BPF_FUNC_trace_printk:
+		/*
+		 * this program might be calling bpf_trace_printk,
+		 * so allocate per-cpu printk buffers
+		 */
+		trace_printk_init_buffers();
+
+		return &bpf_trace_printk_proto;
+	default:
+		return NULL;
+	}
+}
+
+/* bpf+kprobe programs can access fields of 'struct pt_regs' */
+static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
+{
+	/* check bounds */
+	if (off < 0 || off >= sizeof(struct pt_regs))
+		return false;
+
+	/* only read is allowed */
+	if (type != BPF_READ)
+		return false;
+
+	/* disallow misaligned access */
+	if (off % size != 0)
+		return false;
+
+	return true;
+}
+
+static struct bpf_verifier_ops kprobe_prog_ops = {
+	.get_func_proto  = kprobe_prog_func_proto,
+	.is_valid_access = kprobe_prog_is_valid_access,
+};
+
+static struct bpf_prog_type_list kprobe_tl = {
+	.ops	= &kprobe_prog_ops,
+	.type	= BPF_PROG_TYPE_KPROBE,
+};
+
+static int __init register_kprobe_prog_ops(void)
+{
+	bpf_register_prog_type(&kprobe_tl);
+	return 0;
+}
+late_initcall(register_kprobe_prog_ops);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4f22802..02bece4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -18,7 +18,7 @@
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/suspend.h>
-#include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
@@ -249,6 +249,19 @@
 static inline void update_function_graph_func(void) { }
 #endif
 
+
+static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops)
+{
+	/*
+	 * If this is a dynamic ops or we force list func,
+	 * then it needs to call the list anyway.
+	 */
+	if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC)
+		return ftrace_ops_list_func;
+
+	return ftrace_ops_get_func(ops);
+}
+
 static void update_ftrace_function(void)
 {
 	ftrace_func_t func;
@@ -270,7 +283,7 @@
 	 * then have the mcount trampoline call the function directly.
 	 */
 	} else if (ftrace_ops_list->next == &ftrace_list_end) {
-		func = ftrace_ops_get_func(ftrace_ops_list);
+		func = ftrace_ops_get_list_func(ftrace_ops_list);
 
 	} else {
 		/* Just use the default ftrace_ops */
@@ -1008,7 +1021,7 @@
 	.stat_show	= function_stat_show
 };
 
-static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
 {
 	struct ftrace_profile_stat *stat;
 	struct dentry *entry;
@@ -1044,15 +1057,15 @@
 		}
 	}
 
-	entry = debugfs_create_file("function_profile_enabled", 0644,
+	entry = tracefs_create_file("function_profile_enabled", 0644,
 				    d_tracer, NULL, &ftrace_profile_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs "
+		pr_warning("Could not create tracefs "
 			   "'function_profile_enabled' entry\n");
 }
 
 #else /* CONFIG_FUNCTION_PROFILER */
-static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
 {
 }
 #endif /* CONFIG_FUNCTION_PROFILER */
@@ -4712,7 +4725,7 @@
 	mutex_unlock(&ftrace_lock);
 }
 
-static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
+static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
 {
 
 	trace_create_file("available_filter_functions", 0444,
@@ -5020,7 +5033,7 @@
 }
 core_initcall(ftrace_nodyn_init);
 
-static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
+static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; }
 static inline void ftrace_startup_enable(int command) { }
 static inline void ftrace_startup_all(int command) { }
 /* Keep as macros so we do not need to define the commands */
@@ -5209,13 +5222,6 @@
 ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
 {
 	/*
-	 * If this is a dynamic ops or we force list func,
-	 * then it needs to call the list anyway.
-	 */
-	if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC)
-		return ftrace_ops_list_func;
-
-	/*
 	 * If the func handles its own recursion, call it directly.
 	 * Otherwise call the recursion protected function that
 	 * will call the ftrace ops function.
@@ -5473,7 +5479,7 @@
 	.release	= ftrace_pid_release,
 };
 
-static __init int ftrace_init_debugfs(void)
+static __init int ftrace_init_tracefs(void)
 {
 	struct dentry *d_tracer;
 
@@ -5481,16 +5487,16 @@
 	if (IS_ERR(d_tracer))
 		return 0;
 
-	ftrace_init_dyn_debugfs(d_tracer);
+	ftrace_init_dyn_tracefs(d_tracer);
 
 	trace_create_file("set_ftrace_pid", 0644, d_tracer,
 			    NULL, &ftrace_pid_fops);
 
-	ftrace_profile_debugfs(d_tracer);
+	ftrace_profile_tracefs(d_tracer);
 
 	return 0;
 }
-fs_initcall(ftrace_init_debugfs);
+fs_initcall(ftrace_init_tracefs);
 
 /**
  * ftrace_kill - kill ftrace
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5040d44..0315d43 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2679,7 +2679,7 @@
 
 static __always_inline int trace_recursive_lock(void)
 {
-	unsigned int val = this_cpu_read(current_context);
+	unsigned int val = __this_cpu_read(current_context);
 	int bit;
 
 	if (in_interrupt()) {
@@ -2696,18 +2696,14 @@
 		return 1;
 
 	val |= (1 << bit);
-	this_cpu_write(current_context, val);
+	__this_cpu_write(current_context, val);
 
 	return 0;
 }
 
 static __always_inline void trace_recursive_unlock(void)
 {
-	unsigned int val = this_cpu_read(current_context);
-
-	val--;
-	val &= this_cpu_read(current_context);
-	this_cpu_write(current_context, val);
+	__this_cpu_and(current_context, __this_cpu_read(current_context) - 1);
 }
 
 #else
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 62c6506..91eecaa 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -20,6 +20,7 @@
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
 #include <linux/linkage.h>
@@ -31,6 +32,7 @@
 #include <linux/splice.h>
 #include <linux/kdebug.h>
 #include <linux/string.h>
+#include <linux/mount.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
@@ -123,6 +125,42 @@
 /* When set, tracing will stop when a WARN*() is hit */
 int __disable_trace_on_warning;
 
+#ifdef CONFIG_TRACE_ENUM_MAP_FILE
+/* Map of enums to their values, for "enum_map" file */
+struct trace_enum_map_head {
+	struct module			*mod;
+	unsigned long			length;
+};
+
+union trace_enum_map_item;
+
+struct trace_enum_map_tail {
+	/*
+	 * "end" is first and points to NULL as it must be different
+	 * than "mod" or "enum_string"
+	 */
+	union trace_enum_map_item	*next;
+	const char			*end;	/* points to NULL */
+};
+
+static DEFINE_MUTEX(trace_enum_mutex);
+
+/*
+ * The trace_enum_maps are saved in an array with two extra elements,
+ * one at the beginning, and one at the end. The beginning item contains
+ * the count of the saved maps (head.length), and the module they
+ * belong to if not built in (head.mod). The ending item contains a
+ * pointer to the next array of saved enum_map items.
+ */
+union trace_enum_map_item {
+	struct trace_enum_map		map;
+	struct trace_enum_map_head	head;
+	struct trace_enum_map_tail	tail;
+};
+
+static union trace_enum_map_item *trace_enum_maps;
+#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
+
 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
 
 #define MAX_TRACER_SIZE		100
@@ -3908,6 +3946,182 @@
 	.write		= tracing_saved_cmdlines_size_write,
 };
 
+#ifdef CONFIG_TRACE_ENUM_MAP_FILE
+static union trace_enum_map_item *
+update_enum_map(union trace_enum_map_item *ptr)
+{
+	if (!ptr->map.enum_string) {
+		if (ptr->tail.next) {
+			ptr = ptr->tail.next;
+			/* Set ptr to the next real item (skip head) */
+			ptr++;
+		} else
+			return NULL;
+	}
+	return ptr;
+}
+
+static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	union trace_enum_map_item *ptr = v;
+
+	/*
+	 * Paranoid! If ptr points to end, we don't want to increment past it.
+	 * This really should never happen.
+	 */
+	ptr = update_enum_map(ptr);
+	if (WARN_ON_ONCE(!ptr))
+		return NULL;
+
+	ptr++;
+
+	(*pos)++;
+
+	ptr = update_enum_map(ptr);
+
+	return ptr;
+}
+
+static void *enum_map_start(struct seq_file *m, loff_t *pos)
+{
+	union trace_enum_map_item *v;
+	loff_t l = 0;
+
+	mutex_lock(&trace_enum_mutex);
+
+	v = trace_enum_maps;
+	if (v)
+		v++;
+
+	while (v && l < *pos) {
+		v = enum_map_next(m, v, &l);
+	}
+
+	return v;
+}
+
+static void enum_map_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&trace_enum_mutex);
+}
+
+static int enum_map_show(struct seq_file *m, void *v)
+{
+	union trace_enum_map_item *ptr = v;
+
+	seq_printf(m, "%s %ld (%s)\n",
+		   ptr->map.enum_string, ptr->map.enum_value,
+		   ptr->map.system);
+
+	return 0;
+}
+
+static const struct seq_operations tracing_enum_map_seq_ops = {
+	.start		= enum_map_start,
+	.next		= enum_map_next,
+	.stop		= enum_map_stop,
+	.show		= enum_map_show,
+};
+
+static int tracing_enum_map_open(struct inode *inode, struct file *filp)
+{
+	if (tracing_disabled)
+		return -ENODEV;
+
+	return seq_open(filp, &tracing_enum_map_seq_ops);
+}
+
+static const struct file_operations tracing_enum_map_fops = {
+	.open		= tracing_enum_map_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static inline union trace_enum_map_item *
+trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
+{
+	/* Return tail of array given the head */
+	return ptr + ptr->head.length + 1;
+}
+
+static void
+trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
+			   int len)
+{
+	struct trace_enum_map **stop;
+	struct trace_enum_map **map;
+	union trace_enum_map_item *map_array;
+	union trace_enum_map_item *ptr;
+
+	stop = start + len;
+
+	/*
+	 * The trace_enum_maps contains the map plus a head and tail item,
+	 * where the head holds the module and length of array, and the
+	 * tail holds a pointer to the next list.
+	 */
+	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
+	if (!map_array) {
+		pr_warning("Unable to allocate trace enum mapping\n");
+		return;
+	}
+
+	mutex_lock(&trace_enum_mutex);
+
+	if (!trace_enum_maps)
+		trace_enum_maps = map_array;
+	else {
+		ptr = trace_enum_maps;
+		for (;;) {
+			ptr = trace_enum_jmp_to_tail(ptr);
+			if (!ptr->tail.next)
+				break;
+			ptr = ptr->tail.next;
+
+		}
+		ptr->tail.next = map_array;
+	}
+	map_array->head.mod = mod;
+	map_array->head.length = len;
+	map_array++;
+
+	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
+		map_array->map = **map;
+		map_array++;
+	}
+	memset(map_array, 0, sizeof(*map_array));
+
+	mutex_unlock(&trace_enum_mutex);
+}
+
+static void trace_create_enum_file(struct dentry *d_tracer)
+{
+	trace_create_file("enum_map", 0444, d_tracer,
+			  NULL, &tracing_enum_map_fops);
+}
+
+#else /* CONFIG_TRACE_ENUM_MAP_FILE */
+static inline void trace_create_enum_file(struct dentry *d_tracer) { }
+static inline void trace_insert_enum_map_file(struct module *mod,
+			      struct trace_enum_map **start, int len) { }
+#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
+
+static void trace_insert_enum_map(struct module *mod,
+				  struct trace_enum_map **start, int len)
+{
+	struct trace_enum_map **map;
+
+	if (len <= 0)
+		return;
+
+	map = start;
+
+	trace_event_enum_update(map, len);
+
+	trace_insert_enum_map_file(mod, start, len);
+}
+
 static ssize_t
 tracing_set_trace_read(struct file *filp, char __user *ubuf,
 		       size_t cnt, loff_t *ppos)
@@ -4105,9 +4319,24 @@
 	tr->current_trace = &nop_trace;
 }
 
-static int tracing_set_tracer(struct trace_array *tr, const char *buf)
+static void update_tracer_options(struct trace_array *tr, struct tracer *t)
 {
 	static struct trace_option_dentry *topts;
+
+	/* Only enable if the directory has been created already. */
+	if (!tr->dir)
+		return;
+
+	/* Currently, only the top instance has options */
+	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL))
+		return;
+
+	destroy_trace_option_files(topts);
+	topts = create_trace_option_files(tr, t);
+}
+
+static int tracing_set_tracer(struct trace_array *tr, const char *buf)
+{
 	struct tracer *t;
 #ifdef CONFIG_TRACER_MAX_TRACE
 	bool had_max_tr;
@@ -4172,11 +4401,7 @@
 		free_snapshot(tr);
 	}
 #endif
-	/* Currently, only the top instance has options */
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
-		destroy_trace_option_files(topts);
-		topts = create_trace_option_files(tr, t);
-	}
+	update_tracer_options(tr, t);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 	if (t->use_max_tr && !had_max_tr) {
@@ -5817,6 +6042,14 @@
 
 static struct dentry *tracing_get_dentry(struct trace_array *tr)
 {
+	if (WARN_ON(!tr->dir))
+		return ERR_PTR(-ENODEV);
+
+	/* Top directory uses NULL as the parent */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+		return NULL;
+
+	/* All sub buffers have a descriptor */
 	return tr->dir;
 }
 
@@ -5831,10 +6064,10 @@
 	if (IS_ERR(d_tracer))
 		return NULL;
 
-	tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
+	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
 
 	WARN_ONCE(!tr->percpu_dir,
-		  "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
+		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
 
 	return tr->percpu_dir;
 }
@@ -5851,7 +6084,7 @@
 }
 
 static void
-tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
+tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
 {
 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
 	struct dentry *d_cpu;
@@ -5861,9 +6094,9 @@
 		return;
 
 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
-	d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
+	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
 	if (!d_cpu) {
-		pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
+		pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
 		return;
 	}
 
@@ -6015,9 +6248,9 @@
 {
 	struct dentry *ret;
 
-	ret = debugfs_create_file(name, mode, parent, data, fops);
+	ret = tracefs_create_file(name, mode, parent, data, fops);
 	if (!ret)
-		pr_warning("Could not create debugfs '%s' entry\n", name);
+		pr_warning("Could not create tracefs '%s' entry\n", name);
 
 	return ret;
 }
@@ -6034,9 +6267,9 @@
 	if (IS_ERR(d_tracer))
 		return NULL;
 
-	tr->options = debugfs_create_dir("options", d_tracer);
+	tr->options = tracefs_create_dir("options", d_tracer);
 	if (!tr->options) {
-		pr_warning("Could not create debugfs directory 'options'\n");
+		pr_warning("Could not create tracefs directory 'options'\n");
 		return NULL;
 	}
 
@@ -6105,7 +6338,7 @@
 		return;
 
 	for (cnt = 0; topts[cnt].opt; cnt++)
-		debugfs_remove(topts[cnt].entry);
+		tracefs_remove(topts[cnt].entry);
 
 	kfree(topts);
 }
@@ -6194,7 +6427,7 @@
 struct dentry *trace_instance_dir;
 
 static void
-init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
+init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
 
 static int
 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
@@ -6271,7 +6504,7 @@
 #endif
 }
 
-static int new_instance_create(const char *name)
+static int instance_mkdir(const char *name)
 {
 	struct trace_array *tr;
 	int ret;
@@ -6310,17 +6543,17 @@
 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
 		goto out_free_tr;
 
-	tr->dir = debugfs_create_dir(name, trace_instance_dir);
+	tr->dir = tracefs_create_dir(name, trace_instance_dir);
 	if (!tr->dir)
 		goto out_free_tr;
 
 	ret = event_trace_add_tracer(tr->dir, tr);
 	if (ret) {
-		debugfs_remove_recursive(tr->dir);
+		tracefs_remove_recursive(tr->dir);
 		goto out_free_tr;
 	}
 
-	init_tracer_debugfs(tr, tr->dir);
+	init_tracer_tracefs(tr, tr->dir);
 
 	list_add(&tr->list, &ftrace_trace_arrays);
 
@@ -6341,7 +6574,7 @@
 
 }
 
-static int instance_delete(const char *name)
+static int instance_rmdir(const char *name)
 {
 	struct trace_array *tr;
 	int found = 0;
@@ -6382,82 +6615,17 @@
 	return ret;
 }
 
-static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
-{
-	struct dentry *parent;
-	int ret;
-
-	/* Paranoid: Make sure the parent is the "instances" directory */
-	parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
-	if (WARN_ON_ONCE(parent != trace_instance_dir))
-		return -ENOENT;
-
-	/*
-	 * The inode mutex is locked, but debugfs_create_dir() will also
-	 * take the mutex. As the instances directory can not be destroyed
-	 * or changed in any other way, it is safe to unlock it, and
-	 * let the dentry try. If two users try to make the same dir at
-	 * the same time, then the new_instance_create() will determine the
-	 * winner.
-	 */
-	mutex_unlock(&inode->i_mutex);
-
-	ret = new_instance_create(dentry->d_iname);
-
-	mutex_lock(&inode->i_mutex);
-
-	return ret;
-}
-
-static int instance_rmdir(struct inode *inode, struct dentry *dentry)
-{
-	struct dentry *parent;
-	int ret;
-
-	/* Paranoid: Make sure the parent is the "instances" directory */
-	parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
-	if (WARN_ON_ONCE(parent != trace_instance_dir))
-		return -ENOENT;
-
-	/* The caller did a dget() on dentry */
-	mutex_unlock(&dentry->d_inode->i_mutex);
-
-	/*
-	 * The inode mutex is locked, but debugfs_create_dir() will also
-	 * take the mutex. As the instances directory can not be destroyed
-	 * or changed in any other way, it is safe to unlock it, and
-	 * let the dentry try. If two users try to make the same dir at
-	 * the same time, then the instance_delete() will determine the
-	 * winner.
-	 */
-	mutex_unlock(&inode->i_mutex);
-
-	ret = instance_delete(dentry->d_iname);
-
-	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-	mutex_lock(&dentry->d_inode->i_mutex);
-
-	return ret;
-}
-
-static const struct inode_operations instance_dir_inode_operations = {
-	.lookup		= simple_lookup,
-	.mkdir		= instance_mkdir,
-	.rmdir		= instance_rmdir,
-};
-
 static __init void create_trace_instances(struct dentry *d_tracer)
 {
-	trace_instance_dir = debugfs_create_dir("instances", d_tracer);
+	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
+							 instance_mkdir,
+							 instance_rmdir);
 	if (WARN_ON(!trace_instance_dir))
 		return;
-
-	/* Hijack the dir inode operations, to allow mkdir */
-	trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
 }
 
 static void
-init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
+init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 {
 	int cpu;
 
@@ -6511,10 +6679,32 @@
 #endif
 
 	for_each_tracing_cpu(cpu)
-		tracing_init_debugfs_percpu(tr, cpu);
+		tracing_init_tracefs_percpu(tr, cpu);
 
 }
 
+static struct vfsmount *trace_automount(void *ingore)
+{
+	struct vfsmount *mnt;
+	struct file_system_type *type;
+
+	/*
+	 * To maintain backward compatibility for tools that mount
+	 * debugfs to get to the tracing facility, tracefs is automatically
+	 * mounted to the debugfs/tracing directory.
+	 */
+	type = get_fs_type("tracefs");
+	if (!type)
+		return NULL;
+	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
+	put_filesystem(type);
+	if (IS_ERR(mnt))
+		return NULL;
+	mntget(mnt);
+
+	return mnt;
+}
+
 /**
  * tracing_init_dentry - initialize top level trace array
  *
@@ -6526,23 +6716,112 @@
 {
 	struct trace_array *tr = &global_trace;
 
+	/* The top level trace array uses  NULL as parent */
 	if (tr->dir)
-		return tr->dir;
+		return NULL;
 
 	if (WARN_ON(!debugfs_initialized()))
 		return ERR_PTR(-ENODEV);
 
-	tr->dir = debugfs_create_dir("tracing", NULL);
-
+	/*
+	 * As there may still be users that expect the tracing
+	 * files to exist in debugfs/tracing, we must automount
+	 * the tracefs file system there, so older tools still
+	 * work with the newer kerenl.
+	 */
+	tr->dir = debugfs_create_automount("tracing", NULL,
+					   trace_automount, NULL);
 	if (!tr->dir) {
 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	return tr->dir;
+	return NULL;
 }
 
-static __init int tracer_init_debugfs(void)
+extern struct trace_enum_map *__start_ftrace_enum_maps[];
+extern struct trace_enum_map *__stop_ftrace_enum_maps[];
+
+static void __init trace_enum_init(void)
+{
+	int len;
+
+	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
+	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
+}
+
+#ifdef CONFIG_MODULES
+static void trace_module_add_enums(struct module *mod)
+{
+	if (!mod->num_trace_enums)
+		return;
+
+	/*
+	 * Modules with bad taint do not have events created, do
+	 * not bother with enums either.
+	 */
+	if (trace_module_has_bad_taint(mod))
+		return;
+
+	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
+}
+
+#ifdef CONFIG_TRACE_ENUM_MAP_FILE
+static void trace_module_remove_enums(struct module *mod)
+{
+	union trace_enum_map_item *map;
+	union trace_enum_map_item **last = &trace_enum_maps;
+
+	if (!mod->num_trace_enums)
+		return;
+
+	mutex_lock(&trace_enum_mutex);
+
+	map = trace_enum_maps;
+
+	while (map) {
+		if (map->head.mod == mod)
+			break;
+		map = trace_enum_jmp_to_tail(map);
+		last = &map->tail.next;
+		map = map->tail.next;
+	}
+	if (!map)
+		goto out;
+
+	*last = trace_enum_jmp_to_tail(map)->tail.next;
+	kfree(map);
+ out:
+	mutex_unlock(&trace_enum_mutex);
+}
+#else
+static inline void trace_module_remove_enums(struct module *mod) { }
+#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
+
+static int trace_module_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	switch (val) {
+	case MODULE_STATE_COMING:
+		trace_module_add_enums(mod);
+		break;
+	case MODULE_STATE_GOING:
+		trace_module_remove_enums(mod);
+		break;
+	}
+
+	return 0;
+}
+
+static struct notifier_block trace_module_nb = {
+	.notifier_call = trace_module_notify,
+	.priority = 0,
+};
+#endif /* CONFIG_MODULES */
+
+static __init int tracer_init_tracefs(void)
 {
 	struct dentry *d_tracer;
 
@@ -6552,7 +6831,7 @@
 	if (IS_ERR(d_tracer))
 		return 0;
 
-	init_tracer_debugfs(&global_trace, d_tracer);
+	init_tracer_tracefs(&global_trace, d_tracer);
 
 	trace_create_file("tracing_thresh", 0644, d_tracer,
 			&global_trace, &tracing_thresh_fops);
@@ -6566,6 +6845,14 @@
 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
 			  NULL, &tracing_saved_cmdlines_size_fops);
 
+	trace_enum_init();
+
+	trace_create_enum_file(d_tracer);
+
+#ifdef CONFIG_MODULES
+	register_module_notifier(&trace_module_nb);
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -6575,6 +6862,10 @@
 
 	create_trace_options_dir(&global_trace);
 
+	/* If the tracer was started via cmdline, create options for it here */
+	if (global_trace.current_trace != &nop_trace)
+		update_tracer_options(&global_trace, global_trace.current_trace);
+
 	return 0;
 }
 
@@ -6888,7 +7179,7 @@
 			tracepoint_printk = 0;
 	}
 	tracer_alloc_buffers();
-	trace_event_init();	
+	trace_event_init();
 }
 
 __init static int clear_boot_tracer(void)
@@ -6910,5 +7201,5 @@
 	return 0;
 }
 
-fs_initcall(tracer_init_debugfs);
+fs_initcall(tracer_init_tracefs);
 late_initcall(clear_boot_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dd8205a..d261201 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -334,7 +334,7 @@
 
 
 /**
- * struct tracer - a specific tracer and its callbacks to interact with debugfs
+ * struct tracer - a specific tracer and its callbacks to interact with tracefs
  * @name: the name chosen to select it on the available_tracers file
  * @init: called when one switches to this tracer (echo name > current_tracer)
  * @reset: called when one switches to another tracer
@@ -1309,8 +1309,10 @@
 
 #ifdef CONFIG_EVENT_TRACING
 void trace_event_init(void);
+void trace_event_enum_update(struct trace_enum_map **map, int len);
 #else
 static inline void __init trace_event_init(void) { }
+static inlin void trace_event_enum_update(struct trace_enum_map **map, int len) { }
 #endif
 
 extern struct trace_iterator *tracepoint_print_iter;
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e2d027a..ee7b94a 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -223,7 +223,7 @@
 		__dynamic_array(	u32,	buf	)
 	),
 
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->fmt),
 
 	FILTER_OTHER
@@ -238,7 +238,7 @@
 		__dynamic_array(	char,	buf	)
 	),
 
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->buf),
 
 	FILTER_OTHER
@@ -253,7 +253,7 @@
 		__field(	const char *,	str	)
 	),
 
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->str),
 
 	FILTER_OTHER
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index db54dda..7da1dfe 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -13,7 +13,7 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/kthread.h>
-#include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
@@ -480,7 +480,7 @@
 		return;
 
 	if (!--dir->nr_events) {
-		debugfs_remove_recursive(dir->entry);
+		tracefs_remove_recursive(dir->entry);
 		list_del(&dir->list);
 		__put_system_dir(dir);
 	}
@@ -499,7 +499,7 @@
 		}
 		spin_unlock(&dir->d_lock);
 
-		debugfs_remove_recursive(dir);
+		tracefs_remove_recursive(dir);
 	}
 
 	list_del(&file->list);
@@ -1526,7 +1526,7 @@
 	} else
 		__get_system(system);
 
-	dir->entry = debugfs_create_dir(name, parent);
+	dir->entry = tracefs_create_dir(name, parent);
 	if (!dir->entry) {
 		pr_warn("Failed to create system directory %s\n", name);
 		__put_system(system);
@@ -1539,12 +1539,12 @@
 	dir->subsystem = system;
 	file->system = dir;
 
-	entry = debugfs_create_file("filter", 0644, dir->entry, dir,
+	entry = tracefs_create_file("filter", 0644, dir->entry, dir,
 				    &ftrace_subsystem_filter_fops);
 	if (!entry) {
 		kfree(system->filter);
 		system->filter = NULL;
-		pr_warn("Could not create debugfs '%s/filter' entry\n", name);
+		pr_warn("Could not create tracefs '%s/filter' entry\n", name);
 	}
 
 	trace_create_file("enable", 0644, dir->entry, dir,
@@ -1585,9 +1585,9 @@
 		d_events = parent;
 
 	name = ftrace_event_name(call);
-	file->dir = debugfs_create_dir(name, d_events);
+	file->dir = tracefs_create_dir(name, d_events);
 	if (!file->dir) {
-		pr_warn("Could not create debugfs '%s' directory\n", name);
+		pr_warn("Could not create tracefs '%s' directory\n", name);
 		return -1;
 	}
 
@@ -1704,6 +1704,125 @@
 	return 0;
 }
 
+static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
+{
+	int rlen;
+	int elen;
+
+	/* Find the length of the enum value as a string */
+	elen = snprintf(ptr, 0, "%ld", map->enum_value);
+	/* Make sure there's enough room to replace the string with the value */
+	if (len < elen)
+		return NULL;
+
+	snprintf(ptr, elen + 1, "%ld", map->enum_value);
+
+	/* Get the rest of the string of ptr */
+	rlen = strlen(ptr + len);
+	memmove(ptr + elen, ptr + len, rlen);
+	/* Make sure we end the new string */
+	ptr[elen + rlen] = 0;
+
+	return ptr + elen;
+}
+
+static void update_event_printk(struct ftrace_event_call *call,
+				struct trace_enum_map *map)
+{
+	char *ptr;
+	int quote = 0;
+	int len = strlen(map->enum_string);
+
+	for (ptr = call->print_fmt; *ptr; ptr++) {
+		if (*ptr == '\\') {
+			ptr++;
+			/* paranoid */
+			if (!*ptr)
+				break;
+			continue;
+		}
+		if (*ptr == '"') {
+			quote ^= 1;
+			continue;
+		}
+		if (quote)
+			continue;
+		if (isdigit(*ptr)) {
+			/* skip numbers */
+			do {
+				ptr++;
+				/* Check for alpha chars like ULL */
+			} while (isalnum(*ptr));
+			/*
+			 * A number must have some kind of delimiter after
+			 * it, and we can ignore that too.
+			 */
+			continue;
+		}
+		if (isalpha(*ptr) || *ptr == '_') {
+			if (strncmp(map->enum_string, ptr, len) == 0 &&
+			    !isalnum(ptr[len]) && ptr[len] != '_') {
+				ptr = enum_replace(ptr, map, len);
+				/* Hmm, enum string smaller than value */
+				if (WARN_ON_ONCE(!ptr))
+					return;
+				/*
+				 * No need to decrement here, as enum_replace()
+				 * returns the pointer to the character passed
+				 * the enum, and two enums can not be placed
+				 * back to back without something in between.
+				 * We can skip that something in between.
+				 */
+				continue;
+			}
+		skip_more:
+			do {
+				ptr++;
+			} while (isalnum(*ptr) || *ptr == '_');
+			/*
+			 * If what comes after this variable is a '.' or
+			 * '->' then we can continue to ignore that string.
+			 */
+			if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
+				ptr += *ptr == '.' ? 1 : 2;
+				goto skip_more;
+			}
+			/*
+			 * Once again, we can skip the delimiter that came
+			 * after the string.
+			 */
+			continue;
+		}
+	}
+}
+
+void trace_event_enum_update(struct trace_enum_map **map, int len)
+{
+	struct ftrace_event_call *call, *p;
+	const char *last_system = NULL;
+	int last_i;
+	int i;
+
+	down_write(&trace_event_sem);
+	list_for_each_entry_safe(call, p, &ftrace_events, list) {
+		/* events are usually grouped together with systems */
+		if (!last_system || call->class->system != last_system) {
+			last_i = 0;
+			last_system = call->class->system;
+		}
+
+		for (i = last_i; i < len; i++) {
+			if (call->class->system == map[i]->system) {
+				/* Save the first system if need be */
+				if (!last_i)
+					last_i = i;
+				update_event_printk(call, map[i]);
+			}
+		}
+	}
+	up_write(&trace_event_sem);
+}
+
 static struct ftrace_event_file *
 trace_create_new_event(struct ftrace_event_call *call,
 		       struct trace_array *tr)
@@ -1915,7 +2034,7 @@
 
 static struct notifier_block trace_module_nb = {
 	.notifier_call = trace_module_notify,
-	.priority = 0,
+	.priority = 1, /* higher than trace.c module notify */
 };
 #endif /* CONFIG_MODULES */
 
@@ -2228,7 +2347,7 @@
 /*
  * The top level array has already had its ftrace_event_file
  * descriptors created in order to allow for early events to
- * be recorded. This function is called after the debugfs has been
+ * be recorded. This function is called after the tracefs has been
  * initialized, and we now have to create the files associated
  * to the events.
  */
@@ -2311,16 +2430,16 @@
 	struct dentry *d_events;
 	struct dentry *entry;
 
-	entry = debugfs_create_file("set_event", 0644, parent,
+	entry = tracefs_create_file("set_event", 0644, parent,
 				    tr, &ftrace_set_event_fops);
 	if (!entry) {
-		pr_warn("Could not create debugfs 'set_event' entry\n");
+		pr_warn("Could not create tracefs 'set_event' entry\n");
 		return -ENOMEM;
 	}
 
-	d_events = debugfs_create_dir("events", parent);
+	d_events = tracefs_create_dir("events", parent);
 	if (!d_events) {
-		pr_warn("Could not create debugfs 'events' directory\n");
+		pr_warn("Could not create tracefs 'events' directory\n");
 		return -ENOMEM;
 	}
 
@@ -2412,7 +2531,7 @@
 
 	down_write(&trace_event_sem);
 	__trace_remove_event_dirs(tr);
-	debugfs_remove_recursive(tr->event_dir);
+	tracefs_remove_recursive(tr->event_dir);
 	up_write(&trace_event_sem);
 
 	tr->event_dir = NULL;
@@ -2534,10 +2653,10 @@
 	if (IS_ERR(d_tracer))
 		return 0;
 
-	entry = debugfs_create_file("available_events", 0444, d_tracer,
+	entry = tracefs_create_file("available_events", 0444, d_tracer,
 				    tr, &ftrace_avail_fops);
 	if (!entry)
-		pr_warn("Could not create debugfs 'available_events' entry\n");
+		pr_warn("Could not create tracefs 'available_events' entry\n");
 
 	if (trace_define_common_fields())
 		pr_warn("tracing: Failed to allocate common fields");
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 12e2b99..174a6a7 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -177,7 +177,7 @@
 	},								\
 	.event.type		= etype,				\
 	.print_fmt		= print,				\
-	.flags			= TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \
+	.flags			= TRACE_EVENT_FL_IGNORE_ENABLE,		\
 };									\
 struct ftrace_event_call __used						\
 __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 2d25ad1..9cfea4c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -6,7 +6,6 @@
  * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
  *
  */
-#include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
@@ -151,7 +150,7 @@
 	 * The curr_ret_stack is initialized to -1 and get increased
 	 * in this function.  So it can be less than -1 only if it was
 	 * filtered out via ftrace_graph_notrace_addr() which can be
-	 * set from set_graph_notrace file in debugfs by user.
+	 * set from set_graph_notrace file in tracefs by user.
 	 */
 	if (current->curr_ret_stack < -1)
 		return -EBUSY;
@@ -1432,7 +1431,7 @@
 	.llseek		= generic_file_llseek,
 };
 
-static __init int init_graph_debugfs(void)
+static __init int init_graph_tracefs(void)
 {
 	struct dentry *d_tracer;
 
@@ -1445,7 +1444,7 @@
 
 	return 0;
 }
-fs_initcall(init_graph_debugfs);
+fs_initcall(init_graph_tracefs);
 
 static __init int init_graph_trace(void)
 {
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d73f565..d0ce590 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -250,7 +250,7 @@
 #define fetch_file_offset_string_size	NULL
 
 /* Fetch type information table */
-const struct fetch_type kprobes_fetch_type_table[] = {
+static const struct fetch_type kprobes_fetch_type_table[] = {
 	/* Special types */
 	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
 					sizeof(u32), 1, "__data_loc char[]"),
@@ -760,7 +760,8 @@
 
 		/* Parse fetch argument */
 		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
-						is_return, true);
+						is_return, true,
+						kprobes_fetch_type_table);
 		if (ret) {
 			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
 			goto error;
@@ -1134,11 +1135,15 @@
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tk->tp.call;
+	struct bpf_prog *prog = call->prog;
 	struct kprobe_trace_entry_head *entry;
 	struct hlist_head *head;
 	int size, __size, dsize;
 	int rctx;
 
+	if (prog && !trace_call_bpf(prog, regs))
+		return;
+
 	head = this_cpu_ptr(call->perf_events);
 	if (hlist_empty(head))
 		return;
@@ -1165,11 +1170,15 @@
 		    struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tk->tp.call;
+	struct bpf_prog *prog = call->prog;
 	struct kretprobe_trace_entry_head *entry;
 	struct hlist_head *head;
 	int size, __size, dsize;
 	int rctx;
 
+	if (prog && !trace_call_bpf(prog, regs))
+		return;
+
 	head = this_cpu_ptr(call->perf_events);
 	if (hlist_empty(head))
 		return;
@@ -1286,7 +1295,7 @@
 		kfree(call->print_fmt);
 		return -ENODEV;
 	}
-	call->flags = 0;
+	call->flags = TRACE_EVENT_FL_KPROBE;
 	call->class->reg = kprobe_register;
 	call->data = tk;
 	ret = trace_add_event_call(call);
@@ -1310,7 +1319,7 @@
 	return ret;
 }
 
-/* Make a debugfs interface for controlling probe points */
+/* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)
 {
 	struct dentry *d_tracer;
@@ -1323,20 +1332,20 @@
 	if (IS_ERR(d_tracer))
 		return 0;
 
-	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
+	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
 				    NULL, &kprobe_events_ops);
 
 	/* Event list interface */
 	if (!entry)
-		pr_warning("Could not create debugfs "
+		pr_warning("Could not create tracefs "
 			   "'kprobe_events' entry\n");
 
 	/* Profile interface */
-	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
+	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
 				    NULL, &kprobe_profile_ops);
 
 	if (!entry)
-		pr_warning("Could not create debugfs "
+		pr_warning("Could not create tracefs "
 			   "'kprobe_profile' entry\n");
 	return 0;
 }
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index b983b2f..1769a81 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -356,17 +356,14 @@
 
 /* Recursive argument parser */
 static int parse_probe_arg(char *arg, const struct fetch_type *t,
-		     struct fetch_param *f, bool is_return, bool is_kprobe)
+		     struct fetch_param *f, bool is_return, bool is_kprobe,
+		     const struct fetch_type *ftbl)
 {
-	const struct fetch_type *ftbl;
 	unsigned long param;
 	long offset;
 	char *tmp;
 	int ret = 0;
 
-	ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table;
-	BUG_ON(ftbl == NULL);
-
 	switch (arg[0]) {
 	case '$':
 		ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe);
@@ -447,7 +444,7 @@
 			dprm->fetch_size = get_fetch_size_function(t,
 							dprm->fetch, ftbl);
 			ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
-							is_kprobe);
+							is_kprobe, ftbl);
 			if (ret)
 				kfree(dprm);
 			else {
@@ -505,15 +502,12 @@
 
 /* String length checking wrapper */
 int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
-		struct probe_arg *parg, bool is_return, bool is_kprobe)
+		struct probe_arg *parg, bool is_return, bool is_kprobe,
+		const struct fetch_type *ftbl)
 {
-	const struct fetch_type *ftbl;
 	const char *t;
 	int ret;
 
-	ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table;
-	BUG_ON(ftbl == NULL);
-
 	if (strlen(arg) > MAX_ARGSTR_LEN) {
 		pr_info("Argument is too long.: %s\n",  arg);
 		return -ENOSPC;
@@ -535,7 +529,8 @@
 	}
 	parg->offset = *size;
 	*size += parg->type->size;
-	ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe);
+	ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return,
+			      is_kprobe, ftbl);
 
 	if (ret >= 0 && t != NULL)
 		ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 4f815fb..ab283e1 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -25,7 +25,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
-#include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
@@ -229,13 +229,6 @@
 #define FETCH_TYPE_STRING	0
 #define FETCH_TYPE_STRSIZE	1
 
-/*
- * Fetch type information table.
- * It's declared as a weak symbol due to conditional compilation.
- */
-extern __weak const struct fetch_type kprobes_fetch_type_table[];
-extern __weak const struct fetch_type uprobes_fetch_type_table[];
-
 #ifdef CONFIG_KPROBE_EVENT
 struct symbol_cache;
 unsigned long update_symbol_cache(struct symbol_cache *sc);
@@ -333,7 +326,8 @@
 }
 
 extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
-		   struct probe_arg *parg, bool is_return, bool is_kprobe);
+		   struct probe_arg *parg, bool is_return, bool is_kprobe,
+		   const struct fetch_type *ftbl);
 
 extern int traceprobe_conflict_field_name(const char *name,
 			       struct probe_arg *args, int narg);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 75e19e8..6cf9353 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -12,7 +12,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
-#include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include "trace_stat.h"
 #include "trace.h"
 
@@ -65,7 +65,7 @@
 
 static void destroy_session(struct stat_session *session)
 {
-	debugfs_remove(session->file);
+	tracefs_remove(session->file);
 	__reset_stat_session(session);
 	mutex_destroy(&session->stat_mutex);
 	kfree(session);
@@ -279,9 +279,9 @@
 	if (IS_ERR(d_tracing))
 		return 0;
 
-	stat_dir = debugfs_create_dir("trace_stat", d_tracing);
+	stat_dir = tracefs_create_dir("trace_stat", d_tracing);
 	if (!stat_dir)
-		pr_warning("Could not create debugfs "
+		pr_warning("Could not create tracefs "
 			   "'trace_stat' entry\n");
 	return 0;
 }
@@ -291,7 +291,7 @@
 	if (!stat_dir && tracing_stat_init())
 		return -ENODEV;
 
-	session->file = debugfs_create_file(session->ts->name, 0644,
+	session->file = tracefs_create_file(session->ts->name, 0644,
 					    stat_dir,
 					    session, &tracing_stat_fops);
 	if (!session->file)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 7dc1c8a..d60fe62 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -196,7 +196,7 @@
 DEFINE_FETCH_file_offset(string_size)
 
 /* Fetch type information table */
-const struct fetch_type uprobes_fetch_type_table[] = {
+static const struct fetch_type uprobes_fetch_type_table[] = {
 	/* Special types */
 	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
 					sizeof(u32), 1, "__data_loc char[]"),
@@ -535,7 +535,8 @@
 
 		/* Parse fetch argument */
 		ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg,
-						 is_return, false);
+						 is_return, false,
+						 uprobes_fetch_type_table);
 		if (ret) {
 			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
 			goto error;
@@ -1005,7 +1006,7 @@
 		return true;
 
 	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
-		if (event->hw.tp_target->mm == mm)
+		if (event->hw.target->mm == mm)
 			return true;
 	}
 
@@ -1015,7 +1016,7 @@
 static inline bool
 uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
 {
-	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
+	return __uprobe_perf_filter(&tu->filter, event->hw.target->mm);
 }
 
 static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
@@ -1023,10 +1024,10 @@
 	bool done;
 
 	write_lock(&tu->filter.rwlock);
-	if (event->hw.tp_target) {
+	if (event->hw.target) {
 		list_del(&event->hw.tp_list);
 		done = tu->filter.nr_systemwide ||
-			(event->hw.tp_target->flags & PF_EXITING) ||
+			(event->hw.target->flags & PF_EXITING) ||
 			uprobe_filter_event(tu, event);
 	} else {
 		tu->filter.nr_systemwide--;
@@ -1046,7 +1047,7 @@
 	int err;
 
 	write_lock(&tu->filter.rwlock);
-	if (event->hw.tp_target) {
+	if (event->hw.target) {
 		/*
 		 * event->parent != NULL means copy_process(), we can avoid
 		 * uprobe_apply(). current->mm must be probed and we can rely
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f2be11a..2316f50 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -603,9 +603,37 @@
 		cpu0_err = 0;
 	}
 }
+
+void watchdog_nmi_enable_all(void)
+{
+	int cpu;
+
+	if (!watchdog_user_enabled)
+		return;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		watchdog_nmi_enable(cpu);
+	put_online_cpus();
+}
+
+void watchdog_nmi_disable_all(void)
+{
+	int cpu;
+
+	if (!watchdog_running)
+		return;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		watchdog_nmi_disable(cpu);
+	put_online_cpus();
+}
 #else
 static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
 static void watchdog_nmi_disable(unsigned int cpu) { return; }
+void watchdog_nmi_enable_all(void) {}
+void watchdog_nmi_disable_all(void) {}
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 static struct smp_hotplug_thread watchdog_threads = {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 15c9118..1767057 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1193,16 +1193,7 @@
 menu "RCU Debugging"
 
 config PROVE_RCU
-	bool "RCU debugging: prove RCU correctness"
-	depends on PROVE_LOCKING
-	default n
-	help
-	 This feature enables lockdep extensions that check for correct
-	 use of RCU APIs.  This is currently under development.  Say Y
-	 if you want to debug RCU usage or help work on the PROVE_RCU
-	 feature.
-
-	 Say N if you are unsure.
+	def_bool PROVE_LOCKING
 
 config PROVE_RCU_REPEATEDLY
 	bool "RCU debugging: don't disable PROVE_RCU on first splat"
@@ -1270,6 +1261,30 @@
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_TORTURE_TEST_SLOW_INIT
+	bool "Slow down RCU grace-period initialization to expose races"
+	depends on RCU_TORTURE_TEST
+	help
+	  This option makes grace-period initialization block for a
+	  few jiffies between initializing each pair of consecutive
+	  rcu_node structures.	This helps to expose races involving
+	  grace-period initialization, in other words, it makes your
+	  kernel less stable.  It can also greatly increase grace-period
+	  latency, especially on systems with large numbers of CPUs.
+	  This is useful when torture-testing RCU, but in almost no
+	  other circumstance.
+
+	  Say Y here if you want your system to crash and hang more often.
+	  Say N if you want a sane system.
+
+config RCU_TORTURE_TEST_SLOW_INIT_DELAY
+	int "How much to slow down RCU grace-period initialization"
+	range 0 5
+	default 3
+	help
+	  This option specifies the number of jiffies to wait between
+	  each rcu_node structure initialization.
+
 config RCU_CPU_STALL_TIMEOUT
 	int "RCU CPU stall timeout in seconds"
 	depends on RCU_STALL_COMMON
diff --git a/lib/div64.c b/lib/div64.c
index 4382ad7..19ea7ed 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -127,7 +127,7 @@
  * by the book 'Hacker's Delight'.  The original source and full proof
  * can be found here and is available for use without restriction.
  *
- * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
  */
 #ifndef div64_u64
 u64 div64_u64(u64 dividend, u64 divisor)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 9d96e283..75232ad 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -317,6 +317,32 @@
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
+/*
+ * Fault in one or more iovecs of the given iov_iter, to a maximum length of
+ * bytes.  For each iovec, fault in each page that constitutes the iovec.
+ *
+ * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
+ * because it is an invalid address).
+ */
+int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+{
+	size_t skip = i->iov_offset;
+	const struct iovec *iov;
+	int err;
+	struct iovec v;
+
+	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
+		iterate_iovec(i, bytes, v, iov, skip, ({
+			err = fault_in_multipages_readable(v.iov_base,
+					v.iov_len);
+			if (unlikely(err))
+			return err;
+		0;}))
+	}
+	return 0;
+}
+EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
+
 void iov_iter_init(struct iov_iter *i, int direction,
 			const struct iovec *iov, unsigned long nr_segs,
 			size_t count)
@@ -766,3 +792,60 @@
 				   flags);
 }
 EXPORT_SYMBOL(dup_iter);
+
+int import_iovec(int type, const struct iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i)
+{
+	ssize_t n;
+	struct iovec *p;
+	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+				  *iov, &p);
+	if (n < 0) {
+		if (p != *iov)
+			kfree(p);
+		*iov = NULL;
+		return n;
+	}
+	iov_iter_init(i, type, p, nr_segs, n);
+	*iov = p == *iov ? NULL : p;
+	return 0;
+}
+EXPORT_SYMBOL(import_iovec);
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i)
+{
+	ssize_t n;
+	struct iovec *p;
+	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+				  *iov, &p);
+	if (n < 0) {
+		if (p != *iov)
+			kfree(p);
+		*iov = NULL;
+		return n;
+	}
+	iov_iter_init(i, type, p, nr_segs, n);
+	*iov = p == *iov ? NULL : p;
+	return 0;
+}
+#endif
+
+int import_single_range(int rw, void __user *buf, size_t len,
+		 struct iovec *iov, struct iov_iter *i)
+{
+	if (len > MAX_RW_COUNT)
+		len = MAX_RW_COUNT;
+	if (unlikely(!access_ok(!rw, buf, len)))
+		return -EFAULT;
+
+	iov->iov_base = buf;
+	iov->iov_len = len;
+	iov_iter_init(i, rw, iov, 1, len);
+	return 0;
+}
diff --git a/mm/filemap.c b/mm/filemap.c
index 434dba3..12548d0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -13,7 +13,6 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
-#include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
 #include <linux/gfp.h>
diff --git a/mm/page_io.c b/mm/page_io.c
index e604580..a96c856 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -20,8 +20,8 @@
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
 #include <linux/frontswap.h>
-#include <linux/aio.h>
 #include <linux/blkdev.h>
+#include <linux/uio.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -274,7 +274,6 @@
 		iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
-		kiocb.ki_nbytes = PAGE_SIZE;
 
 		set_page_writeback(page);
 		unlock_page(page);
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index b159769..e88d071 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -257,22 +257,18 @@
 	struct iovec *iov_r = iovstack_r;
 	struct iov_iter iter;
 	ssize_t rc;
+	int dir = vm_write ? WRITE : READ;
 
 	if (flags != 0)
 		return -EINVAL;
 
 	/* Check iovecs */
-	if (vm_write)
-		rc = rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV,
-					   iovstack_l, &iov_l);
-	else
-		rc = rw_copy_check_uvector(READ, lvec, liovcnt, UIO_FASTIOV,
-					   iovstack_l, &iov_l);
-	if (rc <= 0)
+	rc = import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+	if (rc < 0)
+		return rc;
+	if (!iov_iter_count(&iter))
 		goto free_iovecs;
 
-	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
-
 	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
 				   iovstack_r, &iov_r);
 	if (rc <= 0)
@@ -283,8 +279,7 @@
 free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
-	if (iov_l != iovstack_l)
-		kfree(iov_l);
+	kfree(iov_l);
 
 	return rc;
 }
@@ -320,21 +315,16 @@
 	struct iovec *iov_r = iovstack_r;
 	struct iov_iter iter;
 	ssize_t rc = -EFAULT;
+	int dir = vm_write ? WRITE : READ;
 
 	if (flags != 0)
 		return -EINVAL;
 
-	if (vm_write)
-		rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt,
-						  UIO_FASTIOV, iovstack_l,
-						  &iov_l);
-	else
-		rc = compat_rw_copy_check_uvector(READ, lvec, liovcnt,
-						  UIO_FASTIOV, iovstack_l,
-						  &iov_l);
-	if (rc <= 0)
+	rc = compat_import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+	if (rc < 0)
+		return rc;
+	if (!iov_iter_count(&iter))
 		goto free_iovecs;
-	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
 					  UIO_FASTIOV, iovstack_r,
 					  &iov_r);
@@ -346,8 +336,7 @@
 free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
-	if (iov_l != iovstack_l)
-		kfree(iov_l);
+	kfree(iov_l);
 	return rc;
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index cf2d0ca..80b360c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -31,7 +31,7 @@
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 static struct vfsmount *shm_mnt;
 
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 5bdd300..2df34eb 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -272,7 +272,7 @@
 	qos.rxtp.max_pcr = rx_pcr;
 	qos.rxtp.max_sdu = rx_sdu;
 	qos.aal = ATM_AAL5;
-	dprintk("parse_qos(): setting qos paramameters to tx=%d,%d rx=%d,%d\n",
+	dprintk("parse_qos(): setting qos parameters to tx=%d,%d rx=%d,%d\n",
 		qos.txtp.max_pcr, qos.txtp.max_sdu,
 		qos.rxtp.max_pcr, qos.rxtp.max_sdu);
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f027a70..4a356b7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -46,7 +46,6 @@
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
-#include <linux/aio.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c
index 386e45d8..edfe0c1 100644
--- a/net/mac80211/trace.c
+++ b/net/mac80211/trace.c
@@ -8,6 +8,7 @@
 #include "debug.h"
 #define CREATE_TRACE_POINTS
 #include "trace.h"
+#include "trace_msg.h"
 
 #ifdef CONFIG_MAC80211_MESSAGE_TRACING
 void __sdata_info(const char *fmt, ...)
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 263a956..755a538 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2312,44 +2312,6 @@
 	)
 );
 
-#ifdef CONFIG_MAC80211_MESSAGE_TRACING
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM mac80211_msg
-
-#define MAX_MSG_LEN	100
-
-DECLARE_EVENT_CLASS(mac80211_msg_event,
-	TP_PROTO(struct va_format *vaf),
-
-	TP_ARGS(vaf),
-
-	TP_STRUCT__entry(
-		__dynamic_array(char, msg, MAX_MSG_LEN)
-	),
-
-	TP_fast_assign(
-		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
-				       MAX_MSG_LEN, vaf->fmt,
-				       *vaf->va) >= MAX_MSG_LEN);
-	),
-
-	TP_printk("%s", __get_str(msg))
-);
-
-DEFINE_EVENT(mac80211_msg_event, mac80211_info,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-DEFINE_EVENT(mac80211_msg_event, mac80211_dbg,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-DEFINE_EVENT(mac80211_msg_event, mac80211_err,
-	TP_PROTO(struct va_format *vaf),
-	TP_ARGS(vaf)
-);
-#endif
-
 #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
new file mode 100644
index 0000000..768f7c2
--- /dev/null
+++ b/net/mac80211/trace_msg.h
@@ -0,0 +1,53 @@
+#ifdef CONFIG_MAC80211_MESSAGE_TRACING
+
+#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
+#define __MAC80211_MSG_DRIVER_TRACE
+
+#include <linux/tracepoint.h>
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mac80211_msg
+
+#define MAX_MSG_LEN	100
+
+DECLARE_EVENT_CLASS(mac80211_msg_event,
+	TP_PROTO(struct va_format *vaf),
+
+	TP_ARGS(vaf),
+
+	TP_STRUCT__entry(
+		__dynamic_array(char, msg, MAX_MSG_LEN)
+	),
+
+	TP_fast_assign(
+		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+				       MAX_MSG_LEN, vaf->fmt,
+				       *vaf->va) >= MAX_MSG_LEN);
+	),
+
+	TP_printk("%s", __get_str(msg))
+);
+
+DEFINE_EVENT(mac80211_msg_event, mac80211_info,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+DEFINE_EVENT(mac80211_msg_event, mac80211_dbg,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+DEFINE_EVENT(mac80211_msg_event, mac80211_err,
+	TP_PROTO(struct va_format *vaf),
+	TP_ARGS(vaf)
+);
+#endif /* !__MAC80211_MSG_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace_msg
+#include <trace/define_trace.h>
+
+#endif
diff --git a/net/socket.c b/net/socket.c
index 245330c..1dbff3e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -633,8 +633,7 @@
 	init_sync_kiocb(&iocb, NULL);
 	ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
 		      __sock_sendmsg(&iocb, sock, msg, size);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
 
@@ -766,8 +765,7 @@
 
 	init_sync_kiocb(&iocb, NULL);
 	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
 EXPORT_SYMBOL(sock_recvmsg);
@@ -780,8 +778,7 @@
 
 	init_sync_kiocb(&iocb, NULL);
 	ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
 
@@ -858,11 +855,11 @@
 	if (iocb->ki_pos != 0)
 		return -ESPIPE;
 
-	if (iocb->ki_nbytes == 0)	/* Match SYS5 behaviour */
+	if (!iov_iter_count(to))	/* Match SYS5 behaviour */
 		return 0;
 
 	res = __sock_recvmsg(iocb, sock, &msg,
-			     iocb->ki_nbytes, msg.msg_flags);
+			     iov_iter_count(to), msg.msg_flags);
 	*to = msg.msg_iter;
 	return res;
 }
@@ -883,7 +880,7 @@
 	if (sock->type == SOCK_SEQPACKET)
 		msg.msg_flags |= MSG_EOR;
 
-	res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
+	res = __sock_sendmsg(iocb, sock, &msg, iov_iter_count(from));
 	*from = msg.msg_iter;
 	return res;
 }
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 124676c..e28909f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1136,7 +1136,7 @@
 	int i, rc;
 
 	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-	dprintk("RPC:       %s: initalizing %d FMRs\n", __func__, i);
+	dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
 
 	while (i--) {
 		r = kzalloc(sizeof(*r), GFP_KERNEL);
@@ -1169,7 +1169,7 @@
 	int i, rc;
 
 	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-	dprintk("RPC:       %s: initalizing %d FRMRs\n", __func__, i);
+	dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
 
 	while (i--) {
 		r = kzalloc(sizeof(*r), GFP_KERNEL);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index b5b3600..fe98fb2 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -6,23 +6,39 @@
 hostprogs-y += sock_example
 hostprogs-y += sockex1
 hostprogs-y += sockex2
+hostprogs-y += tracex1
+hostprogs-y += tracex2
+hostprogs-y += tracex3
+hostprogs-y += tracex4
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
 sock_example-objs := sock_example.o libbpf.o
 sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
 sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
+tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
+tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
+tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
+tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
 always += sockex1_kern.o
 always += sockex2_kern.o
+always += tracex1_kern.o
+always += tracex2_kern.o
+always += tracex3_kern.o
+always += tracex4_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
 HOSTLOADLIBES_sockex1 += -lelf
 HOSTLOADLIBES_sockex2 += -lelf
+HOSTLOADLIBES_tracex1 += -lelf
+HOSTLOADLIBES_tracex2 += -lelf
+HOSTLOADLIBES_tracex3 += -lelf
+HOSTLOADLIBES_tracex4 += -lelf -lrt
 
 # point this to your LLVM backend with bpf support
 LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index ca03331..1c872bc 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -15,6 +15,12 @@
 	(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
 	(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+	(void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+	(void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+	(void *) BPF_FUNC_trace_printk;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 1831d23..38dac5a 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -8,29 +8,70 @@
 #include <unistd.h>
 #include <string.h>
 #include <stdbool.h>
+#include <stdlib.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <poll.h>
 #include "libbpf.h"
 #include "bpf_helpers.h"
 #include "bpf_load.h"
 
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
 static char license[128];
+static int kern_version;
 static bool processed_sec[128];
 int map_fd[MAX_MAPS];
 int prog_fd[MAX_PROGS];
+int event_fd[MAX_PROGS];
 int prog_cnt;
 
 static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 {
-	int fd;
 	bool is_socket = strncmp(event, "socket", 6) == 0;
+	bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
+	bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
+	enum bpf_prog_type prog_type;
+	char buf[256];
+	int fd, efd, err, id;
+	struct perf_event_attr attr = {};
 
-	if (!is_socket)
-		/* tracing events tbd */
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	if (is_socket) {
+		prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	} else if (is_kprobe || is_kretprobe) {
+		prog_type = BPF_PROG_TYPE_KPROBE;
+	} else {
+		printf("Unknown event '%s'\n", event);
 		return -1;
+	}
 
-	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
-			   prog, size, license);
+	if (is_kprobe || is_kretprobe) {
+		if (is_kprobe)
+			event += 7;
+		else
+			event += 10;
+
+		snprintf(buf, sizeof(buf),
+			 "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+			 is_kprobe ? 'p' : 'r', event, event);
+		err = system(buf);
+		if (err < 0) {
+			printf("failed to create kprobe '%s' error '%s'\n",
+			       event, strerror(errno));
+			return -1;
+		}
+	}
+
+	fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
 
 	if (fd < 0) {
 		printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
@@ -39,6 +80,41 @@
 
 	prog_fd[prog_cnt++] = fd;
 
+	if (is_socket)
+		return 0;
+
+	strcpy(buf, DEBUGFS);
+	strcat(buf, "events/kprobes/");
+	strcat(buf, event);
+	strcat(buf, "/id");
+
+	efd = open(buf, O_RDONLY, 0);
+	if (efd < 0) {
+		printf("failed to open event %s\n", event);
+		return -1;
+	}
+
+	err = read(efd, buf, sizeof(buf));
+	if (err < 0 || err >= sizeof(buf)) {
+		printf("read from '%s' failed '%s'\n", event, strerror(errno));
+		return -1;
+	}
+
+	close(efd);
+
+	buf[err] = 0;
+	id = atoi(buf);
+	attr.config = id;
+
+	efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+	if (efd < 0) {
+		printf("event %d fd %d err %s\n", id, efd, strerror(errno));
+		return -1;
+	}
+	event_fd[prog_cnt - 1] = efd;
+	ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+	ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+
 	return 0;
 }
 
@@ -135,6 +211,9 @@
 	if (gelf_getehdr(elf, &ehdr) != &ehdr)
 		return 1;
 
+	/* clear all kprobes */
+	i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+
 	/* scan over all elf sections to get license and map info */
 	for (i = 1; i < ehdr.e_shnum; i++) {
 
@@ -149,6 +228,14 @@
 		if (strcmp(shname, "license") == 0) {
 			processed_sec[i] = true;
 			memcpy(license, data->d_buf, data->d_size);
+		} else if (strcmp(shname, "version") == 0) {
+			processed_sec[i] = true;
+			if (data->d_size != sizeof(int)) {
+				printf("invalid size of version section %zd\n",
+				       data->d_size);
+				return 1;
+			}
+			memcpy(&kern_version, data->d_buf, sizeof(int));
 		} else if (strcmp(shname, "maps") == 0) {
 			processed_sec[i] = true;
 			if (load_maps(data->d_buf, data->d_size))
@@ -178,7 +265,8 @@
 			if (parse_relo_and_apply(data, symbols, &shdr, insns))
 				continue;
 
-			if (memcmp(shname_prog, "events/", 7) == 0 ||
+			if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
+			    memcmp(shname_prog, "kretprobe/", 10) == 0 ||
 			    memcmp(shname_prog, "socket", 6) == 0)
 				load_and_attach(shname_prog, insns, data_prog->d_size);
 		}
@@ -193,7 +281,8 @@
 		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
 			continue;
 
-		if (memcmp(shname, "events/", 7) == 0 ||
+		if (memcmp(shname, "kprobe/", 7) == 0 ||
+		    memcmp(shname, "kretprobe/", 10) == 0 ||
 		    memcmp(shname, "socket", 6) == 0)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
@@ -201,3 +290,23 @@
 	close(fd);
 	return 0;
 }
+
+void read_trace_pipe(void)
+{
+	int trace_fd;
+
+	trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+	if (trace_fd < 0)
+		return;
+
+	while (1) {
+		static char buf[4096];
+		ssize_t sz;
+
+		sz = read(trace_fd, buf, sizeof(buf));
+		if (sz > 0) {
+			buf[sz] = 0;
+			puts(buf);
+		}
+	}
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 27789a3..cbd7c2b 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -6,6 +6,7 @@
 
 extern int map_fd[MAX_MAPS];
 extern int prog_fd[MAX_PROGS];
+extern int event_fd[MAX_PROGS];
 
 /* parses elf file compiled by llvm .c->.o
  * . parses 'maps' section and creates maps via BPF syscall
@@ -21,4 +22,6 @@
  */
 int load_bpf_file(char *path);
 
+void read_trace_pipe(void);
+
 #endif
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 46d50b7..7e1efa7 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -81,7 +81,7 @@
 
 int bpf_prog_load(enum bpf_prog_type prog_type,
 		  const struct bpf_insn *insns, int prog_len,
-		  const char *license)
+		  const char *license, int kern_version)
 {
 	union bpf_attr attr = {
 		.prog_type = prog_type,
@@ -93,6 +93,11 @@
 		.log_level = 1,
 	};
 
+	/* assign one field outside of struct init to make sure any
+	 * padding is zero initialized
+	 */
+	attr.kern_version = kern_version;
+
 	bpf_log_buf[0] = 0;
 
 	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
@@ -121,3 +126,10 @@
 
 	return sock;
 }
+
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+		    int group_fd, unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, attr, pid, cpu,
+		       group_fd, flags);
+}
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 58c5fe1..ac7b096 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -13,7 +13,7 @@
 
 int bpf_prog_load(enum bpf_prog_type prog_type,
 		  const struct bpf_insn *insns, int insn_len,
-		  const char *license);
+		  const char *license, int kern_version);
 
 #define LOG_BUF_SIZE 65536
 extern char bpf_log_buf[LOG_BUF_SIZE];
@@ -182,4 +182,7 @@
 /* create RAW socket and bind to interface 'name' */
 int open_raw_sock(const char *name);
 
+struct perf_event_attr;
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+		    int group_fd, unsigned long flags);
 #endif
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index c8ad040..a0ce251 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c
@@ -56,7 +56,7 @@
 	};
 
 	prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog),
-				"GPL");
+				"GPL", 0);
 	if (prog_fd < 0) {
 		printf("failed to load prog '%s'\n", strerror(errno));
 		goto cleanup;
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index b96175e..740ce97 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -689,7 +689,7 @@
 
 		prog_fd = bpf_prog_load(BPF_PROG_TYPE_UNSPEC, prog,
 					prog_len * sizeof(struct bpf_insn),
-					"GPL");
+					"GPL", 0);
 
 		if (tests[i].result == ACCEPT) {
 			if (prog_fd < 0) {
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
new file mode 100644
index 0000000..3162046
--- /dev/null
+++ b/samples/bpf/tracex1_kern.c
@@ -0,0 +1,50 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	/* attaches to kprobe netif_receive_skb,
+	 * looks for packets on loobpack device and prints them
+	 */
+	char devname[IFNAMSIZ] = {};
+	struct net_device *dev;
+	struct sk_buff *skb;
+	int len;
+
+	/* non-portable! works for the given kernel only */
+	skb = (struct sk_buff *) ctx->di;
+
+	dev = _(skb->dev);
+
+	len = _(skb->len);
+
+	bpf_probe_read(devname, sizeof(devname), dev->name);
+
+	if (devname[0] == 'l' && devname[1] == 'o') {
+		char fmt[] = "skb %p len %d\n";
+		/* using bpf_trace_printk() for DEBUG ONLY */
+		bpf_trace_printk(fmt, sizeof(fmt), skb, len);
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
new file mode 100644
index 0000000..31a4818
--- /dev/null
+++ b/samples/bpf/tracex1_user.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int ac, char **argv)
+{
+	FILE *f;
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	f = popen("taskset 1 ping -c5 localhost", "r");
+	(void) f;
+
+	read_trace_pipe();
+
+	return 0;
+}
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
new file mode 100644
index 0000000..19ec1cf
--- /dev/null
+++ b/samples/bpf/tracex2_kern.c
@@ -0,0 +1,86 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(long),
+	.value_size = sizeof(long),
+	.max_entries = 1024,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/kfree_skb")
+int bpf_prog2(struct pt_regs *ctx)
+{
+	long loc = 0;
+	long init_val = 1;
+	long *value;
+
+	/* x64 specific: read ip of kfree_skb caller.
+	 * non-portable version of __builtin_return_address(0)
+	 */
+	bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp);
+
+	value = bpf_map_lookup_elem(&my_map, &loc);
+	if (value)
+		*value += 1;
+	else
+		bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY);
+	return 0;
+}
+
+static unsigned int log2(unsigned int v)
+{
+	unsigned int r;
+	unsigned int shift;
+
+	r = (v > 0xFFFF) << 4; v >>= r;
+	shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+	shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+	shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+	r |= (v >> 1);
+	return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+	unsigned int hi = v >> 32;
+	if (hi)
+		return log2(hi) + 32;
+	else
+		return log2(v);
+}
+
+struct bpf_map_def SEC("maps") my_hist_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = 64,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog3(struct pt_regs *ctx)
+{
+	long write_size = ctx->dx; /* arg3 */
+	long init_val = 1;
+	long *value;
+	u32 index = log2l(write_size);
+
+	value = bpf_map_lookup_elem(&my_hist_map, &index);
+	if (value)
+		__sync_fetch_and_add(value, 1);
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
new file mode 100644
index 0000000..91b8d08
--- /dev/null
+++ b/samples/bpf/tracex2_user.c
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_INDEX	64
+#define MAX_STARS	38
+
+static void stars(char *str, long val, long max, int width)
+{
+	int i;
+
+	for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
+		str[i] = '*';
+	if (val > max)
+		str[i - 1] = '+';
+	str[i] = '\0';
+}
+
+static void print_hist(int fd)
+{
+	int key;
+	long value;
+	long data[MAX_INDEX] = {};
+	char starstr[MAX_STARS];
+	int i;
+	int max_ind = -1;
+	long max_value = 0;
+
+	for (key = 0; key < MAX_INDEX; key++) {
+		bpf_lookup_elem(fd, &key, &value);
+		data[key] = value;
+		if (value && key > max_ind)
+			max_ind = key;
+		if (value > max_value)
+			max_value = value;
+	}
+
+	printf("           syscall write() stats\n");
+	printf("     byte_size       : count     distribution\n");
+	for (i = 1; i <= max_ind + 1; i++) {
+		stars(starstr, data[i - 1], max_value, MAX_STARS);
+		printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+		       (1l << i) >> 1, (1l << i) - 1, data[i - 1],
+		       MAX_STARS, starstr);
+	}
+}
+static void int_exit(int sig)
+{
+	print_hist(map_fd[1]);
+	exit(0);
+}
+
+int main(int ac, char **argv)
+{
+	char filename[256];
+	long key, next_key, value;
+	FILE *f;
+	int i;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	signal(SIGINT, int_exit);
+
+	/* start 'ping' in the background to have some kfree_skb events */
+	f = popen("ping -c5 localhost", "r");
+	(void) f;
+
+	/* start 'dd' in the background to have plenty of 'write' syscalls */
+	f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
+	(void) f;
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	for (i = 0; i < 5; i++) {
+		key = 0;
+		while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+			bpf_lookup_elem(map_fd[0], &next_key, &value);
+			printf("location 0x%lx count %ld\n", next_key, value);
+			key = next_key;
+		}
+		if (key)
+			printf("\n");
+		sleep(1);
+	}
+	print_hist(map_fd[1]);
+
+	return 0;
+}
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
new file mode 100644
index 0000000..255ff27
--- /dev/null
+++ b/samples/bpf/tracex3_kern.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(long),
+	.value_size = sizeof(u64),
+	.max_entries = 4096,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/blk_mq_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	long rq = ctx->di;
+	u64 val = bpf_ktime_get_ns();
+
+	bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
+	return 0;
+}
+
+static unsigned int log2l(unsigned long long n)
+{
+#define S(k) if (n >= (1ull << k)) { i += k; n >>= k; }
+	int i = -(n == 0);
+	S(32); S(16); S(8); S(4); S(2); S(1);
+	return i;
+#undef S
+}
+
+#define SLOTS 100
+
+struct bpf_map_def SEC("maps") lat_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u64),
+	.max_entries = SLOTS,
+};
+
+SEC("kprobe/blk_update_request")
+int bpf_prog2(struct pt_regs *ctx)
+{
+	long rq = ctx->di;
+	u64 *value, l, base;
+	u32 index;
+
+	value = bpf_map_lookup_elem(&my_map, &rq);
+	if (!value)
+		return 0;
+
+	u64 cur_time = bpf_ktime_get_ns();
+	u64 delta = cur_time - *value;
+
+	bpf_map_delete_elem(&my_map, &rq);
+
+	/* the lines below are computing index = log10(delta)*10
+	 * using integer arithmetic
+	 * index = 29 ~ 1 usec
+	 * index = 59 ~ 1 msec
+	 * index = 89 ~ 1 sec
+	 * index = 99 ~ 10sec or more
+	 * log10(x)*10 = log2(x)*10/log2(10) = log2(x)*3
+	 */
+	l = log2l(delta);
+	base = 1ll << l;
+	index = (l * 64 + (delta - base) * 64 / base) * 3 / 64;
+
+	if (index >= SLOTS)
+		index = SLOTS - 1;
+
+	value = bpf_map_lookup_elem(&lat_map, &index);
+	if (value)
+		__sync_fetch_and_add((long *)value, 1);
+
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
new file mode 100644
index 0000000..0aaa933
--- /dev/null
+++ b/samples/bpf/tracex3_user.c
@@ -0,0 +1,150 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+#define SLOTS 100
+
+static void clear_stats(int fd)
+{
+	__u32 key;
+	__u64 value = 0;
+
+	for (key = 0; key < SLOTS; key++)
+		bpf_update_elem(fd, &key, &value, BPF_ANY);
+}
+
+const char *color[] = {
+	"\033[48;5;255m",
+	"\033[48;5;252m",
+	"\033[48;5;250m",
+	"\033[48;5;248m",
+	"\033[48;5;246m",
+	"\033[48;5;244m",
+	"\033[48;5;242m",
+	"\033[48;5;240m",
+	"\033[48;5;238m",
+	"\033[48;5;236m",
+	"\033[48;5;234m",
+	"\033[48;5;232m",
+};
+const int num_colors = ARRAY_SIZE(color);
+
+const char nocolor[] = "\033[00m";
+
+const char *sym[] = {
+	" ",
+	" ",
+	".",
+	".",
+	"*",
+	"*",
+	"o",
+	"o",
+	"O",
+	"O",
+	"#",
+	"#",
+};
+
+bool full_range = false;
+bool text_only = false;
+
+static void print_banner(void)
+{
+	if (full_range)
+		printf("|1ns     |10ns     |100ns    |1us      |10us     |100us"
+		       "    |1ms      |10ms     |100ms    |1s       |10s\n");
+	else
+		printf("|1us      |10us     |100us    |1ms      |10ms     "
+		       "|100ms    |1s       |10s\n");
+}
+
+static void print_hist(int fd)
+{
+	__u32 key;
+	__u64 value;
+	__u64 cnt[SLOTS];
+	__u64 max_cnt = 0;
+	__u64 total_events = 0;
+
+	for (key = 0; key < SLOTS; key++) {
+		value = 0;
+		bpf_lookup_elem(fd, &key, &value);
+		cnt[key] = value;
+		total_events += value;
+		if (value > max_cnt)
+			max_cnt = value;
+	}
+	clear_stats(fd);
+	for (key = full_range ? 0 : 29; key < SLOTS; key++) {
+		int c = num_colors * cnt[key] / (max_cnt + 1);
+
+		if (text_only)
+			printf("%s", sym[c]);
+		else
+			printf("%s %s", color[c], nocolor);
+	}
+	printf(" # %lld\n", total_events);
+}
+
+int main(int ac, char **argv)
+{
+	char filename[256];
+	int i;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	for (i = 1; i < ac; i++) {
+		if (strcmp(argv[i], "-a") == 0) {
+			full_range = true;
+		} else if (strcmp(argv[i], "-t") == 0) {
+			text_only = true;
+		} else if (strcmp(argv[i], "-h") == 0) {
+			printf("Usage:\n"
+			       "  -a display wider latency range\n"
+			       "  -t text only\n");
+			return 1;
+		}
+	}
+
+	printf("  heatmap of IO latency\n");
+	if (text_only)
+		printf("  %s", sym[num_colors - 1]);
+	else
+		printf("  %s %s", color[num_colors - 1], nocolor);
+	printf(" - many events with this latency\n");
+
+	if (text_only)
+		printf("  %s", sym[0]);
+	else
+		printf("  %s %s", color[0], nocolor);
+	printf(" - few events\n");
+
+	for (i = 0; ; i++) {
+		if (i % 20 == 0)
+			print_banner();
+		print_hist(map_fd[1]);
+		sleep(2);
+	}
+
+	return 0;
+}
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c
new file mode 100644
index 0000000..126b805
--- /dev/null
+++ b/samples/bpf/tracex4_kern.c
@@ -0,0 +1,54 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct pair {
+	u64 val;
+	u64 ip;
+};
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(long),
+	.value_size = sizeof(struct pair),
+	.max_entries = 1000000,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/kmem_cache_free")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	long ptr = ctx->si;
+
+	bpf_map_delete_elem(&my_map, &ptr);
+	return 0;
+}
+
+SEC("kretprobe/kmem_cache_alloc_node")
+int bpf_prog2(struct pt_regs *ctx)
+{
+	long ptr = ctx->ax;
+	long ip = 0;
+
+	/* get ip address of kmem_cache_alloc_node() caller */
+	bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip)));
+
+	struct pair v = {
+		.val = bpf_ktime_get_ns(),
+		.ip = ip,
+	};
+
+	bpf_map_update_elem(&my_map, &ptr, &v, BPF_ANY);
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
new file mode 100644
index 0000000..bc4a3bd
--- /dev/null
+++ b/samples/bpf/tracex4_user.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+struct pair {
+	long long val;
+	__u64 ip;
+};
+
+static __u64 time_get_ns(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static void print_old_objects(int fd)
+{
+	long long val = time_get_ns();
+	__u64 key, next_key;
+	struct pair v;
+
+	key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
+
+	key = -1;
+	while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+		bpf_lookup_elem(map_fd[0], &next_key, &v);
+		key = next_key;
+		if (val - v.val < 1000000000ll)
+			/* object was allocated more then 1 sec ago */
+			continue;
+		printf("obj 0x%llx is %2lldsec old was allocated at ip %llx\n",
+		       next_key, (val - v.val) / 1000000000ll, v.ip);
+	}
+}
+
+int main(int ac, char **argv)
+{
+	char filename[256];
+	int i;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	for (i = 0; ; i++) {
+		print_old_objects(map_fd[1]);
+		sleep(1);
+	}
+
+	return 0;
+}
diff --git a/samples/hidraw/Makefile b/samples/hidraw/Makefile
index 382eeae..a9ab961 100644
--- a/samples/hidraw/Makefile
+++ b/samples/hidraw/Makefile
@@ -8,3 +8,5 @@
 always := $(hostprogs-y)
 
 HOSTCFLAGS_hid-example.o += -I$(objtree)/usr/include
+
+all: hid-example
diff --git a/samples/hidraw/hid-example.c b/samples/hidraw/hid-example.c
index 512a7e5..92e6c15 100644
--- a/samples/hidraw/hid-example.c
+++ b/samples/hidraw/hid-example.c
@@ -46,10 +46,14 @@
 	char buf[256];
 	struct hidraw_report_descriptor rpt_desc;
 	struct hidraw_devinfo info;
+	char *device = "/dev/hidraw0";
+
+	if (argc > 1)
+		device = argv[1];
 
 	/* Open the Device with non-blocking reads. In real life,
 	   don't use a hard coded path; use libudev instead. */
-	fd = open("/dev/hidraw0", O_RDWR|O_NONBLOCK);
+	fd = open(device, O_RDWR|O_NONBLOCK);
 
 	if (fd < 0) {
 		perror("Unable to open device");
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index a2c8b02..8965d1b 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -22,7 +22,25 @@
  * protection, just like TRACE_INCLUDE_FILE.
  */
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM sample
+#define TRACE_SYSTEM sample-trace
+
+/*
+ * TRACE_SYSTEM is expected to be a C valid variable (alpha-numeric
+ * and underscore), although it may start with numbers. If for some
+ * reason it is not, you need to add the following lines:
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR sample_trace
+/*
+ * But the above is only needed if TRACE_SYSTEM is not alpha-numeric
+ * and underscored. By default, TRACE_SYSTEM_VAR will be equal to
+ * TRACE_SYSTEM. As TRACE_SYSTEM_VAR must be alpha-numeric, if
+ * TRACE_SYSTEM is not, then TRACE_SYSTEM_VAR must be defined with
+ * only alpha-numeric and underscores.
+ *
+ * The TRACE_SYSTEM_VAR is only used internally and not visible to
+ * user space.
+ */
 
 /*
  * Notice that this file is not protected like a normal header.
@@ -180,8 +198,30 @@
 		;
 	return i;
 }
+
+enum {
+	TRACE_SAMPLE_FOO = 2,
+	TRACE_SAMPLE_BAR = 4,
+	TRACE_SAMPLE_ZOO = 8,
+};
 #endif
 
+/*
+ * If enums are used in the TP_printk(), their names will be shown in
+ * format files and not their values. This can cause problems with user
+ * space programs that parse the format files to know how to translate
+ * the raw binary trace output into human readable text.
+ *
+ * To help out user space programs, any enum that is used in the TP_printk()
+ * should be defined by TRACE_DEFINE_ENUM() macro. All that is needed to
+ * be done is to add this macro with the enum within it in the trace
+ * header file, and it will be converted in the output.
+ */
+
+TRACE_DEFINE_ENUM(TRACE_SAMPLE_FOO);
+TRACE_DEFINE_ENUM(TRACE_SAMPLE_BAR);
+TRACE_DEFINE_ENUM(TRACE_SAMPLE_ZOO);
+
 TRACE_EVENT(foo_bar,
 
 	TP_PROTO(const char *foo, int bar, const int *lst,
@@ -206,7 +246,47 @@
 		__assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus());
 	),
 
-	TP_printk("foo %s %d %s %s (%s)", __entry->foo, __entry->bar,
+	TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar,
+
+/*
+ * Notice here the use of some helper functions. This includes:
+ *
+ *  __print_symbolic( variable, { value, "string" }, ... ),
+ *
+ *    The variable is tested against each value of the { } pair. If
+ *    the variable matches one of the values, then it will print the
+ *    string in that pair. If non are matched, it returns a string
+ *    version of the number (if __entry->bar == 7 then "7" is returned).
+ */
+		  __print_symbolic(__entry->bar,
+				   { 0, "zero" },
+				   { TRACE_SAMPLE_FOO, "TWO" },
+				   { TRACE_SAMPLE_BAR, "FOUR" },
+				   { TRACE_SAMPLE_ZOO, "EIGHT" },
+				   { 10, "TEN" }
+			  ),
+
+/*
+ *  __print_flags( variable, "delim", { value, "flag" }, ... ),
+ *
+ *    This is similar to __print_symbolic, except that it tests the bits
+ *    of the value. If ((FLAG & variable) == FLAG) then the string is
+ *    printed. If more than one flag matches, then each one that does is
+ *    also printed with delim in between them.
+ *    If not all bits are accounted for, then the not found bits will be
+ *    added in hex format: 0x506 will show BIT2|BIT4|0x500
+ */
+		  __print_flags(__entry->bar, "|",
+				{ 1, "BIT1" },
+				{ 2, "BIT2" },
+				{ 4, "BIT3" },
+				{ 8, "BIT4" }
+			  ),
+/*
+ *  __print_array( array, len, element_size )
+ *
+ *    This prints out the array that is defined by __array in a nice format.
+ */
 		  __print_array(__get_dynamic_array(list),
 				__get_dynamic_array_len(list),
 				sizeof(int)),
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 107db88..dd56bff 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -364,12 +364,12 @@
 	return common_perm(OP_CHOWN, path, AA_MAY_CHOWN, &cond);
 }
 
-static int apparmor_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int apparmor_inode_getattr(const struct path *path)
 {
-	if (!mediated_filesystem(dentry))
+	if (!mediated_filesystem(path->dentry))
 		return 0;
 
-	return common_perm_mnt_dentry(OP_GETATTR, mnt, dentry,
+	return common_perm_mnt_dentry(OP_GETATTR, path->mnt, path->dentry,
 				      AA_MAY_META_READ);
 }
 
diff --git a/security/capability.c b/security/capability.c
index 070dd46..bdf2203 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -225,7 +225,7 @@
 	return 0;
 }
 
-static int cap_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int cap_inode_getattr(const struct path *path)
 {
 	return 0;
 }
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 3478965..25430a3 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -31,30 +31,21 @@
 	key_serial_t ringid)
 {
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	struct iov_iter from;
 	long ret;
 
-	if (!_payload_iov || !ioc)
-		goto no_payload;
+	if (!_payload_iov)
+		ioc = 0;
 
-	ret = compat_rw_copy_check_uvector(WRITE, _payload_iov, ioc,
-					   ARRAY_SIZE(iovstack),
-					   iovstack, &iov);
+	ret = compat_import_iovec(WRITE, _payload_iov, ioc,
+				  ARRAY_SIZE(iovstack), &iov,
+				  &from);
 	if (ret < 0)
-		goto err;
-	if (ret == 0)
-		goto no_payload_free;
+		return ret;
 
-	ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
-err:
-	if (iov != iovstack)
-		kfree(iov);
+	ret = keyctl_instantiate_key_common(id, &from, ringid);
+	kfree(iov);
 	return ret;
-
-no_payload_free:
-	if (iov != iovstack)
-		kfree(iov);
-no_payload:
-	return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
 }
 
 /*
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 200e378..5105c2c 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -243,9 +243,10 @@
 				       unsigned, key_serial_t);
 extern long keyctl_invalidate_key(key_serial_t);
 
+struct iov_iter;
 extern long keyctl_instantiate_key_common(key_serial_t,
-					  const struct iovec *,
-					  unsigned, size_t, key_serial_t);
+					  struct iov_iter *,
+					  key_serial_t);
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 extern long keyctl_get_persistent(uid_t, key_serial_t);
 extern unsigned persistent_keyring_expiry;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 4743d71..0b9ec78 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -998,21 +998,6 @@
 }
 
 /*
- * Copy the iovec data from userspace
- */
-static long copy_from_user_iovec(void *buffer, const struct iovec *iov,
-				 unsigned ioc)
-{
-	for (; ioc > 0; ioc--) {
-		if (copy_from_user(buffer, iov->iov_base, iov->iov_len) != 0)
-			return -EFAULT;
-		buffer += iov->iov_len;
-		iov++;
-	}
-	return 0;
-}
-
-/*
  * Instantiate a key with the specified payload and link the key into the
  * destination keyring if one is given.
  *
@@ -1022,20 +1007,21 @@
  * If successful, 0 will be returned.
  */
 long keyctl_instantiate_key_common(key_serial_t id,
-				   const struct iovec *payload_iov,
-				   unsigned ioc,
-				   size_t plen,
+				   struct iov_iter *from,
 				   key_serial_t ringid)
 {
 	const struct cred *cred = current_cred();
 	struct request_key_auth *rka;
 	struct key *instkey, *dest_keyring;
+	size_t plen = from ? iov_iter_count(from) : 0;
 	void *payload;
 	long ret;
-	bool vm = false;
 
 	kenter("%d,,%zu,%d", id, plen, ringid);
 
+	if (!plen)
+		from = NULL;
+
 	ret = -EINVAL;
 	if (plen > 1024 * 1024 - 1)
 		goto error;
@@ -1054,20 +1040,19 @@
 	/* pull the payload in if one was supplied */
 	payload = NULL;
 
-	if (payload_iov) {
+	if (from) {
 		ret = -ENOMEM;
 		payload = kmalloc(plen, GFP_KERNEL);
 		if (!payload) {
 			if (plen <= PAGE_SIZE)
 				goto error;
-			vm = true;
 			payload = vmalloc(plen);
 			if (!payload)
 				goto error;
 		}
 
-		ret = copy_from_user_iovec(payload, payload_iov, ioc);
-		if (ret < 0)
+		ret = -EFAULT;
+		if (copy_from_iter(payload, plen, from) != plen)
 			goto error2;
 	}
 
@@ -1089,10 +1074,7 @@
 		keyctl_change_reqkey_auth(NULL);
 
 error2:
-	if (!vm)
-		kfree(payload);
-	else
-		vfree(payload);
+	kvfree(payload);
 error:
 	return ret;
 }
@@ -1112,15 +1094,19 @@
 			    key_serial_t ringid)
 {
 	if (_payload && plen) {
-		struct iovec iov[1] = {
-			[0].iov_base = (void __user *)_payload,
-			[0].iov_len  = plen
-		};
+		struct iovec iov;
+		struct iov_iter from;
+		int ret;
 
-		return keyctl_instantiate_key_common(id, iov, 1, plen, ringid);
+		ret = import_single_range(WRITE, (void __user *)_payload, plen,
+					  &iov, &from);
+		if (unlikely(ret))
+			return ret;
+
+		return keyctl_instantiate_key_common(id, &from, ringid);
 	}
 
-	return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
+	return keyctl_instantiate_key_common(id, NULL, ringid);
 }
 
 /*
@@ -1138,29 +1124,19 @@
 				key_serial_t ringid)
 {
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	struct iov_iter from;
 	long ret;
 
-	if (!_payload_iov || !ioc)
-		goto no_payload;
+	if (!_payload_iov)
+		ioc = 0;
 
-	ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc,
-				    ARRAY_SIZE(iovstack), iovstack, &iov);
+	ret = import_iovec(WRITE, _payload_iov, ioc,
+				    ARRAY_SIZE(iovstack), &iov, &from);
 	if (ret < 0)
-		goto err;
-	if (ret == 0)
-		goto no_payload_free;
-
-	ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
-err:
-	if (iov != iovstack)
-		kfree(iov);
+		return ret;
+	ret = keyctl_instantiate_key_common(id, &from, ringid);
+	kfree(iov);
 	return ret;
-
-no_payload_free:
-	if (iov != iovstack)
-		kfree(iov);
-no_payload:
-	return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
 }
 
 /*
diff --git a/security/security.c b/security/security.c
index e81d5bb..ed890c6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -608,11 +608,11 @@
 }
 EXPORT_SYMBOL_GPL(security_inode_setattr);
 
-int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+int security_inode_getattr(const struct path *path)
 {
-	if (unlikely(IS_PRIVATE(dentry->d_inode)))
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
 		return 0;
-	return security_ops->inode_getattr(mnt, dentry);
+	return security_ops->inode_getattr(path);
 }
 
 int security_inode_setxattr(struct dentry *dentry, const char *name,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4d1a541..e119cdc 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1623,7 +1623,7 @@
    the path to help the auditing code to more easily generate the
    pathname if needed. */
 static inline int path_has_perm(const struct cred *cred,
-				struct path *path,
+				const struct path *path,
 				u32 av)
 {
 	struct inode *inode = path->dentry->d_inode;
@@ -2954,15 +2954,9 @@
 	return dentry_has_perm(cred, dentry, av);
 }
 
-static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int selinux_inode_getattr(const struct path *path)
 {
-	const struct cred *cred = current_cred();
-	struct path path;
-
-	path.dentry = dentry;
-	path.mnt = mnt;
-
-	return path_has_perm(cred, &path, FILE__GETATTR);
+	return path_has_perm(current_cred(), path, FILE__GETATTR);
 }
 
 static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name)
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index c934311..1511965 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1034,19 +1034,16 @@
  *
  * Returns 0 if access is permitted, an error code otherwise
  */
-static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int smack_inode_getattr(const struct path *path)
 {
 	struct smk_audit_info ad;
-	struct path path;
+	struct inode *inode = path->dentry->d_inode;
 	int rc;
 
-	path.dentry = dentry;
-	path.mnt = mnt;
-
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
-	smk_ad_setfield_u_fs_path(&ad, path);
-	rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
-	rc = smk_bu_inode(dentry->d_inode, MAY_READ, rc);
+	smk_ad_setfield_u_fs_path(&ad, *path);
+	rc = smk_curacc(smk_of_inode(inode), MAY_READ, &ad);
+	rc = smk_bu_inode(inode, MAY_READ, rc);
 	return rc;
 }
 
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index b897d48..f9c9fb1 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -945,7 +945,7 @@
 char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
 		      va_list args);
 char *tomoyo_read_token(struct tomoyo_acl_param *param);
-char *tomoyo_realpath_from_path(struct path *path);
+char *tomoyo_realpath_from_path(const struct path *path);
 char *tomoyo_realpath_nofollow(const char *pathname);
 const char *tomoyo_get_exe(void);
 const char *tomoyo_yesno(const unsigned int value);
@@ -978,7 +978,7 @@
 		      struct path *path2);
 int tomoyo_path_number_perm(const u8 operation, struct path *path,
 			    unsigned long number);
-int tomoyo_path_perm(const u8 operation, struct path *path,
+int tomoyo_path_perm(const u8 operation, const struct path *path,
 		     const char *target);
 unsigned int tomoyo_poll_control(struct file *file, poll_table *wait);
 unsigned int tomoyo_poll_log(struct file *file, poll_table *wait);
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index c151a18..2367b10 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -145,7 +145,7 @@
  *
  * Returns true on success, false otherwise.
  */
-static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, struct path *path)
+static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, const struct path *path)
 {
 	buf->name = tomoyo_realpath_from_path(path);
 	if (buf->name) {
@@ -782,7 +782,7 @@
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_path_perm(const u8 operation, struct path *path, const char *target)
+int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target)
 {
 	struct tomoyo_request_info r;
 	struct tomoyo_obj_info obj = {
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index bed745c..1e0d480 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -89,7 +89,7 @@
  *
  * If dentry is a directory, trailing '/' is appended.
  */
-static char *tomoyo_get_absolute_path(struct path *path, char * const buffer,
+static char *tomoyo_get_absolute_path(const struct path *path, char * const buffer,
 				      const int buflen)
 {
 	char *pos = ERR_PTR(-ENOMEM);
@@ -216,7 +216,7 @@
  *
  * Returns the buffer.
  */
-static char *tomoyo_get_socket_name(struct path *path, char * const buffer,
+static char *tomoyo_get_socket_name(const struct path *path, char * const buffer,
 				    const int buflen)
 {
 	struct inode *inode = path->dentry->d_inode;
@@ -247,7 +247,7 @@
  * These functions use kzalloc(), so the caller must call kfree()
  * if these functions didn't return NULL.
  */
-char *tomoyo_realpath_from_path(struct path *path)
+char *tomoyo_realpath_from_path(const struct path *path)
 {
 	char *buf = NULL;
 	char *name = NULL;
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index f0b756e..57c88d5 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -144,10 +144,9 @@
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+static int tomoyo_inode_getattr(const struct path *path)
 {
-	struct path path = { mnt, dentry };
-	return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, &path, NULL);
+	return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, path, NULL);
 }
 
 /**
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 279e24f..a69ebc7 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/pm_qos.h>
-#include <linux/aio.h>
 #include <linux/io.h>
 #include <linux/dma-mapping.h>
 #include <sound/core.h>
@@ -35,6 +34,7 @@
 #include <sound/pcm_params.h>
 #include <sound/timer.h>
 #include <sound/minors.h>
+#include <linux/uio.h>
 
 /*
  *  Compatibility
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index e5c9908..10f0886 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2755,7 +2755,7 @@
 
 	ret = snd_soc_register_dais(cmpnt, dai_drv, num_dai, true);
 	if (ret < 0) {
-		dev_err(dev, "ASoC: Failed to regster DAIs: %d\n", ret);
+		dev_err(dev, "ASoC: Failed to register DAIs: %d\n", ret);
 		goto err_cleanup;
 	}
 
@@ -3076,7 +3076,7 @@
 
 	ret = snd_soc_register_dais(&codec->component, dai_drv, num_dai, false);
 	if (ret < 0) {
-		dev_err(dev, "ASoC: Failed to regster DAIs: %d\n", ret);
+		dev_err(dev, "ASoC: Failed to register DAIs: %d\n", ret);
 		goto err_cleanup;
 	}
 
diff --git a/tools/build/Build.include b/tools/build/Build.include
new file mode 100644
index 0000000..4c8daac
--- /dev/null
+++ b/tools/build/Build.include
@@ -0,0 +1,81 @@
+###
+# build: Generic definitions
+#
+#  Lots of this code have been borrowed or heavily inspired from parts
+#  of kbuild code, which is not credited, but mostly developed by:
+#
+#  Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015
+#  Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015
+#
+
+###
+# Convenient variables
+comma   := ,
+squote  := '
+
+###
+# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
+dot-target = $(dir $@).$(notdir $@)
+
+###
+# filename of target with directory and extension stripped
+basetarget = $(basename $(notdir $@))
+
+###
+# The temporary file to save gcc -MD generated dependencies must not
+# contain a comma
+depfile = $(subst $(comma),_,$(dot-target).d)
+
+###
+# Check if both arguments has same arguments. Result is empty string if equal.
+arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \
+                    $(filter-out $(cmd_$@),   $(cmd_$(1))) )
+
+###
+# Escape single quote for use in echo statements
+escsq = $(subst $(squote),'\$(squote)',$1)
+
+# Echo command
+# Short version is used, if $(quiet) equals `quiet_', otherwise full one.
+echo-cmd = $(if $($(quiet)cmd_$(1)),\
+           echo '  $(call escsq,$($(quiet)cmd_$(1)))';)
+
+###
+# Replace >$< with >$$< to preserve $ when reloading the .cmd file
+# (needed for make)
+# Replace >#< with >\#< to avoid starting a comment in the .cmd file
+# (needed for make)
+# Replace >'< with >'\''< to be able to enclose the whole string in '...'
+# (needed for the shell)
+make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1)))))
+
+###
+# Find any prerequisites that is newer than target or that does not exist.
+# PHONY targets skipped in both cases.
+any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^)
+
+###
+# if_changed_dep  - execute command if any prerequisite is newer than
+#                   target, or command line has changed and update
+#                   dependencies in the cmd file
+if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)),         \
+	@set -e;                                                   \
+	$(echo-cmd) $(cmd_$(1));                                   \
+	cat $(depfile) > $(dot-target).cmd;                        \
+	printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
+
+# if_changed      - execute command if any prerequisite is newer than
+#                   target, or command line has changed
+if_changed = $(if $(strip $(any-prereq) $(arg-check)),             \
+	@set -e;                                                   \
+	$(echo-cmd) $(cmd_$(1));                                   \
+	printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd)
+
+###
+# C flags to be used in rule definitions, includes:
+# - depfile generation
+# - global $(CFLAGS)
+# - per target C flags
+# - per object C flags
+# - BUILD_STR macro to allow '-D"$(variable)"' constructs
+c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt
new file mode 100644
index 0000000..00ad2d6
--- /dev/null
+++ b/tools/build/Documentation/Build.txt
@@ -0,0 +1,139 @@
+Build Framework
+===============
+
+The perf build framework was adopted from the kernel build system, hence the
+idea and the way how objects are built is the same.
+
+Basically the user provides set of 'Build' files that list objects and
+directories to nest for specific target to be build.
+
+Unlike the kernel we don't have a single build object 'obj-y' list that where
+we setup source objects, but we support more. This allows one 'Build' file to
+carry a sources list for multiple build objects.
+
+a) Build framework makefiles
+----------------------------
+
+The build framework consists of 2 Makefiles:
+
+  Build.include
+  Makefile.build
+
+While the 'Build.include' file contains just some generic definitions, the
+'Makefile.build' file is the makefile used from the outside. It's
+interface/usage is following:
+
+  $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT)
+
+where:
+
+  KSRC   - is the path to kernel sources
+  DIR    - is the path to the project to be built
+  OBJECT - is the name of the build object
+
+When succefully finished the $(DIR) directory contains the final object file
+called $(OBJECT)-in.o:
+
+  $ ls $(DIR)/$(OBJECT)-in.o
+
+which includes all compiled sources described in 'Build' makefiles.
+
+a) Build makefiles
+------------------
+
+The user supplies 'Build' makefiles that contains a objects list, and connects
+the build to nested directories.
+
+Assume we have the following project structure:
+
+  ex/a.c
+    /b.c
+    /c.c
+    /d.c
+    /arch/e.c
+    /arch/f.c
+
+Out of which you build the 'ex' binary ' and the 'libex.a' library:
+
+  'ex'      - consists of 'a.o', 'b.o' and libex.a
+  'libex.a' - consists of 'c.o', 'd.o', 'e.o' and 'f.o'
+
+The build framework does not create the 'ex' and 'libex.a' binaries for you, it
+only prepares proper objects to be compiled and grouped together.
+
+To follow the above example, the user provides following 'Build' files:
+
+  ex/Build:
+    ex-y += a.o
+    ex-y += b.o
+
+    libex-y += c.o
+    libex-y += d.o
+    libex-y += arch/
+
+  ex/arch/Build:
+    libex-y += e.o
+    libex-y += f.o
+
+and runs:
+
+  $ make -f tools/build/Makefile.build dir=. obj=ex
+  $ make -f tools/build/Makefile.build dir=. obj=libex
+
+which creates the following objects:
+
+  ex/ex-in.o
+  ex/libex-in.o
+
+that contain request objects names in Build files.
+
+It's only a matter of 2 single commands to create the final binaries:
+
+  $ ar  rcs libex.a libex-in.o
+  $ gcc -o ex ex-in.o libex.a
+
+You can check the 'ex' example in 'tools/build/tests/ex' for more details.
+
+b) Rules
+--------
+
+The build framework provides standard compilation rules to handle .S and .c
+compilation.
+
+It's possible to include special rule if needed (like we do for flex or bison
+code generation).
+
+c) CFLAGS
+---------
+
+It's possible to alter the standard object C flags in the following way:
+
+  CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object
+  CFLAGS_gtk += '...'    - alters CFLAGS for gtk build object
+
+This C flags changes has the scope of the Build makefile they are defined in.
+
+
+d) Dependencies
+---------------
+
+For each built object file 'a.o' the '.a.cmd' is created and holds:
+
+  - Command line used to built that object
+    (for each object)
+
+  - Dependency rules generated by 'gcc -Wp,-MD,...'
+    (for compiled object)
+
+All existing '.cmd' files are included in the Build process to follow properly
+the dependencies and trigger a rebuild when necessary.
+
+
+e) Single rules
+---------------
+
+It's possible to build single object file by choice, like:
+
+  $ make util/map.o    # objects
+  $ make util/map.i    # preprocessor
+  $ make util/map.s    # assembly
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
new file mode 100644
index 0000000..10df572
--- /dev/null
+++ b/tools/build/Makefile.build
@@ -0,0 +1,130 @@
+###
+# Main build makefile.
+#
+#  Lots of this code have been borrowed or heavily inspired from parts
+#  of kbuild code, which is not credited, but mostly developed by:
+#
+#  Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015
+#  Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015
+#
+
+PHONY := __build
+__build:
+
+ifeq ($(V),1)
+  quiet =
+  Q =
+else
+  quiet=quiet_
+  Q=@
+endif
+
+build-dir := $(srctree)/tools/build
+
+# Generic definitions
+include $(build-dir)/Build.include
+
+# do not force detected configuration
+-include .config-detected
+
+# Init all relevant variables used in build files so
+# 1) they have correct type
+# 2) they do not inherit any value from the environment
+subdir-y     :=
+obj-y        :=
+subdir-y     :=
+subdir-obj-y :=
+
+# Build definitions
+build-file := $(dir)/Build
+include $(build-file)
+
+quiet_cmd_flex  = FLEX     $@
+quiet_cmd_bison = BISON    $@
+
+# Create directory unless it exists
+quiet_cmd_mkdir = MKDIR    $(dir $@)
+      cmd_mkdir = mkdir -p $(dir $@)
+     rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir))
+
+# Compile command
+quiet_cmd_cc_o_c = CC       $@
+      cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
+
+quiet_cmd_cc_i_c = CPP      $@
+      cmd_cc_i_c = $(CC) $(c_flags) -E -o $@ $<
+
+quiet_cmd_cc_s_c = AS       $@
+      cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
+
+# Link agregate command
+# If there's nothing to link, create empty $@ object.
+quiet_cmd_ld_multi = LD       $@
+      cmd_ld_multi = $(if $(strip $(obj-y)),\
+		       $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@)
+
+# Build rules
+$(OUTPUT)%.o: %.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)%.o: %.S FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)%.i: %.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_i_c)
+
+$(OUTPUT)%.i: %.S FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_i_c)
+
+$(OUTPUT)%.s: %.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_s_c)
+
+# Gather build data:
+#   obj-y        - list of build objects
+#   subdir-y     - list of directories to nest
+#   subdir-obj-y - list of directories objects 'dir/$(obj)-in.o'
+obj-y        := $($(obj)-y)
+subdir-y     := $(patsubst %/,%,$(filter %/, $(obj-y)))
+obj-y        := $(patsubst %/, %/$(obj)-in.o, $(obj-y))
+subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y))
+
+# '$(OUTPUT)/dir' prefix to all objects
+prefix       := $(subst ./,,$(OUTPUT)$(dir)/)
+obj-y        := $(addprefix $(prefix),$(obj-y))
+subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y))
+
+# Final '$(obj)-in.o' object
+in-target := $(prefix)$(obj)-in.o
+
+PHONY += $(subdir-y)
+
+$(subdir-y):
+	$(Q)$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj)
+
+$(sort $(subdir-obj-y)): $(subdir-y) ;
+
+$(in-target): $(obj-y) FORCE
+	$(call rule_mkdir)
+	$(call if_changed,ld_multi)
+
+__build: $(in-target)
+	@:
+
+PHONY += FORCE
+FORCE:
+
+# Include all cmd files to get all the dependency rules
+# for all objects included
+targets   := $(wildcard $(sort $(obj-y) $(in-target) $(MAKECMDGOALS)))
+cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
+
+ifneq ($(cmd_files),)
+  include $(cmd_files)
+endif
+
+.PHONY: $(PHONY)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
new file mode 100644
index 0000000..3a0b0ca
--- /dev/null
+++ b/tools/build/Makefile.feature
@@ -0,0 +1,171 @@
+feature_dir := $(srctree)/tools/build/feature
+
+ifneq ($(OUTPUT),)
+  OUTPUT_FEATURES = $(OUTPUT)feature/
+  $(shell mkdir -p $(OUTPUT_FEATURES))
+endif
+
+feature_check = $(eval $(feature_check_code))
+define feature_check_code
+  feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+endef
+
+feature_set = $(eval $(feature_set_code))
+define feature_set_code
+  feature-$(1) := 1
+endef
+
+#
+# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output:
+#
+
+#
+# Note that this is not a complete list of all feature tests, just
+# those that are typically built on a fully configured system.
+#
+# [ Feature tests not mentioned here have to be built explicitly in
+#   the rule that uses them - an example for that is the 'bionic'
+#   feature check. ]
+#
+FEATURE_TESTS =			\
+	backtrace			\
+	dwarf				\
+	fortify-source			\
+	sync-compare-and-swap		\
+	glibc				\
+	gtk2				\
+	gtk2-infobar			\
+	libaudit			\
+	libbfd				\
+	libelf				\
+	libelf-getphdrnum		\
+	libelf-mmap			\
+	libnuma				\
+	libperl				\
+	libpython			\
+	libpython-version		\
+	libslang			\
+	libunwind			\
+	pthread-attr-setaffinity-np	\
+	stackprotector-all		\
+	timerfd				\
+	libdw-dwarf-unwind		\
+	zlib				\
+	lzma
+
+FEATURE_DISPLAY =			\
+	dwarf				\
+	glibc				\
+	gtk2				\
+	libaudit			\
+	libbfd				\
+	libelf				\
+	libnuma				\
+	libperl				\
+	libpython			\
+	libslang			\
+	libunwind			\
+	libdw-dwarf-unwind		\
+	zlib				\
+	lzma
+
+# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
+# If in the future we need per-feature checks/flags for features not
+# mentioned in this list we need to refactor this ;-).
+set_test_all_flags = $(eval $(set_test_all_flags_code))
+define set_test_all_flags_code
+  FEATURE_CHECK_CFLAGS-all  += $(FEATURE_CHECK_CFLAGS-$(1))
+  FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1))
+endef
+
+$(foreach feat,$(FEATURE_TESTS),$(call set_test_all_flags,$(feat)))
+
+#
+# Special fast-path for the 'all features are available' case:
+#
+$(call feature_check,all,$(MSG))
+
+#
+# Just in case the build freshly failed, make sure we print the
+# feature matrix:
+#
+ifeq ($(feature-all), 1)
+  #
+  # test-all.c passed - just set all the core feature flags to 1:
+  #
+  $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
+else
+  $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C $(feature_dir) $(addsuffix .bin,$(FEATURE_TESTS)) >/dev/null 2>&1)
+  $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
+endif
+
+#
+# Print the result of the feature test:
+#
+feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG))
+
+define feature_print_status_code
+  ifeq ($(feature-$(1)), 1)
+    MSG = $(shell printf '...%30s: [ \033[32mon\033[m  ]' $(1))
+  else
+    MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1))
+  endif
+endef
+
+feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG))
+define feature_print_text_code
+    MSG = $(shell printf '...%30s: %s' $(1) $(2))
+endef
+
+FEATURE_DUMP := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat))))
+FEATURE_DUMP_FILE := $(shell touch $(OUTPUT)FEATURE-DUMP; cat $(OUTPUT)FEATURE-DUMP)
+
+ifeq ($(dwarf-post-unwind),1)
+  FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text))
+endif
+
+# The $(feature_display) controls the default detection message
+# output. It's set if:
+# - detected features differes from stored features from
+#   last build (in FEATURE-DUMP file)
+# - one of the $(FEATURE_DISPLAY) is not detected
+# - VF is enabled
+
+ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)")
+  $(shell echo "$(FEATURE_DUMP)" > $(OUTPUT)FEATURE-DUMP)
+  feature_display := 1
+endif
+
+feature_display_check = $(eval $(feature_check_code))
+define feature_display_check_code
+  ifneq ($(feature-$(1)), 1)
+    feature_display := 1
+  endif
+endef
+
+$(foreach feat,$(FEATURE_DISPLAY),$(call feature_display_check,$(feat)))
+
+ifeq ($(VF),1)
+  feature_display := 1
+  feature_verbose := 1
+endif
+
+ifeq ($(feature_display),1)
+  $(info )
+  $(info Auto-detecting system features:)
+  $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),))
+
+  ifeq ($(dwarf-post-unwind),1)
+    $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
+  endif
+
+  ifneq ($(feature_verbose),1)
+    $(info )
+  endif
+endif
+
+ifeq ($(feature_verbose),1)
+  TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS))
+  $(foreach feat,$(TMP),$(call feature_print_status,$(feat),))
+  $(info )
+endif
diff --git a/tools/build/feature/.gitignore b/tools/build/feature/.gitignore
new file mode 100644
index 0000000..09b335b
--- /dev/null
+++ b/tools/build/feature/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.bin
+*.output
diff --git a/tools/perf/config/feature-checks/Makefile b/tools/build/feature/Makefile
similarity index 72%
rename from tools/perf/config/feature-checks/Makefile
rename to tools/build/feature/Makefile
index b32ff33..463ed8f 100644
--- a/tools/perf/config/feature-checks/Makefile
+++ b/tools/build/feature/Makefile
@@ -29,33 +29,36 @@
 	test-stackprotector-all.bin	\
 	test-timerfd.bin		\
 	test-libdw-dwarf-unwind.bin	\
+	test-libbabeltrace.bin		\
 	test-compile-32.bin		\
 	test-compile-x32.bin		\
-	test-zlib.bin
+	test-zlib.bin			\
+	test-lzma.bin
 
 CC := $(CROSS_COMPILE)gcc -MD
 PKG_CONFIG := $(CROSS_COMPILE)pkg-config
 
 all: $(FILES)
 
-BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS)
+__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS)
+  BUILD = $(__BUILD) > $(OUTPUT)$(@:.bin=.make.output) 2>&1
 
 ###############################
 
 test-all.bin:
-	$(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz
+	$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
 
 test-hello.bin:
 	$(BUILD)
 
 test-pthread-attr-setaffinity-np.bin:
-	$(BUILD) -D_GNU_SOURCE -Werror -lpthread
+	$(BUILD) -D_GNU_SOURCE -lpthread
 
 test-stackprotector-all.bin:
-	$(BUILD) -Werror -fstack-protector-all
+	$(BUILD) -fstack-protector-all
 
 test-fortify-source.bin:
-	$(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2
+	$(BUILD) -O2 -D_FORTIFY_SOURCE=2
 
 test-bionic.bin:
 	$(BUILD)
@@ -118,10 +121,10 @@
 	$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
 
 test-liberty.bin:
-	$(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty
+	$(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty
 
 test-liberty-z.bin:
-	$(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz
+	$(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz
 
 test-cplus-demangle.bin:
 	$(BUILD) -liberty
@@ -133,10 +136,13 @@
 	$(BUILD)
 
 test-libdw-dwarf-unwind.bin:
-	$(BUILD)
+	$(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind)
+
+test-libbabeltrace.bin:
+	$(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace)
 
 test-sync-compare-and-swap.bin:
-	$(BUILD) -Werror
+	$(BUILD)
 
 test-compile-32.bin:
 	$(CC) -m32 -o $(OUTPUT)$@ test-compile.c
@@ -147,9 +153,12 @@
 test-zlib.bin:
 	$(BUILD) -lz
 
+test-lzma.bin:
+	$(BUILD) -llzma
+
 -include *.d
 
 ###############################
 
 clean:
-	rm -f $(FILES) *.d
+	rm -f $(FILES) *.d $(FILES:.bin=.make.output)
diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/build/feature/test-all.c
similarity index 87%
rename from tools/perf/config/feature-checks/test-all.c
rename to tools/build/feature/test-all.c
index 6d4d093..84689a6 100644
--- a/tools/perf/config/feature-checks/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -98,7 +98,23 @@
 #undef main
 
 #define main main_test_pthread_attr_setaffinity_np
-# include "test-pthread_attr_setaffinity_np.c"
+# include "test-pthread-attr-setaffinity-np.c"
+#undef main
+
+# if 0
+/*
+ * Disable libbabeltrace check for test-all, because the requested
+ * library version is not released yet in most distributions. Will
+ * reenable later.
+ */
+
+#define main main_test_libbabeltrace
+# include "test-libbabeltrace.c"
+#undef main
+#endif
+
+#define main main_test_lzma
+# include "test-lzma.c"
 #undef main
 
 int main(int argc, char *argv[])
@@ -126,6 +142,7 @@
 	main_test_sync_compare_and_swap(argc, argv);
 	main_test_zlib();
 	main_test_pthread_attr_setaffinity_np();
+	main_test_lzma();
 
 	return 0;
 }
diff --git a/tools/perf/config/feature-checks/test-backtrace.c b/tools/build/feature/test-backtrace.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-backtrace.c
rename to tools/build/feature/test-backtrace.c
diff --git a/tools/perf/config/feature-checks/test-bionic.c b/tools/build/feature/test-bionic.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-bionic.c
rename to tools/build/feature/test-bionic.c
diff --git a/tools/perf/config/feature-checks/test-compile.c b/tools/build/feature/test-compile.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-compile.c
rename to tools/build/feature/test-compile.c
diff --git a/tools/perf/config/feature-checks/test-cplus-demangle.c b/tools/build/feature/test-cplus-demangle.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-cplus-demangle.c
rename to tools/build/feature/test-cplus-demangle.c
diff --git a/tools/perf/config/feature-checks/test-dwarf.c b/tools/build/feature/test-dwarf.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-dwarf.c
rename to tools/build/feature/test-dwarf.c
diff --git a/tools/perf/config/feature-checks/test-fortify-source.c b/tools/build/feature/test-fortify-source.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-fortify-source.c
rename to tools/build/feature/test-fortify-source.c
diff --git a/tools/perf/config/feature-checks/test-glibc.c b/tools/build/feature/test-glibc.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-glibc.c
rename to tools/build/feature/test-glibc.c
diff --git a/tools/perf/config/feature-checks/test-gtk2-infobar.c b/tools/build/feature/test-gtk2-infobar.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-gtk2-infobar.c
rename to tools/build/feature/test-gtk2-infobar.c
diff --git a/tools/perf/config/feature-checks/test-gtk2.c b/tools/build/feature/test-gtk2.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-gtk2.c
rename to tools/build/feature/test-gtk2.c
diff --git a/tools/perf/config/feature-checks/test-hello.c b/tools/build/feature/test-hello.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-hello.c
rename to tools/build/feature/test-hello.c
diff --git a/tools/perf/config/feature-checks/test-libaudit.c b/tools/build/feature/test-libaudit.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libaudit.c
rename to tools/build/feature/test-libaudit.c
diff --git a/tools/build/feature/test-libbabeltrace.c b/tools/build/feature/test-libbabeltrace.c
new file mode 100644
index 0000000..9cf802a
--- /dev/null
+++ b/tools/build/feature/test-libbabeltrace.c
@@ -0,0 +1,9 @@
+
+#include <babeltrace/ctf-writer/writer.h>
+#include <babeltrace/ctf-ir/stream-class.h>
+
+int main(void)
+{
+	bt_ctf_stream_class_get_packet_context_type((void *) 0);
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-libbfd.c b/tools/build/feature/test-libbfd.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libbfd.c
rename to tools/build/feature/test-libbfd.c
diff --git a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c b/tools/build/feature/test-libdw-dwarf-unwind.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c
rename to tools/build/feature/test-libdw-dwarf-unwind.c
diff --git a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c b/tools/build/feature/test-libelf-getphdrnum.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libelf-getphdrnum.c
rename to tools/build/feature/test-libelf-getphdrnum.c
diff --git a/tools/perf/config/feature-checks/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libelf-mmap.c
rename to tools/build/feature/test-libelf-mmap.c
diff --git a/tools/perf/config/feature-checks/test-libelf.c b/tools/build/feature/test-libelf.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libelf.c
rename to tools/build/feature/test-libelf.c
diff --git a/tools/perf/config/feature-checks/test-libnuma.c b/tools/build/feature/test-libnuma.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libnuma.c
rename to tools/build/feature/test-libnuma.c
diff --git a/tools/perf/config/feature-checks/test-libperl.c b/tools/build/feature/test-libperl.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libperl.c
rename to tools/build/feature/test-libperl.c
diff --git a/tools/perf/config/feature-checks/test-libpython-version.c b/tools/build/feature/test-libpython-version.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libpython-version.c
rename to tools/build/feature/test-libpython-version.c
diff --git a/tools/perf/config/feature-checks/test-libpython.c b/tools/build/feature/test-libpython.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libpython.c
rename to tools/build/feature/test-libpython.c
diff --git a/tools/perf/config/feature-checks/test-libslang.c b/tools/build/feature/test-libslang.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libslang.c
rename to tools/build/feature/test-libslang.c
diff --git a/tools/perf/config/feature-checks/test-libunwind-debug-frame.c b/tools/build/feature/test-libunwind-debug-frame.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libunwind-debug-frame.c
rename to tools/build/feature/test-libunwind-debug-frame.c
diff --git a/tools/perf/config/feature-checks/test-libunwind.c b/tools/build/feature/test-libunwind.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-libunwind.c
rename to tools/build/feature/test-libunwind.c
diff --git a/tools/build/feature/test-lzma.c b/tools/build/feature/test-lzma.c
new file mode 100644
index 0000000..95adc8c
--- /dev/null
+++ b/tools/build/feature/test-lzma.c
@@ -0,0 +1,10 @@
+#include <lzma.h>
+
+int main(void)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+	int ret;
+
+	ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
+	return ret ? -1 : 0;
+}
diff --git a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c b/tools/build/feature/test-pthread-attr-setaffinity-np.c
similarity index 77%
rename from tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c
rename to tools/build/feature/test-pthread-attr-setaffinity-np.c
index 2b81b72..fdada5e 100644
--- a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c
+++ b/tools/build/feature/test-pthread-attr-setaffinity-np.c
@@ -1,5 +1,6 @@
 #include <stdint.h>
 #include <pthread.h>
+#include <sched.h>
 
 int main(void)
 {
@@ -8,7 +9,8 @@
 	cpu_set_t cs;
 
 	pthread_attr_init(&thread_attr);
-	/* don't care abt exact args, just the API itself in libpthread */
+	CPU_ZERO(&cs);
+
 	ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cs), &cs);
 
 	return ret;
diff --git a/tools/perf/config/feature-checks/test-stackprotector-all.c b/tools/build/feature/test-stackprotector-all.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-stackprotector-all.c
rename to tools/build/feature/test-stackprotector-all.c
diff --git a/tools/perf/config/feature-checks/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-sync-compare-and-swap.c
rename to tools/build/feature/test-sync-compare-and-swap.c
diff --git a/tools/perf/config/feature-checks/test-timerfd.c b/tools/build/feature/test-timerfd.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-timerfd.c
rename to tools/build/feature/test-timerfd.c
diff --git a/tools/perf/config/feature-checks/test-zlib.c b/tools/build/feature/test-zlib.c
similarity index 100%
rename from tools/perf/config/feature-checks/test-zlib.c
rename to tools/build/feature/test-zlib.c
diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build
new file mode 100644
index 0000000..0e6c3e6
--- /dev/null
+++ b/tools/build/tests/ex/Build
@@ -0,0 +1,8 @@
+ex-y += ex.o
+ex-y += a.o
+ex-y += b.o
+ex-y += empty/
+
+libex-y += c.o
+libex-y += d.o
+libex-y += arch/
diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile
new file mode 100644
index 0000000..52d2476
--- /dev/null
+++ b/tools/build/tests/ex/Makefile
@@ -0,0 +1,23 @@
+export srctree := ../../../..
+export CC      := gcc
+export LD      := ld
+export AR      := ar
+
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+ex: ex-in.o libex-in.o
+	gcc -o $@ $^
+
+ex.%: FORCE
+	make -f $(srctree)/tools/build/Makefile.build dir=. $@
+
+ex-in.o: FORCE
+	make $(build)=ex
+
+libex-in.o: FORCE
+	make $(build)=libex
+
+clean:
+	find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+	rm -f ex ex.i ex.s
+
+.PHONY: FORCE
diff --git a/tools/build/tests/ex/a.c b/tools/build/tests/ex/a.c
new file mode 100644
index 0000000..8517627
--- /dev/null
+++ b/tools/build/tests/ex/a.c
@@ -0,0 +1,5 @@
+
+int a(void)
+{
+	return 0;
+}
diff --git a/tools/build/tests/ex/arch/Build b/tools/build/tests/ex/arch/Build
new file mode 100644
index 0000000..5550618
--- /dev/null
+++ b/tools/build/tests/ex/arch/Build
@@ -0,0 +1,2 @@
+libex-y += e.o
+libex-y += f.o
diff --git a/tools/build/tests/ex/arch/e.c b/tools/build/tests/ex/arch/e.c
new file mode 100644
index 0000000..beaa4a1
--- /dev/null
+++ b/tools/build/tests/ex/arch/e.c
@@ -0,0 +1,5 @@
+
+int e(void)
+{
+	return 0;
+}
diff --git a/tools/build/tests/ex/arch/f.c b/tools/build/tests/ex/arch/f.c
new file mode 100644
index 0000000..7c3e9e9
--- /dev/null
+++ b/tools/build/tests/ex/arch/f.c
@@ -0,0 +1,5 @@
+
+int f(void)
+{
+	return 0;
+}
diff --git a/tools/build/tests/ex/b.c b/tools/build/tests/ex/b.c
new file mode 100644
index 0000000..c24ff9c
--- /dev/null
+++ b/tools/build/tests/ex/b.c
@@ -0,0 +1,5 @@
+
+int b(void)
+{
+	return 0;
+}
diff --git a/tools/build/tests/ex/c.c b/tools/build/tests/ex/c.c
new file mode 100644
index 0000000..e216d02
--- /dev/null
+++ b/tools/build/tests/ex/c.c
@@ -0,0 +1,5 @@
+
+int c(void)
+{
+	return 0;
+}
diff --git a/tools/build/tests/ex/d.c b/tools/build/tests/ex/d.c
new file mode 100644
index 0000000..80dc0f0
--- /dev/null
+++ b/tools/build/tests/ex/d.c
@@ -0,0 +1,5 @@
+
+int d(void)
+{
+	return 0;
+}
diff --git a/tools/build/tests/ex/empty/Build b/tools/build/tests/ex/empty/Build
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tools/build/tests/ex/empty/Build
diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c
new file mode 100644
index 0000000..dc42eb2
--- /dev/null
+++ b/tools/build/tests/ex/ex.c
@@ -0,0 +1,19 @@
+
+int a(void);
+int b(void);
+int c(void);
+int d(void);
+int e(void);
+int f(void);
+
+int main(void)
+{
+	a();
+	b();
+	c();
+	d();
+	e();
+	f();
+
+	return 0;
+}
diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh
new file mode 100755
index 0000000..5494f8e
--- /dev/null
+++ b/tools/build/tests/run.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+function test_ex {
+	make -C ex V=1 clean > ex.out 2>&1
+	make -C ex V=1 >> ex.out 2>&1
+
+	if [ ! -x ./ex/ex ]; then
+	  echo FAILED
+	  exit -1
+	fi
+
+	make -C ex V=1 clean > /dev/null 2>&1
+	rm -f ex.out
+}
+
+function test_ex_suffix {
+	make -C ex V=1 clean > ex.out 2>&1
+
+	# use -rR to disable make's builtin rules
+	make -rR -C ex V=1 ex.o >> ex.out 2>&1
+	make -rR -C ex V=1 ex.i >> ex.out 2>&1
+	make -rR -C ex V=1 ex.s >> ex.out 2>&1
+
+	if [ -x ./ex/ex ]; then
+	  echo FAILED
+	  exit -1
+	fi
+
+	if [ ! -f ./ex/ex.o -o ! -f ./ex/ex.i -o ! -f ./ex/ex.s ]; then
+	  echo FAILED
+	  exit -1
+	fi
+
+	make -C ex V=1 clean > /dev/null 2>&1
+	rm -f ex.out
+}
+echo -n Testing..
+
+test_ex
+test_ex_suffix
+
+echo OK
diff --git a/tools/lib/api/Build b/tools/lib/api/Build
new file mode 100644
index 0000000..3653965
--- /dev/null
+++ b/tools/lib/api/Build
@@ -0,0 +1,2 @@
+libapi-y += fd/
+libapi-y += fs/
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 36c08b1..d8fe29f 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -1,49 +1,43 @@
 include ../../scripts/Makefile.include
 include ../../perf/config/utilities.mak		# QUIET_CLEAN
 
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
 CC = $(CROSS_COMPILE)gcc
 AR = $(CROSS_COMPILE)ar
 
-# guard against environment variables
-LIB_H=
-LIB_OBJS=
+MAKEFLAGS += --no-print-directory
 
-LIB_H += fs/debugfs.h
-LIB_H += fs/fs.h
-# See comment below about piggybacking...
-LIB_H += fd/array.h
+LIBFILE = $(OUTPUT)libapi.a
 
-LIB_OBJS += $(OUTPUT)fs/debugfs.o
-LIB_OBJS += $(OUTPUT)fs/fs.o
-# XXX piggybacking here, need to introduce libapikfd, or rename this
-# to plain libapik.a and make it have it all api goodies
-LIB_OBJS += $(OUTPUT)fd/array.o
-
-LIBFILE = libapikfs.a
-
-CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC
-EXTLIBS = -lelf -lpthread -lrt -lm
-ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
-ALL_LDFLAGS = $(LDFLAGS)
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 
 RM = rm -f
 
-$(LIBFILE): $(LIB_OBJS)
-	$(QUIET_AR)$(RM) $@ && $(AR) rcs $(OUTPUT)$@ $(LIB_OBJS)
+build  := -f $(srctree)/tools/build/Makefile.build dir=. obj
+API_IN := $(OUTPUT)libapi-in.o
 
-$(LIB_OBJS): $(LIB_H)
+export srctree OUTPUT CC LD CFLAGS V
 
-libapi_dirs:
-	$(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs
+all: $(LIBFILE)
 
-$(OUTPUT)%.o: %.c libapi_dirs
-	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
-$(OUTPUT)%.s: %.c libapi_dirs
-	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
-$(OUTPUT)%.o: %.S libapi_dirs
-	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
+$(API_IN): FORCE
+	@$(MAKE) $(build)=libapi
+
+$(LIBFILE): $(API_IN)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN)
 
 clean:
-	$(call QUIET_CLEAN, libapi) $(RM) $(LIB_OBJS) $(LIBFILE)
+	$(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \
+	find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM)
 
-.PHONY: clean
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build
new file mode 100644
index 0000000..605d99f
--- /dev/null
+++ b/tools/lib/api/fd/Build
@@ -0,0 +1 @@
+libapi-y += array.o
diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build
new file mode 100644
index 0000000..6de5a4f
--- /dev/null
+++ b/tools/lib/api/fs/Build
@@ -0,0 +1,4 @@
+libapi-y += fs.o
+libapi-y += debugfs.o
+libapi-y += findfs.o
+libapi-y += tracefs.o
diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c
index d2b18e8..8305b3e 100644
--- a/tools/lib/api/fs/debugfs.c
+++ b/tools/lib/api/fs/debugfs.c
@@ -3,75 +3,50 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 #include <stdbool.h>
 #include <sys/vfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
 #include <sys/mount.h>
 #include <linux/kernel.h>
 
 #include "debugfs.h"
 
-char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug";
+#ifndef DEBUGFS_DEFAULT_PATH
+#define DEBUGFS_DEFAULT_PATH		"/sys/kernel/debug"
+#endif
+
+char debugfs_mountpoint[PATH_MAX + 1] = DEBUGFS_DEFAULT_PATH;
 
 static const char * const debugfs_known_mountpoints[] = {
-	"/sys/kernel/debug",
+	DEBUGFS_DEFAULT_PATH,
 	"/debug",
 	0,
 };
 
 static bool debugfs_found;
 
+bool debugfs_configured(void)
+{
+	return debugfs_find_mountpoint() != NULL;
+}
+
 /* find the path to the mounted debugfs */
 const char *debugfs_find_mountpoint(void)
 {
-	const char * const *ptr;
-	char type[100];
-	FILE *fp;
+	const char *ret;
 
 	if (debugfs_found)
 		return (const char *)debugfs_mountpoint;
 
-	ptr = debugfs_known_mountpoints;
-	while (*ptr) {
-		if (debugfs_valid_mountpoint(*ptr) == 0) {
-			debugfs_found = true;
-			strcpy(debugfs_mountpoint, *ptr);
-			return debugfs_mountpoint;
-		}
-		ptr++;
-	}
+	ret = find_mountpoint("debugfs", (long) DEBUGFS_MAGIC,
+			      debugfs_mountpoint, PATH_MAX + 1,
+			      debugfs_known_mountpoints);
+	if (ret)
+		debugfs_found = true;
 
-	/* give up and parse /proc/mounts */
-	fp = fopen("/proc/mounts", "r");
-	if (fp == NULL)
-		return NULL;
-
-	while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
-		      debugfs_mountpoint, type) == 2) {
-		if (strcmp(type, "debugfs") == 0)
-			break;
-	}
-	fclose(fp);
-
-	if (strcmp(type, "debugfs") != 0)
-		return NULL;
-
-	debugfs_found = true;
-
-	return debugfs_mountpoint;
-}
-
-/* verify that a mountpoint is actually a debugfs instance */
-
-int debugfs_valid_mountpoint(const char *debugfs)
-{
-	struct statfs st_fs;
-
-	if (statfs(debugfs, &st_fs) < 0)
-		return -ENOENT;
-	else if ((long)st_fs.f_type != (long)DEBUGFS_MAGIC)
-		return -ENOENT;
-
-	return 0;
+	return ret;
 }
 
 /* mount the debugfs somewhere if it's not mounted */
@@ -87,7 +62,7 @@
 		mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT);
 		/* if no environment variable, use default */
 		if (mountpoint == NULL)
-			mountpoint = "/sys/kernel/debug";
+			mountpoint = DEBUGFS_DEFAULT_PATH;
 	}
 
 	if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0)
diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h
index 0739881..4550236 100644
--- a/tools/lib/api/fs/debugfs.h
+++ b/tools/lib/api/fs/debugfs.h
@@ -1,16 +1,7 @@
 #ifndef __API_DEBUGFS_H__
 #define __API_DEBUGFS_H__
 
-#define _STR(x) #x
-#define STR(x) _STR(x)
-
-/*
- * On most systems <limits.h> would have given us this, but  not on some systems
- * (e.g. GNU/Hurd).
- */
-#ifndef PATH_MAX
-#define PATH_MAX 4096
-#endif
+#include "findfs.h"
 
 #ifndef DEBUGFS_MAGIC
 #define DEBUGFS_MAGIC          0x64626720
@@ -20,8 +11,8 @@
 #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
 #endif
 
+bool debugfs_configured(void);
 const char *debugfs_find_mountpoint(void);
-int debugfs_valid_mountpoint(const char *debugfs);
 char *debugfs_mount(const char *mountpoint);
 
 extern char debugfs_mountpoint[];
diff --git a/tools/lib/api/fs/findfs.c b/tools/lib/api/fs/findfs.c
new file mode 100644
index 0000000..49946cb
--- /dev/null
+++ b/tools/lib/api/fs/findfs.c
@@ -0,0 +1,63 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/vfs.h>
+
+#include "findfs.h"
+
+/* verify that a mountpoint is actually the type we want */
+
+int valid_mountpoint(const char *mount, long magic)
+{
+	struct statfs st_fs;
+
+	if (statfs(mount, &st_fs) < 0)
+		return -ENOENT;
+	else if ((long)st_fs.f_type != magic)
+		return -ENOENT;
+
+	return 0;
+}
+
+/* find the path to a mounted file system */
+const char *find_mountpoint(const char *fstype, long magic,
+			    char *mountpoint, int len,
+			    const char * const *known_mountpoints)
+{
+	const char * const *ptr;
+	char format[128];
+	char type[100];
+	FILE *fp;
+
+	if (known_mountpoints) {
+		ptr = known_mountpoints;
+		while (*ptr) {
+			if (valid_mountpoint(*ptr, magic) == 0) {
+				strncpy(mountpoint, *ptr, len - 1);
+				mountpoint[len-1] = 0;
+				return mountpoint;
+			}
+			ptr++;
+		}
+	}
+
+	/* give up and parse /proc/mounts */
+	fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		return NULL;
+
+	snprintf(format, 128, "%%*s %%%ds %%99s %%*s %%*d %%*d\n", len);
+
+	while (fscanf(fp, format, mountpoint, type) == 2) {
+		if (strcmp(type, fstype) == 0)
+			break;
+	}
+	fclose(fp);
+
+	if (strcmp(type, fstype) != 0)
+		return NULL;
+
+	return mountpoint;
+}
diff --git a/tools/lib/api/fs/findfs.h b/tools/lib/api/fs/findfs.h
new file mode 100644
index 0000000..b6f5d05
--- /dev/null
+++ b/tools/lib/api/fs/findfs.h
@@ -0,0 +1,23 @@
+#ifndef __API_FINDFS_H__
+#define __API_FINDFS_H__
+
+#include <stdbool.h>
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+
+/*
+ * On most systems <limits.h> would have given us this, but  not on some systems
+ * (e.g. GNU/Hurd).
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+const char *find_mountpoint(const char *fstype, long magic,
+			    char *mountpoint, int len,
+			    const char * const *known_mountpoints);
+
+int valid_mountpoint(const char *mount, long magic);
+
+#endif /* __API_FINDFS_H__ */
diff --git a/tools/lib/api/fs/tracefs.c b/tools/lib/api/fs/tracefs.c
new file mode 100644
index 0000000..e4aa968
--- /dev/null
+++ b/tools/lib/api/fs/tracefs.c
@@ -0,0 +1,78 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <sys/vfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <linux/kernel.h>
+
+#include "tracefs.h"
+
+#ifndef TRACEFS_DEFAULT_PATH
+#define TRACEFS_DEFAULT_PATH		"/sys/kernel/tracing"
+#endif
+
+char tracefs_mountpoint[PATH_MAX + 1] = TRACEFS_DEFAULT_PATH;
+
+static const char * const tracefs_known_mountpoints[] = {
+	TRACEFS_DEFAULT_PATH,
+	"/sys/kernel/debug/tracing",
+	"/tracing",
+	"/trace",
+	0,
+};
+
+static bool tracefs_found;
+
+bool tracefs_configured(void)
+{
+	return tracefs_find_mountpoint() != NULL;
+}
+
+/* find the path to the mounted tracefs */
+const char *tracefs_find_mountpoint(void)
+{
+	const char *ret;
+
+	if (tracefs_found)
+		return (const char *)tracefs_mountpoint;
+
+	ret = find_mountpoint("tracefs", (long) TRACEFS_MAGIC,
+			      tracefs_mountpoint, PATH_MAX + 1,
+			      tracefs_known_mountpoints);
+
+	if (ret)
+		tracefs_found = true;
+
+	return ret;
+}
+
+/* mount the tracefs somewhere if it's not mounted */
+char *tracefs_mount(const char *mountpoint)
+{
+	/* see if it's already mounted */
+	if (tracefs_find_mountpoint())
+		goto out;
+
+	/* if not mounted and no argument */
+	if (mountpoint == NULL) {
+		/* see if environment variable set */
+		mountpoint = getenv(PERF_TRACEFS_ENVIRONMENT);
+		/* if no environment variable, use default */
+		if (mountpoint == NULL)
+			mountpoint = TRACEFS_DEFAULT_PATH;
+	}
+
+	if (mount(NULL, mountpoint, "tracefs", 0, NULL) < 0)
+		return NULL;
+
+	/* save the mountpoint */
+	tracefs_found = true;
+	strncpy(tracefs_mountpoint, mountpoint, sizeof(tracefs_mountpoint));
+out:
+	return tracefs_mountpoint;
+}
diff --git a/tools/lib/api/fs/tracefs.h b/tools/lib/api/fs/tracefs.h
new file mode 100644
index 0000000..da780ac
--- /dev/null
+++ b/tools/lib/api/fs/tracefs.h
@@ -0,0 +1,21 @@
+#ifndef __API_TRACEFS_H__
+#define __API_TRACEFS_H__
+
+#include "findfs.h"
+
+#ifndef TRACEFS_MAGIC
+#define TRACEFS_MAGIC          0x74726163
+#endif
+
+#ifndef PERF_TRACEFS_ENVIRONMENT
+#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
+#endif
+
+bool tracefs_configured(void);
+const char *tracefs_find_mountpoint(void);
+int tracefs_valid_mountpoint(const char *debugfs);
+char *tracefs_mount(const char *mountpoint);
+
+extern char tracefs_mountpoint[];
+
+#endif /* __API_DEBUGFS_H__ */
diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build
new file mode 100644
index 0000000..6f66735
--- /dev/null
+++ b/tools/lib/lockdep/Build
@@ -0,0 +1 @@
+liblockdep-y += common.o lockdep.o preload.o rbtree.o
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
index 4b866c5..0c356fb 100644
--- a/tools/lib/lockdep/Makefile
+++ b/tools/lib/lockdep/Makefile
@@ -35,6 +35,10 @@
 
 export DESTDIR DESTDIR_SQ INSTALL
 
+MAKEFLAGS += --no-print-directory
+
+include ../../scripts/Makefile.include
+
 # copy a bit from Linux kbuild
 
 ifeq ("$(origin V)", "command line")
@@ -44,56 +48,21 @@
   VERBOSE = 0
 endif
 
-ifeq ("$(origin O)", "command line")
-  BUILD_OUTPUT := $(O)
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
 endif
 
-ifeq ($(BUILD_SRC),)
-ifneq ($(BUILD_OUTPUT),)
-
-define build_output
-	$(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT)	\
-	BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1
-endef
-
-saved-output := $(BUILD_OUTPUT)
-BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd)
-$(if $(BUILD_OUTPUT),, \
-     $(error output directory "$(saved-output)" does not exist))
-
-all: sub-make
-
-gui: force
-	$(call build_output, all_cmd)
-
-$(filter-out gui,$(MAKECMDGOALS)): sub-make
-
-sub-make: force
-	$(call build_output, $(MAKECMDGOALS))
-
-
-# Leave processing to above invocation of make
-skip-makefile := 1
-
-endif # BUILD_OUTPUT
-endif # BUILD_SRC
-
-# We process the rest of the Makefile if this is the final invocation of make
-ifeq ($(skip-makefile),)
-
-srctree		:= $(realpath $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)))
-objtree		:= $(realpath $(CURDIR))
-src		:= $(srctree)
-obj		:= $(objtree)
-
-export prefix libdir bindir src obj
-
 # Shell quotes
 libdir_SQ = $(subst ','\'',$(libdir))
 bindir_SQ = $(subst ','\'',$(bindir))
 
-LIB_FILE = liblockdep.a liblockdep.so.$(LIBLOCKDEP_VERSION)
+LIB_IN := $(OUTPUT)liblockdep-in.o
+
 BIN_FILE = lockdep
+LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION)
 
 CONFIG_INCLUDES =
 CONFIG_LIBS	=
@@ -108,33 +77,23 @@
 
 # Set compile option CFLAGS if not set elsewhere
 CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
+CFLAGS += -fPIC
 
 override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
 
 ifeq ($(VERBOSE),1)
   Q =
-  print_compile =
-  print_app_build =
-  print_fpic_compile =
   print_shared_lib_compile =
   print_install =
 else
   Q = @
-  print_compile =		echo '  CC                 '$(OBJ);
-  print_app_build =		echo '  BUILD              '$(OBJ);
-  print_fpic_compile =		echo '  CC FPIC            '$(OBJ);
-  print_shared_lib_compile =	echo '  BUILD SHARED LIB   '$(OBJ);
-  print_static_lib_build =	echo '  BUILD STATIC LIB   '$(OBJ);
-  print_install =		echo '  INSTALL     '$1'	to	$(DESTDIR_SQ)$2';
+  print_shared_lib_compile =	echo '  LD       '$(OBJ);
+  print_static_lib_build =	echo '  LD       '$(OBJ);
+  print_install =		echo '  INSTALL  '$1'	to	$(DESTDIR_SQ)$2';
 endif
 
-do_fpic_compile =					\
-	($(print_fpic_compile)				\
-	$(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@)
-
-do_app_build =						\
-	($(print_app_build)				\
-	$(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS))
+export srctree OUTPUT CC LD CFLAGS V
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 
 do_compile_shared_library =			\
 	($(print_shared_lib_compile)		\
@@ -144,22 +103,6 @@
 	($(print_static_lib_build)		\
 	$(RM) $@;  $(AR) rcs $@ $^)
 
-
-define do_compile
-	$(print_compile)						\
-	$(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@;
-endef
-
-$(obj)/%.o: $(src)/%.c
-	$(Q)$(call do_compile)
-
-%.o: $(src)/%.c
-	$(Q)$(call do_compile)
-
-PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o
-
-ALL_OBJS = $(PEVENT_LIB_OBJS)
-
 CMD_TARGETS = $(LIB_FILE)
 
 TARGETS = $(CMD_TARGETS)
@@ -169,42 +112,15 @@
 
 all_cmd: $(CMD_TARGETS)
 
-liblockdep.so.$(LIBLOCKDEP_VERSION): $(PEVENT_LIB_OBJS)
+$(LIB_IN): force
+	$(Q)$(MAKE) $(build)=liblockdep
+
+liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
 	$(Q)$(do_compile_shared_library)
 
-liblockdep.a: $(PEVENT_LIB_OBJS)
+liblockdep.a: $(LIB_IN)
 	$(Q)$(do_build_static_lib)
 
-$(PEVENT_LIB_OBJS): %.o: $(src)/%.c
-	$(Q)$(do_fpic_compile)
-
-## make deps
-
-all_objs := $(sort $(ALL_OBJS))
-all_deps := $(all_objs:%.o=.%.d)
-
-# let .d file also depends on the source and header files
-define check_deps
-		@set -e; $(RM) $@; \
-		$(CC) -MM $(CFLAGS) $< > $@.$$$$; \
-		sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
-		$(RM) $@.$$$$
-endef
-
-$(all_deps): .%.d: $(src)/%.c
-	$(Q)$(call check_deps)
-
-$(all_objs) : %.o : .%.d
-
-dep_includes := $(wildcard $(all_deps))
-
-ifneq ($(dep_includes),)
- include $(dep_includes)
-endif
-
-### Detect environment changes
-TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE)
-
 tags:	force
 	$(RM) tags
 	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
@@ -233,8 +149,6 @@
 	$(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d
 	$(RM) tags TAGS
 
-endif # skip-makefile
-
 PHONY += force
 force:
 
diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build
new file mode 100644
index 0000000..c681d05
--- /dev/null
+++ b/tools/lib/traceevent/Build
@@ -0,0 +1,17 @@
+libtraceevent-y += event-parse.o
+libtraceevent-y += event-plugin.o
+libtraceevent-y += trace-seq.o
+libtraceevent-y += parse-filter.o
+libtraceevent-y += parse-utils.o
+libtraceevent-y += kbuffer-parse.o
+
+plugin_jbd2-y         += plugin_jbd2.o
+plugin_hrtimer-y      += plugin_hrtimer.o
+plugin_kmem-y         += plugin_kmem.o
+plugin_kvm-y          += plugin_kvm.o
+plugin_mac80211-y     += plugin_mac80211.o
+plugin_sched_switch-y += plugin_sched_switch.o
+plugin_function-y     += plugin_function.o
+plugin_xen-y          += plugin_xen.o
+plugin_scsi-y         += plugin_scsi.o
+plugin_cfg80211-y     += plugin_cfg80211.o
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 005c9cc..d410da3 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -67,7 +67,7 @@
 PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
 endif
 
-include $(if $(BUILD_SRC),$(BUILD_SRC)/)../../scripts/Makefile.include
+include ../../scripts/Makefile.include
 
 # copy a bit from Linux kbuild
 
@@ -78,40 +78,13 @@
   VERBOSE = 0
 endif
 
-ifeq ("$(origin O)", "command line")
-  BUILD_OUTPUT := $(O)
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
 endif
 
-ifeq ($(BUILD_SRC),)
-ifneq ($(OUTPUT),)
-
-define build_output
-  $(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \
-  BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1
-endef
-
-all: sub-make
-
-$(MAKECMDGOALS): sub-make
-
-sub-make: force
-	$(call build_output, $(MAKECMDGOALS))
-
-
-# Leave processing to above invocation of make
-skip-makefile := 1
-
-endif # OUTPUT
-endif # BUILD_SRC
-
-# We process the rest of the Makefile if this is the final invocation of make
-ifeq ($(skip-makefile),)
-
-srctree		:= $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))
-objtree		:= $(CURDIR)
-src		:= $(srctree)
-obj		:= $(objtree)
-
 export prefix bindir src obj
 
 # Shell quotes
@@ -132,16 +105,19 @@
 OBJ		= $@
 N		=
 
-export Q VERBOSE
-
 EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
 
-INCLUDES = -I. -I $(srctree)/../../include $(CONFIG_INCLUDES)
+INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
 
-# Set compile option CFLAGS if not set elsewhere
-CFLAGS ?= -g -Wall
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
 
 # Append required CFLAGS
+override CFLAGS += -fPIC
 override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
 override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
 
@@ -151,74 +127,58 @@
   Q = @
 endif
 
-do_compile_shared_library =			\
-	($(print_shared_lib_compile)		\
-	$(CC) --shared $^ -o $@)
+# Disable command line variables (CFLAGS) overide from top
+# level Makefile (perf), otherwise build Makefile will get
+# the same command line setup.
+MAKEOVERRIDES=
 
-do_plugin_build =				\
-	($(print_plugin_build)			\
-	$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<)
+export srctree OUTPUT CC LD CFLAGS V
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 
-do_build_static_lib =				\
-	($(print_static_lib_build)		\
-	$(RM) $@;  $(AR) rcs $@ $^)
+PLUGINS  = plugin_jbd2.so
+PLUGINS += plugin_hrtimer.so
+PLUGINS += plugin_kmem.so
+PLUGINS += plugin_kvm.so
+PLUGINS += plugin_mac80211.so
+PLUGINS += plugin_sched_switch.so
+PLUGINS += plugin_function.so
+PLUGINS += plugin_xen.so
+PLUGINS += plugin_scsi.so
+PLUGINS += plugin_cfg80211.so
 
+PLUGINS    := $(addprefix $(OUTPUT),$(PLUGINS))
+PLUGINS_IN := $(PLUGINS:.so=-in.o)
 
-do_compile = $(QUIET_CC)$(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@;
-
-$(obj)/%.o: $(src)/%.c
-	$(call do_compile)
-
-%.o: $(src)/%.c
-	$(call do_compile)
-
-PEVENT_LIB_OBJS  = event-parse.o
-PEVENT_LIB_OBJS += event-plugin.o
-PEVENT_LIB_OBJS += trace-seq.o
-PEVENT_LIB_OBJS += parse-filter.o
-PEVENT_LIB_OBJS += parse-utils.o
-PEVENT_LIB_OBJS += kbuffer-parse.o
-
-PLUGIN_OBJS  = plugin_jbd2.o
-PLUGIN_OBJS += plugin_hrtimer.o
-PLUGIN_OBJS += plugin_kmem.o
-PLUGIN_OBJS += plugin_kvm.o
-PLUGIN_OBJS += plugin_mac80211.o
-PLUGIN_OBJS += plugin_sched_switch.o
-PLUGIN_OBJS += plugin_function.o
-PLUGIN_OBJS += plugin_xen.o
-PLUGIN_OBJS += plugin_scsi.o
-PLUGIN_OBJS += plugin_cfg80211.o
-
-PLUGINS := $(PLUGIN_OBJS:.o=.so)
-
-ALL_OBJS = $(PEVENT_LIB_OBJS) $(PLUGIN_OBJS)
+TE_IN    := $(OUTPUT)libtraceevent-in.o
+LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
 
 CMD_TARGETS = $(LIB_FILE) $(PLUGINS)
 
 TARGETS = $(CMD_TARGETS)
 
-
 all: all_cmd
 
 all_cmd: $(CMD_TARGETS)
 
-libtraceevent.so: $(PEVENT_LIB_OBJS)
+$(TE_IN): force
+	$(Q)$(MAKE) $(build)=libtraceevent
+
+$(OUTPUT)libtraceevent.so: $(TE_IN)
 	$(QUIET_LINK)$(CC) --shared $^ -o $@
 
-libtraceevent.a: $(PEVENT_LIB_OBJS)
+$(OUTPUT)libtraceevent.a: $(TE_IN)
 	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
 plugins: $(PLUGINS)
 
-$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS
-	$(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@
+__plugin_obj = $(notdir $@)
+  plugin_obj = $(__plugin_obj:-in.o=)
 
-$(PLUGIN_OBJS): %.o : $(src)/%.c
-	$(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) -fPIC -o $@ $<
+$(PLUGINS_IN): force
+	$(Q)$(MAKE) $(build)=$(plugin_obj)
 
-$(PLUGINS): %.so: %.o
-	$(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<
+$(OUTPUT)%.so: $(OUTPUT)%-in.o
+	$(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^
 
 define make_version.h
   (echo '/* This file is automatically generated. Do not modify. */';		\
@@ -255,40 +215,6 @@
    fi);
 endef
 
-## make deps
-
-all_objs := $(sort $(ALL_OBJS))
-all_deps := $(all_objs:%.o=.%.d)
-
-# let .d file also depends on the source and header files
-define check_deps
-  @set -e; $(RM) $@; \
-  $(CC) -MM $(CFLAGS) $< > $@.$$$$; \
-  sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
-  $(RM) $@.$$$$
-endef
-
-$(all_deps): .%.d: $(src)/%.c
-	$(Q)$(call check_deps)
-
-$(all_objs) : %.o : .%.d
-
-dep_includes := $(wildcard $(all_deps))
-
-ifneq ($(dep_includes),)
- include $(dep_includes)
-endif
-
-### Detect environment changes
-TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE)
-
-TRACEEVENT-CFLAGS: force
-	@FLAGS='$(TRACK_CFLAGS)'; \
-	    if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \
-		echo 1>&2 "  FLAGS:   * new build flags or cross compiler"; \
-		echo "$$FLAGS" >TRACEEVENT-CFLAGS; \
-            fi
-
 tags:	force
 	$(RM) tags
 	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
@@ -327,14 +253,9 @@
 		$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \
 		$(RM) TRACEEVENT-CFLAGS tags TAGS
 
-endif # skip-makefile
-
 PHONY += force plugins
 force:
 
-plugins:
-	@echo > /dev/null
-
 # Declare the contents of the .PHONY variable as phony.  We keep that
 # information in a variable so we can use it in if_changed and friends.
 .PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index afe20ed..e0917c0 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -304,7 +304,10 @@
 	if (!item)
 		return -1;
 
-	item->comm = strdup(comm);
+	if (comm)
+		item->comm = strdup(comm);
+	else
+		item->comm = strdup("<...>");
 	if (!item->comm) {
 		free(item);
 		return -1;
@@ -318,9 +321,14 @@
 	return 0;
 }
 
-void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock)
+int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock)
 {
-	pevent->trace_clock = trace_clock;
+	pevent->trace_clock = strdup(trace_clock);
+	if (!pevent->trace_clock) {
+		errno = ENOMEM;
+		return -1;
+	}
+	return 0;
 }
 
 struct func_map {
@@ -758,6 +766,11 @@
 		free_arg(arg->hex.field);
 		free_arg(arg->hex.size);
 		break;
+	case PRINT_INT_ARRAY:
+		free_arg(arg->int_array.field);
+		free_arg(arg->int_array.count);
+		free_arg(arg->int_array.el_size);
+		break;
 	case PRINT_TYPE:
 		free(arg->typecast.type);
 		free_arg(arg->typecast.item);
@@ -1926,7 +1939,22 @@
 			goto out_warn_free;
 
 		type = process_arg_token(event, right, tok, type);
-		arg->op.right = right;
+
+		if (right->type == PRINT_OP &&
+		    get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
+			struct print_arg tmp;
+
+			/* rotate ops according to the priority */
+			arg->op.right = right->op.left;
+
+			tmp = *arg;
+			*arg = *right;
+			*right = tmp;
+
+			arg->op.left = right;
+		} else {
+			arg->op.right = right;
+		}
 
 	} else if (strcmp(token, "[") == 0) {
 
@@ -2014,6 +2042,38 @@
 	return EVENT_ERROR;
 }
 
+static int alloc_and_process_delim(struct event_format *event, char *next_token,
+				   struct print_arg **print_arg)
+{
+	struct print_arg *field;
+	enum event_type type;
+	char *token;
+	int ret = 0;
+
+	field = alloc_arg();
+	if (!field) {
+		do_warning_event(event, "%s: not enough memory!", __func__);
+		errno = ENOMEM;
+		return -1;
+	}
+
+	type = process_arg(event, field, &token);
+
+	if (test_type_token(type, token, EVENT_DELIM, next_token)) {
+		errno = EINVAL;
+		ret = -1;
+		free_arg(field);
+		goto out_free_token;
+	}
+
+	*print_arg = field;
+
+out_free_token:
+	free_token(token);
+
+	return ret;
+}
+
 static char *arg_eval (struct print_arg *arg);
 
 static unsigned long long
@@ -2486,49 +2546,46 @@
 static enum event_type
 process_hex(struct event_format *event, struct print_arg *arg, char **tok)
 {
-	struct print_arg *field;
-	enum event_type type;
-	char *token = NULL;
-
 	memset(arg, 0, sizeof(*arg));
 	arg->type = PRINT_HEX;
 
-	field = alloc_arg();
-	if (!field) {
-		do_warning_event(event, "%s: not enough memory!", __func__);
-		goto out_free;
-	}
+	if (alloc_and_process_delim(event, ",", &arg->hex.field))
+		goto out;
 
-	type = process_arg(event, field, &token);
+	if (alloc_and_process_delim(event, ")", &arg->hex.size))
+		goto free_field;
 
-	if (test_type_token(type, token, EVENT_DELIM, ","))
-		goto out_free;
+	return read_token_item(tok);
 
-	arg->hex.field = field;
+free_field:
+	free_arg(arg->hex.field);
+out:
+	*tok = NULL;
+	return EVENT_ERROR;
+}
 
-	free_token(token);
+static enum event_type
+process_int_array(struct event_format *event, struct print_arg *arg, char **tok)
+{
+	memset(arg, 0, sizeof(*arg));
+	arg->type = PRINT_INT_ARRAY;
 
-	field = alloc_arg();
-	if (!field) {
-		do_warning_event(event, "%s: not enough memory!", __func__);
-		*tok = NULL;
-		return EVENT_ERROR;
-	}
+	if (alloc_and_process_delim(event, ",", &arg->int_array.field))
+		goto out;
 
-	type = process_arg(event, field, &token);
+	if (alloc_and_process_delim(event, ",", &arg->int_array.count))
+		goto free_field;
 
-	if (test_type_token(type, token, EVENT_DELIM, ")"))
-		goto out_free;
+	if (alloc_and_process_delim(event, ")", &arg->int_array.el_size))
+		goto free_size;
 
-	arg->hex.size = field;
+	return read_token_item(tok);
 
-	free_token(token);
-	type = read_token_item(tok);
-	return type;
-
- out_free:
-	free_arg(field);
-	free_token(token);
+free_size:
+	free_arg(arg->int_array.count);
+free_field:
+	free_arg(arg->int_array.field);
+out:
 	*tok = NULL;
 	return EVENT_ERROR;
 }
@@ -2828,6 +2885,10 @@
 		free_token(token);
 		return process_hex(event, arg, tok);
 	}
+	if (strcmp(token, "__print_array") == 0) {
+		free_token(token);
+		return process_int_array(event, arg, tok);
+	}
 	if (strcmp(token, "__get_str") == 0) {
 		free_token(token);
 		return process_str(event, arg, tok);
@@ -3356,6 +3417,7 @@
 		break;
 	case PRINT_FLAGS:
 	case PRINT_SYMBOL:
+	case PRINT_INT_ARRAY:
 	case PRINT_HEX:
 		break;
 	case PRINT_TYPE:
@@ -3568,7 +3630,7 @@
 	{ "HRTIMER_RESTART", 1 },
 };
 
-static unsigned long long eval_flag(const char *flag)
+static long long eval_flag(const char *flag)
 {
 	int i;
 
@@ -3584,7 +3646,7 @@
 		if (strcmp(flags[i].name, flag) == 0)
 			return flags[i].value;
 
-	return 0;
+	return -1LL;
 }
 
 static void print_str_to_seq(struct trace_seq *s, const char *format,
@@ -3658,7 +3720,7 @@
 	struct print_flag_sym *flag;
 	struct format_field *field;
 	struct printk_map *printk;
-	unsigned long long val, fval;
+	long long val, fval;
 	unsigned long addr;
 	char *str;
 	unsigned char *hex;
@@ -3717,11 +3779,11 @@
 		print = 0;
 		for (flag = arg->flags.flags; flag; flag = flag->next) {
 			fval = eval_flag(flag->value);
-			if (!val && !fval) {
+			if (!val && fval < 0) {
 				print_str_to_seq(s, format, len_arg, flag->str);
 				break;
 			}
-			if (fval && (val & fval) == fval) {
+			if (fval > 0 && (val & fval) == fval) {
 				if (print && arg->flags.delim)
 					trace_seq_puts(s, arg->flags.delim);
 				print_str_to_seq(s, format, len_arg, flag->str);
@@ -3766,6 +3828,54 @@
 		}
 		break;
 
+	case PRINT_INT_ARRAY: {
+		void *num;
+		int el_size;
+
+		if (arg->int_array.field->type == PRINT_DYNAMIC_ARRAY) {
+			unsigned long offset;
+			struct format_field *field =
+				arg->int_array.field->dynarray.field;
+			offset = pevent_read_number(pevent,
+						    data + field->offset,
+						    field->size);
+			num = data + (offset & 0xffff);
+		} else {
+			field = arg->int_array.field->field.field;
+			if (!field) {
+				str = arg->int_array.field->field.name;
+				field = pevent_find_any_field(event, str);
+				if (!field)
+					goto out_warning_field;
+				arg->int_array.field->field.field = field;
+			}
+			num = data + field->offset;
+		}
+		len = eval_num_arg(data, size, event, arg->int_array.count);
+		el_size = eval_num_arg(data, size, event,
+				       arg->int_array.el_size);
+		for (i = 0; i < len; i++) {
+			if (i)
+				trace_seq_putc(s, ' ');
+
+			if (el_size == 1) {
+				trace_seq_printf(s, "%u", *(uint8_t *)num);
+			} else if (el_size == 2) {
+				trace_seq_printf(s, "%u", *(uint16_t *)num);
+			} else if (el_size == 4) {
+				trace_seq_printf(s, "%u", *(uint32_t *)num);
+			} else if (el_size == 8) {
+				trace_seq_printf(s, "%lu", *(uint64_t *)num);
+			} else {
+				trace_seq_printf(s, "BAD SIZE:%d 0x%x",
+						 el_size, *(uint8_t *)num);
+				el_size = 1;
+			}
+
+			num += el_size;
+		}
+		break;
+	}
 	case PRINT_TYPE:
 		break;
 	case PRINT_STRING: {
@@ -3976,7 +4086,7 @@
 	if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
 		goto out_free;
 
-	/* skip the first "%pf: " */
+	/* skip the first "%ps: " */
 	for (ptr = fmt + 5, bptr = data + field->offset;
 	     bptr < data + size && *ptr; ptr++) {
 		int ls = 0;
@@ -3997,6 +4107,10 @@
 				goto process_again;
 			case '.':
 				goto process_again;
+			case 'z':
+			case 'Z':
+				ls = 1;
+				goto process_again;
 			case 'p':
 				ls = 1;
 				/* fall through */
@@ -4939,6 +5053,96 @@
 	return comm;
 }
 
+static struct cmdline *
+pid_from_cmdlist(struct pevent *pevent, const char *comm, struct cmdline *next)
+{
+	struct cmdline_list *cmdlist = (struct cmdline_list *)next;
+
+	if (cmdlist)
+		cmdlist = cmdlist->next;
+	else
+		cmdlist = pevent->cmdlist;
+
+	while (cmdlist && strcmp(cmdlist->comm, comm) != 0)
+		cmdlist = cmdlist->next;
+
+	return (struct cmdline *)cmdlist;
+}
+
+/**
+ * pevent_data_pid_from_comm - return the pid from a given comm
+ * @pevent: a handle to the pevent
+ * @comm: the cmdline to find the pid from
+ * @next: the cmdline structure to find the next comm
+ *
+ * This returns the cmdline structure that holds a pid for a given
+ * comm, or NULL if none found. As there may be more than one pid for
+ * a given comm, the result of this call can be passed back into
+ * a recurring call in the @next paramater, and then it will find the
+ * next pid.
+ * Also, it does a linear seach, so it may be slow.
+ */
+struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm,
+					  struct cmdline *next)
+{
+	struct cmdline *cmdline;
+
+	/*
+	 * If the cmdlines have not been converted yet, then use
+	 * the list.
+	 */
+	if (!pevent->cmdlines)
+		return pid_from_cmdlist(pevent, comm, next);
+
+	if (next) {
+		/*
+		 * The next pointer could have been still from
+		 * a previous call before cmdlines were created
+		 */
+		if (next < pevent->cmdlines ||
+		    next >= pevent->cmdlines + pevent->cmdline_count)
+			next = NULL;
+		else
+			cmdline  = next++;
+	}
+
+	if (!next)
+		cmdline = pevent->cmdlines;
+
+	while (cmdline < pevent->cmdlines + pevent->cmdline_count) {
+		if (strcmp(cmdline->comm, comm) == 0)
+			return cmdline;
+		cmdline++;
+	}
+	return NULL;
+}
+
+/**
+ * pevent_cmdline_pid - return the pid associated to a given cmdline
+ * @cmdline: The cmdline structure to get the pid from
+ *
+ * Returns the pid for a give cmdline. If @cmdline is NULL, then
+ * -1 is returned.
+ */
+int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline)
+{
+	struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline;
+
+	if (!cmdline)
+		return -1;
+
+	/*
+	 * If cmdlines have not been created yet, or cmdline is
+	 * not part of the array, then treat it as a cmdlist instead.
+	 */
+	if (!pevent->cmdlines ||
+	    cmdline < pevent->cmdlines ||
+	    cmdline >= pevent->cmdlines + pevent->cmdline_count)
+		return cmdlist->pid;
+
+	return cmdline->pid;
+}
+
 /**
  * pevent_data_comm_from_pid - parse the data into the print format
  * @s: the trace_seq to write to
@@ -5256,6 +5460,15 @@
 		print_args(args->hex.size);
 		printf(")");
 		break;
+	case PRINT_INT_ARRAY:
+		printf("__print_array(");
+		print_args(args->int_array.field);
+		printf(", ");
+		print_args(args->int_array.count);
+		printf(", ");
+		print_args(args->int_array.el_size);
+		printf(")");
+		break;
 	case PRINT_STRING:
 	case PRINT_BSTRING:
 		printf("__get_str(%s)", args->string.string);
@@ -6228,15 +6441,20 @@
 	pevent->ref_count++;
 }
 
+void pevent_free_format_field(struct format_field *field)
+{
+	free(field->type);
+	free(field->name);
+	free(field);
+}
+
 static void free_format_fields(struct format_field *field)
 {
 	struct format_field *next;
 
 	while (field) {
 		next = field->next;
-		free(field->type);
-		free(field->name);
-		free(field);
+		pevent_free_format_field(field);
 		field = next;
 	}
 }
@@ -6341,6 +6559,7 @@
 		free_handler(handle);
 	}
 
+	free(pevent->trace_clock);
 	free(pevent->events);
 	free(pevent->sort_events);
 
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 7a3873f..86a5839 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -22,6 +22,7 @@
 
 #include <stdbool.h>
 #include <stdarg.h>
+#include <stdio.h>
 #include <regex.h>
 #include <string.h>
 
@@ -91,6 +92,7 @@
 
 extern void trace_seq_terminate(struct trace_seq *s);
 
+extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
 extern int trace_seq_do_printf(struct trace_seq *s);
 
 
@@ -114,7 +116,7 @@
 	char				*name;
 	char				*plugin_alias;
 	char				*description;
-	char				*value;
+	const char			*value;
 	void				*priv;
 	int				set;
 };
@@ -152,6 +154,10 @@
  *   .plugin_alias is used to give a shorter name to access
  *   the vairable. Useful if a plugin handles more than one event.
  *
+ *   If .value is not set, then it is considered a boolean and only
+ *   .set will be processed. If .value is defined, then it is considered
+ *   a string option and .set will be ignored.
+ *
  * PEVENT_PLUGIN_ALIAS: (optional)
  *   The name to use for finding options (uses filename if not defined)
  */
@@ -245,6 +251,12 @@
 	struct print_arg	*size;
 };
 
+struct print_arg_int_array {
+	struct print_arg	*field;
+	struct print_arg	*count;
+	struct print_arg	*el_size;
+};
+
 struct print_arg_dynarray {
 	struct format_field	*field;
 	struct print_arg	*index;
@@ -273,6 +285,7 @@
 	PRINT_FLAGS,
 	PRINT_SYMBOL,
 	PRINT_HEX,
+	PRINT_INT_ARRAY,
 	PRINT_TYPE,
 	PRINT_STRING,
 	PRINT_BSTRING,
@@ -292,6 +305,7 @@
 		struct print_arg_flags		flags;
 		struct print_arg_symbol		symbol;
 		struct print_arg_hex		hex;
+		struct print_arg_int_array	int_array;
 		struct print_arg_func		func;
 		struct print_arg_string		string;
 		struct print_arg_bitmask	bitmask;
@@ -597,7 +611,7 @@
 };
 
 int pevent_register_comm(struct pevent *pevent, const char *comm, int pid);
-void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock);
+int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock);
 int pevent_register_function(struct pevent *pevent, char *name,
 			     unsigned long long addr, char *mod);
 int pevent_register_print_string(struct pevent *pevent, const char *fmt,
@@ -617,6 +631,7 @@
 				      const char *buf,
 				      unsigned long size, const char *sys);
 void pevent_free_format(struct event_format *event);
+void pevent_free_format_field(struct format_field *field);
 
 void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
 			   const char *name, struct pevent_record *record,
@@ -675,6 +690,11 @@
 struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type);
 int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec);
 const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid);
+struct cmdline;
+struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm,
+					  struct cmdline *next);
+int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline);
+
 void pevent_event_info(struct trace_seq *s, struct event_format *event,
 		       struct pevent_record *record);
 int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum,
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
index 136162c..a16756a 100644
--- a/tools/lib/traceevent/event-plugin.c
+++ b/tools/lib/traceevent/event-plugin.c
@@ -18,6 +18,7 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+#include <ctype.h>
 #include <stdio.h>
 #include <string.h>
 #include <dlfcn.h>
@@ -49,6 +50,52 @@
 	void			*handle;
 };
 
+static void lower_case(char *str)
+{
+	if (!str)
+		return;
+	for (; *str; str++)
+		*str = tolower(*str);
+}
+
+static int update_option_value(struct pevent_plugin_option *op, const char *val)
+{
+	char *op_val;
+
+	if (!val) {
+		/* toggle, only if option is boolean */
+		if (op->value)
+			/* Warn? */
+			return 0;
+		op->set ^= 1;
+		return 0;
+	}
+
+	/*
+	 * If the option has a value then it takes a string
+	 * otherwise the option is a boolean.
+	 */
+	if (op->value) {
+		op->value = val;
+		return 0;
+	}
+
+	/* Option is boolean, must be either "1", "0", "true" or "false" */
+
+	op_val = strdup(val);
+	if (!op_val)
+		return -1;
+	lower_case(op_val);
+
+	if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0)
+		op->set = 1;
+	else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0)
+		op->set = 0;
+	free(op_val);
+
+	return 0;
+}
+
 /**
  * traceevent_plugin_list_options - get list of plugin options
  *
@@ -120,6 +167,7 @@
 {
 	struct trace_plugin_options *op;
 	char *plugin;
+	int ret = 0;
 
 	if (option->plugin_alias) {
 		plugin = strdup(option->plugin_alias);
@@ -144,9 +192,10 @@
 		if (strcmp(op->option, option->name) != 0)
 			continue;
 
-		option->value = op->value;
-		option->set ^= 1;
-		goto out;
+		ret = update_option_value(option, op->value);
+		if (ret)
+			goto out;
+		break;
 	}
 
 	/* first look for unnamed options */
@@ -156,14 +205,13 @@
 		if (strcmp(op->option, option->name) != 0)
 			continue;
 
-		option->value = op->value;
-		option->set ^= 1;
+		ret = update_option_value(option, op->value);
 		break;
 	}
 
  out:
 	free(plugin);
-	return 0;
+	return ret;
 }
 
 /**
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
index dcc6652..3bcada3 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -372,7 +372,6 @@
 	switch (type_len) {
 	case KBUFFER_TYPE_PADDING:
 		*length = read_4(kbuf, data);
-		data += *length;
 		break;
 
 	case KBUFFER_TYPE_TIME_EXTEND:
@@ -730,3 +729,14 @@
 
 	kbuf->next_event = __old_next_event;
 }
+
+/**
+ * kbuffer_start_of_data - return offset of where data starts on subbuffer
+ * @kbuf:	The kbuffer
+ *
+ * Returns the location on the subbuffer where the data starts.
+ */
+int kbuffer_start_of_data(struct kbuffer *kbuf)
+{
+	return kbuf->start;
+}
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
index c831f64..03dce75 100644
--- a/tools/lib/traceevent/kbuffer.h
+++ b/tools/lib/traceevent/kbuffer.h
@@ -63,5 +63,6 @@
 int kbuffer_subbuffer_size(struct kbuffer *kbuf);
 
 void kbuffer_set_old_format(struct kbuffer *kbuf);
+int kbuffer_start_of_data(struct kbuffer *kbuf);
 
 #endif /* _K_BUFFER_H */
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index b502344..0144b3d 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1058,6 +1058,7 @@
 					*parg = current_op;
 				else
 					*parg = current_exp;
+				free(token);
 				return PEVENT_ERRNO__UNBALANCED_PAREN;
 			}
 			break;
@@ -1168,6 +1169,7 @@
 
 	*parg = current_op;
 
+	free(token);
 	return 0;
 
  fail_alloc:
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
index ec3bd16..292dc9f 100644
--- a/tools/lib/traceevent/trace-seq.c
+++ b/tools/lib/traceevent/trace-seq.c
@@ -231,19 +231,24 @@
 	s->buffer[s->len] = 0;
 }
 
-int trace_seq_do_printf(struct trace_seq *s)
+int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp)
 {
 	TRACE_SEQ_CHECK(s);
 
 	switch (s->state) {
 	case TRACE_SEQ__GOOD:
-		return printf("%.*s", s->len, s->buffer);
+		return fprintf(fp, "%.*s", s->len, s->buffer);
 	case TRACE_SEQ__BUFFER_POISONED:
-		puts("Usage of trace_seq after it was destroyed");
+		fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed");
 		break;
 	case TRACE_SEQ__MEM_ALLOC_FAILED:
-		puts("Can't allocate trace_seq buffer memory");
+		fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory");
 		break;
 	}
 	return -1;
 }
+
+int trace_seq_do_printf(struct trace_seq *s)
+{
+	return trace_seq_do_fprintf(s, stdout);
+}
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 40399c3..812f904 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -1,6 +1,7 @@
 PERF-CFLAGS
 PERF-GUI-VARS
 PERF-VERSION-FILE
+FEATURE-DUMP
 perf
 perf-read-vdso32
 perf-read-vdsox32
diff --git a/tools/perf/Build b/tools/perf/Build
new file mode 100644
index 0000000..b77370e
--- /dev/null
+++ b/tools/perf/Build
@@ -0,0 +1,44 @@
+perf-y += builtin-bench.o
+perf-y += builtin-annotate.o
+perf-y += builtin-diff.o
+perf-y += builtin-evlist.o
+perf-y += builtin-help.o
+perf-y += builtin-sched.o
+perf-y += builtin-buildid-list.o
+perf-y += builtin-buildid-cache.o
+perf-y += builtin-list.o
+perf-y += builtin-record.o
+perf-y += builtin-report.o
+perf-y += builtin-stat.o
+perf-y += builtin-timechart.o
+perf-y += builtin-top.o
+perf-y += builtin-script.o
+perf-y += builtin-kmem.o
+perf-y += builtin-lock.o
+perf-y += builtin-kvm.o
+perf-y += builtin-inject.o
+perf-y += builtin-mem.o
+perf-y += builtin-data.o
+
+perf-$(CONFIG_AUDIT) += builtin-trace.o
+perf-$(CONFIG_LIBELF) += builtin-probe.o
+
+perf-y += bench/
+perf-y += tests/
+
+perf-y += perf.o
+
+paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))"
+paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))"
+paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))"
+
+CFLAGS_builtin-help.o      += $(paths)
+CFLAGS_builtin-timechart.o += $(paths)
+CFLAGS_perf.o              += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE
+
+libperf-y += util/
+libperf-y += arch/
+libperf-y += ui/
+libperf-y += scripts/
+
+gtk-y += ui/gtk/
diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt
new file mode 100644
index 0000000..f6fc650
--- /dev/null
+++ b/tools/perf/Documentation/Build.txt
@@ -0,0 +1,49 @@
+
+1) perf build
+=============
+The perf build process consists of several separated building blocks,
+which are linked together to form the perf binary:
+  - libperf library (static)
+  - perf builtin commands
+  - traceevent library (static)
+  - GTK ui library
+
+Several makefiles govern the perf build:
+
+  - Makefile
+    top level Makefile working as a wrapper that calls the main
+    Makefile.perf with a -j option to do parallel builds.
+
+  - Makefile.perf
+    main makefile that triggers build of all perf objects including
+    installation and documentation processing.
+
+  - tools/build/Makefile.build
+    main makefile of the build framework
+
+  - tools/build/Build.include
+    build framework generic definitions
+
+  - Build makefiles
+    makefiles that defines build objects
+
+Please refer to tools/build/Documentation/Build.txt for more
+information about build framework.
+
+
+2) perf build
+=============
+The Makefile.perf triggers the build framework for build objects:
+   perf, libperf, gtk
+
+resulting in following objects:
+  $ ls  *-in.o
+  gtk-in.o  libperf-in.o  perf-in.o
+
+Those objects are then used in final linking:
+  libperf-gtk.so <- gtk-in.o  libperf-in.o
+  perf           <- perf-in.o libperf-in.o
+
+
+NOTE this description is omitting other libraries involved, only
+     focusing on build framework outcomes
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index 0294c57..dd07b55 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -12,9 +12,9 @@
 
 DESCRIPTION
 -----------
-This command manages the build-id cache. It can add and remove files to/from
-the cache. In the future it should as well purge older entries, set upper
-limits for the space used by the cache, etc.
+This command manages the build-id cache. It can add, remove, update and purge
+files to/from the cache. In the future it should as well set upper limits for
+the space used by the cache, etc.
 
 OPTIONS
 -------
@@ -36,14 +36,24 @@
         actually made.
 -r::
 --remove=::
-        Remove specified file from the cache.
+        Remove a cached binary which has same build-id of specified file
+        from the cache.
+-p::
+--purge=::
+        Purge all cached binaries including older caches which have specified
+	path from the cache.
 -M::
 --missing=::
 	List missing build ids in the cache for the specified file.
 -u::
---update::
-	Update specified file of the cache. It can be used to update kallsyms
-	kernel dso to vmlinux in order to support annotation.
+--update=::
+	Update specified file of the cache. Note that this doesn't remove
+	older entires since those may be still needed for annotating old
+	(or remote) perf.data. Only if there is already a cache which has
+	exactly same build-id, that is replaced by new one. It can be used
+	to update kallsyms and kernel dso to vmlinux in order to support
+	annotation.
+
 -v::
 --verbose::
 	Be more verbose.
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
new file mode 100644
index 0000000..be8fa1a
--- /dev/null
+++ b/tools/perf/Documentation/perf-data.txt
@@ -0,0 +1,40 @@
+perf-data(1)
+==============
+
+NAME
+----
+perf-data - Data file related processing
+
+SYNOPSIS
+--------
+[verse]
+'perf data' [<common options>] <command> [<options>]",
+
+DESCRIPTION
+-----------
+Data file related processing.
+
+COMMANDS
+--------
+convert::
+	Converts perf data file into another format (only CTF [1] format is support by now).
+	It's possible to set data-convert debug variable to get debug messages from conversion,
+	like:
+	  perf --debug data-convert data convert ...
+
+OPTIONS for 'convert'
+---------------------
+--to-ctf::
+	Triggers the CTF conversion, specify the path of CTF data directory.
+
+-i::
+	Specify input perf data file path.
+
+-v::
+--verbose::
+        Be more verbose (show counter open errors, etc).
+
+SEE ALSO
+--------
+linkperf:perf[1]
+[1] Common Trace Format - http://www.efficios.com/ctf
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index e463caa..d1deb57 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -20,12 +20,20 @@
 The differential profile is displayed only for events matching both
 specified perf.data files.
 
+If no parameters are passed the samples will be sorted by dso and symbol.
+As the perf.data files could come from different binaries, the symbols addresses
+could vary. So perf diff is based on the comparison of the files and
+symbols name.
+
 OPTIONS
 -------
 -D::
 --dump-raw-trace::
         Dump raw trace in ASCII.
 
+--kallsyms=<file>::
+        kallsyms pathname
+
 -m::
 --modules::
         Load module symbols. WARNING: use only with -k and LIVE kernel
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 7c8fbbf..150253c 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -25,6 +25,10 @@
 --input=<file>::
 	Select the input file (default: perf.data unless stdin is a fifo)
 
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 --caller::
 	Show per-callsite statistics
 
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 3e2aec9..bada893 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -26,6 +26,7 @@
  u - user-space counting
  k - kernel counting
  h - hypervisor counting
+ I - non idle counting
  G - guest counting (in KVM guests)
  H - host counting (not in KVM guests)
  p - precise level
@@ -127,6 +128,12 @@
 One or more types can be used at the same time, listing the events for the
 types specified.
 
+Support raw format:
+
+. '--raw-dump', shows the raw-dump of all the events.
+. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of
+  a certain kind of events.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-top[1],
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index aaa869b..239609c 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -47,6 +47,12 @@
 -v::
 --verbose::
         Be more verbose (show parsed arguments, etc).
+	Can not use with -q.
+
+-q::
+--quiet::
+	Be quiet (do not show any messages including errors).
+	Can not use with -v.
 
 -a::
 --add=::
@@ -96,7 +102,7 @@
 	Dry run. With this option, --add and --del doesn't execute actual
 	adding and removal operations.
 
---max-probes::
+--max-probes=NUM::
 	Set the maximum number of probe points for an event. Default is 128.
 
 -x::
@@ -104,8 +110,13 @@
 	Specify path to the executable or shared library file for user
 	space tracing. Can also be used with --funcs option.
 
+--demangle::
+	Demangle application symbols. --no-demangle is also available
+	for disabling demangling.
+
 --demangle-kernel::
-	Demangle kernel symbols.
+	Demangle kernel symbols. --no-demangle-kernel is also available
+	for disabling kernel demangling.
 
 In absence of -m/-x options, perf probe checks if the first argument after
 the options is an absolute path name. If its an absolute path, perf probe
@@ -137,6 +148,7 @@
  [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
+'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point.
 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
 
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 31e9774..4847a79 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -55,6 +55,11 @@
           If you want to profile write accesses in [0x1000~1008), just set
           'mem:0x1000/8:w'.
 
+	- a group of events surrounded by a pair of brace ("{event1,event2,...}").
+	  Each event is separated by commas and the group should be quoted to
+	  prevent the shell interpretation.  You also need to use --group on
+	  "perf report" to view group events together.
+
 --filter=<filter>::
         Event filter.
 
@@ -62,9 +67,6 @@
 --all-cpus::
         System-wide collection from all CPUs.
 
--l::
-        Scale counter values.
-
 -p::
 --pid=::
 	Record events on existing process ID (comma separated list).
@@ -107,6 +109,10 @@
 	specification with appended unit character - B/K/M/G. The
 	size is rounded up to have nearest pages power of two value.
 
+--group::
+	Put all events in a single event group.  This precedes the --event
+	option and remains only for backward compatibility.  See --event.
+
 -g::
 	Enables call-graph (stack chain/backtrace) recording.
 
@@ -115,13 +121,19 @@
 	implies -g.
 
 	Allows specifying "fp" (frame pointer) or "dwarf"
-	(DWARF's CFI - Call Frame Information) as the method to collect
+	(DWARF's CFI - Call Frame Information) or "lbr"
+	(Hardware Last Branch Record facility) as the method to collect
 	the information used to show the call graphs.
 
 	In some systems, where binaries are build with gcc
 	--fomit-frame-pointer, using the "fp" method will produce bogus
 	call graphs, using "dwarf", if available (perf tools linked to
 	the libunwind library) should be used instead.
+	Using the "lbr" method doesn't require any compiler options. It
+	will produce call graphs from the hardware LBR registers. The
+	main limition is that it is only available on new Intel
+	platforms, such as Haswell. It can only get user call chain. It
+	doesn't work with branch stack sampling at the same time.
 
 -q::
 --quiet::
@@ -235,6 +247,16 @@
 each sample. List of captured registers depends on the architecture. This option
 is off by default.
 
+--running-time::
+Record running and enabled time for read events (:S)
+
+-k::
+--clockid::
+Sets the clock id to use for the various time fields in the perf_event_type
+records. See clock_gettime(). In particular CLOCK_MONOTONIC and
+CLOCK_MONOTONIC_RAW are supported, some events might also allow
+CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index dd7cccd..4879cf6 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -40,6 +40,11 @@
 	Only consider symbols in these comms. CSV that understands
 	file://filename entries.  This option will affect the percentage of
 	the overhead column.  See --percentage for more info.
+--pid=::
+        Only show events for given process ID (comma separated list).
+
+--tid=::
+        Only show events for given thread ID (comma separated list).
 -d::
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index a21eec0..7944575 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -193,6 +193,12 @@
 	Only display events for these comms. CSV that understands
 	file://filename entries.
 
+--pid=::
+	Only show events for given process ID (comma separated list).
+
+--tid=::
+	Only show events for given thread ID (comma separated list).
+
 -I::
 --show-info::
 	Display extended information about the perf.data file. This adds
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 7e1b1f2..ba03fd5 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -55,6 +55,9 @@
 --uid=::
         Record events in threads owned by uid. Name or number.
 
+--filter-pids=::
+	Filter out events for these pids and for 'trace' itself (comma separated list).
+
 -v::
 --verbose=::
         Verbosity level.
@@ -115,6 +118,9 @@
 --syscalls::
 	Trace system calls. This options is enabled by default.
 
+--event::
+	Trace other events, see 'perf list' for a complete list.
+
 PAGEFAULTS
 ----------
 
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 1e8e400..2b13177 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -13,11 +13,16 @@
 OPTIONS
 -------
 --debug::
-	Setup debug variable (just verbose for now) in value
+	Setup debug variable (see list below) in value
 	range (0, 10). Use like:
 	  --debug verbose   # sets verbose = 1
 	  --debug verbose=2 # sets verbose = 2
 
+	List of debug variables allowed to set:
+	  verbose          - general debug messages
+	  ordered-events   - ordered events object debug messages
+	  data-convert     - data convert command debug messages
+
 --buildid-dir::
 	Setup buildid cache directory. It has higher priority than
 	buildid.dir config file option.
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index fbbfdc3..11ccbb2 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,5 +1,6 @@
 tools/perf
 tools/scripts
+tools/build
 tools/lib/traceevent
 tools/lib/api
 tools/lib/symbol/kallsyms.c
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index cb2e586..c699dc3 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -24,8 +24,8 @@
 # (To override it, run 'make JOBS=1' and similar.)
 #
 ifeq ($(JOBS),)
-  JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null)
-  ifeq ($(JOBS),)
+  JOBS := $(shell egrep -c '^processor|^CPU' /proc/cpuinfo 2>/dev/null)
+  ifeq ($(JOBS),0)
     JOBS := 1
   endif
 endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index aa6a504..c43a205 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -68,7 +68,11 @@
 # for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode
 #
 # Define NO_ZLIB if you do not want to support compressed kernel modules
-
+#
+# Define LIBBABELTRACE if you DO want libbabeltrace support
+# for CTF data format.
+#
+# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(shell pwd)))
@@ -82,13 +86,29 @@
 
 ifneq ($(OUTPUT),)
 #$(info Determined 'OUTPUT' to be $(OUTPUT))
+# Adding $(OUTPUT) as a directory to look for source files,
+# because use generated output files as sources dependency
+# for flex/bison parsers.
+VPATH += $(OUTPUT)
+export VPATH
 endif
 
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
 $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
-	@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
-	@touch $(OUTPUT)PERF-VERSION-FILE
+	$(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+	$(Q)touch $(OUTPUT)PERF-VERSION-FILE
 
 CC = $(CROSS_COMPILE)gcc
+LD = $(CROSS_COMPILE)ld
 AR = $(CROSS_COMPILE)ar
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 
@@ -127,10 +147,6 @@
 SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
 
 # Guard against environment variables
-BUILTIN_OBJS =
-LIB_H =
-LIB_OBJS =
-GTK_OBJS =
 PYRF_OBJS =
 SCRIPT_SH =
 
@@ -155,8 +171,8 @@
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 export LIBTRACEEVENT
 
-LIBAPIKFS = $(LIB_PATH)libapikfs.a
-export LIBAPIKFS
+LIBAPI = $(LIB_PATH)libapi.a
+export LIBAPI
 
 # python extension build directories
 PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
@@ -167,7 +183,7 @@
 python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
 
 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
-PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
 
 $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
 	$(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
@@ -206,297 +222,9 @@
 
 export PERL_PATH
 
-$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
-	$(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
-
-$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
-	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
-
-$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
-	$(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
-
-$(OUTPUT)util/pmu-bison.c: util/pmu.y
-	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
-
-$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
-$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
-
 LIB_FILE=$(OUTPUT)libperf.a
 
-LIB_H += ../lib/symbol/kallsyms.h
-LIB_H += ../../include/uapi/linux/perf_event.h
-LIB_H += ../../include/linux/rbtree.h
-LIB_H += ../../include/linux/list.h
-LIB_H += ../../include/uapi/linux/const.h
-LIB_H += ../include/linux/hash.h
-LIB_H += ../../include/linux/stringify.h
-LIB_H += util/include/linux/bitmap.h
-LIB_H += ../include/linux/bitops.h
-LIB_H += ../include/asm-generic/bitops/arch_hweight.h
-LIB_H += ../include/asm-generic/bitops/atomic.h
-LIB_H += ../include/asm-generic/bitops/const_hweight.h
-LIB_H += ../include/asm-generic/bitops/find.h
-LIB_H += ../include/asm-generic/bitops/fls64.h
-LIB_H += ../include/asm-generic/bitops/fls.h
-LIB_H += ../include/asm-generic/bitops/__ffs.h
-LIB_H += ../include/asm-generic/bitops/__fls.h
-LIB_H += ../include/asm-generic/bitops/hweight.h
-LIB_H += ../include/asm-generic/bitops.h
-LIB_H += ../include/linux/compiler.h
-LIB_H += ../include/linux/log2.h
-LIB_H += util/include/linux/const.h
-LIB_H += util/include/linux/ctype.h
-LIB_H += util/include/linux/kernel.h
-LIB_H += util/include/linux/list.h
-LIB_H += ../include/linux/export.h
-LIB_H += util/include/linux/poison.h
-LIB_H += util/include/linux/rbtree.h
-LIB_H += util/include/linux/rbtree_augmented.h
-LIB_H += util/include/linux/string.h
-LIB_H += ../include/linux/types.h
-LIB_H += util/include/linux/linkage.h
-LIB_H += util/include/asm/asm-offsets.h
-LIB_H += ../include/asm/bug.h
-LIB_H += util/include/asm/byteorder.h
-LIB_H += util/include/asm/swab.h
-LIB_H += util/include/asm/system.h
-LIB_H += util/include/asm/uaccess.h
-LIB_H += util/include/dwarf-regs.h
-LIB_H += util/include/asm/dwarf2.h
-LIB_H += util/include/asm/cpufeature.h
-LIB_H += util/include/asm/unistd_32.h
-LIB_H += util/include/asm/unistd_64.h
-LIB_H += perf.h
-LIB_H += util/annotate.h
-LIB_H += util/cache.h
-LIB_H += util/callchain.h
-LIB_H += util/build-id.h
-LIB_H += util/db-export.h
-LIB_H += util/debug.h
-LIB_H += util/pmu.h
-LIB_H += util/event.h
-LIB_H += util/evsel.h
-LIB_H += util/evlist.h
-LIB_H += util/exec_cmd.h
-LIB_H += util/find-vdso-map.c
-LIB_H += util/levenshtein.h
-LIB_H += util/machine.h
-LIB_H += util/map.h
-LIB_H += util/parse-options.h
-LIB_H += util/parse-events.h
-LIB_H += util/quote.h
-LIB_H += util/util.h
-LIB_H += util/xyarray.h
-LIB_H += util/header.h
-LIB_H += util/help.h
-LIB_H += util/session.h
-LIB_H += util/ordered-events.h
-LIB_H += util/strbuf.h
-LIB_H += util/strlist.h
-LIB_H += util/strfilter.h
-LIB_H += util/svghelper.h
-LIB_H += util/tool.h
-LIB_H += util/run-command.h
-LIB_H += util/sigchain.h
-LIB_H += util/dso.h
-LIB_H += util/symbol.h
-LIB_H += util/color.h
-LIB_H += util/values.h
-LIB_H += util/sort.h
-LIB_H += util/hist.h
-LIB_H += util/comm.h
-LIB_H += util/thread.h
-LIB_H += util/thread_map.h
-LIB_H += util/trace-event.h
-LIB_H += util/probe-finder.h
-LIB_H += util/dwarf-aux.h
-LIB_H += util/probe-event.h
-LIB_H += util/pstack.h
-LIB_H += util/cpumap.h
-LIB_H += util/top.h
-LIB_H += $(ARCH_INCLUDE)
-LIB_H += util/cgroup.h
-LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h
-LIB_H += util/target.h
-LIB_H += util/rblist.h
-LIB_H += util/intlist.h
-LIB_H += util/perf_regs.h
-LIB_H += util/unwind.h
-LIB_H += util/vdso.h
-LIB_H += util/tsc.h
-LIB_H += ui/helpline.h
-LIB_H += ui/progress.h
-LIB_H += ui/util.h
-LIB_H += ui/ui.h
-LIB_H += util/data.h
-LIB_H += util/kvm-stat.h
-LIB_H += util/thread-stack.h
-
-LIB_OBJS += $(OUTPUT)util/abspath.o
-LIB_OBJS += $(OUTPUT)util/alias.o
-LIB_OBJS += $(OUTPUT)util/annotate.o
-LIB_OBJS += $(OUTPUT)util/build-id.o
-LIB_OBJS += $(OUTPUT)util/config.o
-LIB_OBJS += $(OUTPUT)util/ctype.o
-LIB_OBJS += $(OUTPUT)util/db-export.o
-LIB_OBJS += $(OUTPUT)util/pmu.o
-LIB_OBJS += $(OUTPUT)util/environment.o
-LIB_OBJS += $(OUTPUT)util/event.o
-LIB_OBJS += $(OUTPUT)util/evlist.o
-LIB_OBJS += $(OUTPUT)util/evsel.o
-LIB_OBJS += $(OUTPUT)util/exec_cmd.o
-LIB_OBJS += $(OUTPUT)util/find_next_bit.o
-LIB_OBJS += $(OUTPUT)util/help.o
-LIB_OBJS += $(OUTPUT)util/kallsyms.o
-LIB_OBJS += $(OUTPUT)util/levenshtein.o
-LIB_OBJS += $(OUTPUT)util/parse-options.o
-LIB_OBJS += $(OUTPUT)util/parse-events.o
-LIB_OBJS += $(OUTPUT)util/path.o
-LIB_OBJS += $(OUTPUT)util/rbtree.o
-LIB_OBJS += $(OUTPUT)util/bitmap.o
-LIB_OBJS += $(OUTPUT)util/hweight.o
-LIB_OBJS += $(OUTPUT)util/run-command.o
-LIB_OBJS += $(OUTPUT)util/quote.o
-LIB_OBJS += $(OUTPUT)util/strbuf.o
-LIB_OBJS += $(OUTPUT)util/string.o
-LIB_OBJS += $(OUTPUT)util/strlist.o
-LIB_OBJS += $(OUTPUT)util/strfilter.o
-LIB_OBJS += $(OUTPUT)util/top.o
-LIB_OBJS += $(OUTPUT)util/usage.o
-LIB_OBJS += $(OUTPUT)util/wrapper.o
-LIB_OBJS += $(OUTPUT)util/sigchain.o
-LIB_OBJS += $(OUTPUT)util/dso.o
-LIB_OBJS += $(OUTPUT)util/symbol.o
-LIB_OBJS += $(OUTPUT)util/symbol-elf.o
-LIB_OBJS += $(OUTPUT)util/color.o
-LIB_OBJS += $(OUTPUT)util/pager.o
-LIB_OBJS += $(OUTPUT)util/header.o
-LIB_OBJS += $(OUTPUT)util/callchain.o
-LIB_OBJS += $(OUTPUT)util/values.o
-LIB_OBJS += $(OUTPUT)util/debug.o
-LIB_OBJS += $(OUTPUT)util/machine.o
-LIB_OBJS += $(OUTPUT)util/map.o
-LIB_OBJS += $(OUTPUT)util/pstack.o
-LIB_OBJS += $(OUTPUT)util/session.o
-LIB_OBJS += $(OUTPUT)util/ordered-events.o
-LIB_OBJS += $(OUTPUT)util/comm.o
-LIB_OBJS += $(OUTPUT)util/thread.o
-LIB_OBJS += $(OUTPUT)util/thread_map.o
-LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
-LIB_OBJS += $(OUTPUT)util/parse-events-flex.o
-LIB_OBJS += $(OUTPUT)util/parse-events-bison.o
-LIB_OBJS += $(OUTPUT)util/pmu-flex.o
-LIB_OBJS += $(OUTPUT)util/pmu-bison.o
-LIB_OBJS += $(OUTPUT)util/trace-event-read.o
-LIB_OBJS += $(OUTPUT)util/trace-event-info.o
-LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
-LIB_OBJS += $(OUTPUT)util/trace-event.o
-LIB_OBJS += $(OUTPUT)util/svghelper.o
-LIB_OBJS += $(OUTPUT)util/sort.o
-LIB_OBJS += $(OUTPUT)util/hist.o
-LIB_OBJS += $(OUTPUT)util/probe-event.o
-LIB_OBJS += $(OUTPUT)util/util.o
-LIB_OBJS += $(OUTPUT)util/xyarray.o
-LIB_OBJS += $(OUTPUT)util/cpumap.o
-LIB_OBJS += $(OUTPUT)util/cgroup.o
-LIB_OBJS += $(OUTPUT)util/target.o
-LIB_OBJS += $(OUTPUT)util/rblist.o
-LIB_OBJS += $(OUTPUT)util/intlist.o
-LIB_OBJS += $(OUTPUT)util/vdso.o
-LIB_OBJS += $(OUTPUT)util/stat.o
-LIB_OBJS += $(OUTPUT)util/record.o
-LIB_OBJS += $(OUTPUT)util/srcline.o
-LIB_OBJS += $(OUTPUT)util/data.o
-LIB_OBJS += $(OUTPUT)util/tsc.o
-LIB_OBJS += $(OUTPUT)util/cloexec.o
-LIB_OBJS += $(OUTPUT)util/thread-stack.o
-
-LIB_OBJS += $(OUTPUT)ui/setup.o
-LIB_OBJS += $(OUTPUT)ui/helpline.o
-LIB_OBJS += $(OUTPUT)ui/progress.o
-LIB_OBJS += $(OUTPUT)ui/util.o
-LIB_OBJS += $(OUTPUT)ui/hist.o
-LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
-
-LIB_OBJS += $(OUTPUT)arch/common.o
-
-LIB_OBJS += $(OUTPUT)tests/parse-events.o
-LIB_OBJS += $(OUTPUT)tests/dso-data.o
-LIB_OBJS += $(OUTPUT)tests/attr.o
-LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o
-LIB_OBJS += $(OUTPUT)tests/mmap-basic.o
-LIB_OBJS += $(OUTPUT)tests/perf-record.o
-LIB_OBJS += $(OUTPUT)tests/rdpmc.o
-LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
-LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
-LIB_OBJS += $(OUTPUT)tests/fdarray.o
-LIB_OBJS += $(OUTPUT)tests/pmu.o
-LIB_OBJS += $(OUTPUT)tests/hists_common.o
-LIB_OBJS += $(OUTPUT)tests/hists_link.o
-LIB_OBJS += $(OUTPUT)tests/hists_filter.o
-LIB_OBJS += $(OUTPUT)tests/hists_output.o
-LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
-LIB_OBJS += $(OUTPUT)tests/python-use.o
-LIB_OBJS += $(OUTPUT)tests/bp_signal.o
-LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
-LIB_OBJS += $(OUTPUT)tests/task-exit.o
-LIB_OBJS += $(OUTPUT)tests/sw-clock.o
-ifeq ($(ARCH),x86)
-LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
-endif
-LIB_OBJS += $(OUTPUT)tests/code-reading.o
-LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
-LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
-ifndef NO_DWARF_UNWIND
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
-LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
-endif
-endif
-LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
-LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
-LIB_OBJS += $(OUTPUT)tests/switch-tracking.o
-
-BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
-BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
-# Benchmark modules
-BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
-BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
-ifeq ($(ARCH), x86)
-ifeq ($(IS_64_BIT), 1)
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
-endif
-endif
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
-BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
-BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
-BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
-
-BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
-BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
-BUILTIN_OBJS += $(OUTPUT)builtin-help.o
-BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
-BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
-BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
-BUILTIN_OBJS += $(OUTPUT)builtin-list.o
-BUILTIN_OBJS += $(OUTPUT)builtin-record.o
-BUILTIN_OBJS += $(OUTPUT)builtin-report.o
-BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
-BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
-BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-script.o
-BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
-BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
-BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
-BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
-BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
-BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
-BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
-
-PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT)
+PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT)
 
 # We choose to avoid "if .. else if .. else .. endif endif"
 # because maintaining the nesting to match is a pain.  If
@@ -508,67 +236,9 @@
   CFLAGS += -I$(OUTPUT)
 endif
 
-ifdef NO_LIBELF
-# Remove ELF/DWARF dependent codes
-LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS))
-
-BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
-
-# Use minimal symbol handling
-LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
-
-else # NO_LIBELF
-ifndef NO_DWARF
-  LIB_OBJS += $(OUTPUT)util/probe-finder.o
-  LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
-endif # NO_DWARF
-endif # NO_LIBELF
-
-ifndef NO_LIBDW_DWARF_UNWIND
-  LIB_OBJS += $(OUTPUT)util/unwind-libdw.o
-  LIB_H += util/unwind-libdw.h
-endif
-
-ifndef NO_LIBUNWIND
-  LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o
-endif
-LIB_OBJS += $(OUTPUT)tests/keep-tracking.o
-
-ifndef NO_LIBAUDIT
-  BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
-endif
-
-ifndef NO_SLANG
-  LIB_OBJS += $(OUTPUT)ui/browser.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/map.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/header.o
-  LIB_OBJS += $(OUTPUT)ui/tui/setup.o
-  LIB_OBJS += $(OUTPUT)ui/tui/util.o
-  LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
-  LIB_OBJS += $(OUTPUT)ui/tui/progress.o
-  LIB_H += ui/tui/tui.h
-  LIB_H += ui/browser.h
-  LIB_H += ui/browsers/map.h
-  LIB_H += ui/keysyms.h
-  LIB_H += ui/libslang.h
-endif
-
 ifndef NO_GTK2
   ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
-
-  GTK_OBJS += $(OUTPUT)ui/gtk/browser.o
-  GTK_OBJS += $(OUTPUT)ui/gtk/hists.o
-  GTK_OBJS += $(OUTPUT)ui/gtk/setup.o
-  GTK_OBJS += $(OUTPUT)ui/gtk/util.o
-  GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o
-  GTK_OBJS += $(OUTPUT)ui/gtk/progress.o
-  GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o
+  GTK_IN := $(OUTPUT)gtk-in.o
 
 install-gtk: $(OUTPUT)libperf-gtk.so
 	$(call QUIET_INSTALL, 'GTK UI') \
@@ -576,31 +246,6 @@
 		$(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)'
 endif
 
-ifndef NO_LIBPERL
-  LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
-  LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
-endif
-
-ifndef NO_LIBPYTHON
-  LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
-  LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
-endif
-
-ifeq ($(NO_PERF_REGS),0)
-  ifeq ($(ARCH),x86)
-    LIB_H += arch/x86/include/perf_regs.h
-  endif
-  LIB_OBJS += $(OUTPUT)util/perf_regs.o
-endif
-
-ifndef NO_LIBNUMA
-  BUILTIN_OBJS += $(OUTPUT)bench/numa.o
-endif
-
-ifndef NO_ZLIB
-  LIB_OBJS += $(OUTPUT)util/zlib.o
-endif
-
 ifdef ASCIIDOC8
   export ASCIIDOC8
 endif
@@ -616,40 +261,30 @@
 all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
 
 please_set_SHELL_PATH_to_a_more_modern_shell:
-	@$$(:)
+	$(Q)$$(:)
 
 shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
 
 strip: $(PROGRAMS) $(OUTPUT)perf
 	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
 
-$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
-		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
-		$(CFLAGS) -c $(filter %.c,$^) -o $@
+PERF_IN := $(OUTPUT)perf-in.o
 
-$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \
-               $(BUILTIN_OBJS) $(LIBS) -o $@
+export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 
-$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H)
-	$(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $<
+$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
+	$(Q)$(MAKE) $(build)=perf
 
-$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS)
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@
+
+$(GTK_IN): FORCE
+	$(Q)$(MAKE) $(build)=gtk
+
+$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
 	$(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
 
-$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
-		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
-		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
-$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
-		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
-		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
 $(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
 
 $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
@@ -659,8 +294,7 @@
 	$(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
 
 # These can record PERF_VERSION
-$(OUTPUT)perf.o perf.spec \
-	$(SCRIPTS) \
+perf.spec $(SCRIPTS) \
 	: $(OUTPUT)PERF-VERSION-FILE
 
 .SUFFIXES:
@@ -683,90 +317,33 @@
 # These two need to be here so that when O= is not used they take precedence
 # over the general rule for .o
 
-$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $<
+# get relative building directory (to $(OUTPUT))
+# and '.' if it's $(OUTPUT) itself
+__build-dir = $(subst $(OUTPUT),,$(dir $@))
+build-dir   = $(if $(__build-dir),$(__build-dir),.)
 
-$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
+single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
 
-$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
-$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
-$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $<
-$(OUTPUT)%.o: %.S
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
-$(OUTPUT)%.s: %.S
-	$(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+$(OUTPUT)%.o: %.c single_dep FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
-		'-DPREFIX="$(prefix_SQ)"' \
-		$<
+$(OUTPUT)%.i: %.c single_dep FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		'-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
-		$<
+$(OUTPUT)%.s: %.c single_dep FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		-DPYTHONPATH='"$(OUTPUT)python"' \
-		-DPYTHON='"$(PYTHON_WORD)"' \
-		$<
+$(OUTPUT)%-bison.o: %.c single_dep FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $<
+$(OUTPUT)%-flex.o: %.c single_dep FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+$(OUTPUT)%.o: %.S single_dep FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $<
-
-$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
-
-$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)util/hweight.o: ../../lib/hweight.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
-
-$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<
-
-$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<
-
-$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
-
-$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+$(OUTPUT)%.i: %.S single_dep FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
 $(OUTPUT)perf-%: %.o $(PERFLIBS)
 	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
@@ -781,58 +358,34 @@
 	$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
 endif
 
-$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
-$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
-# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
-# we depend the various files onto their directories.
-DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS)
-DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
-# no need to add flex objects, because they depend on bison ones
-DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c
-DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c
+LIBPERF_IN := $(OUTPUT)libperf-in.o
 
-OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS)))
+$(LIBPERF_IN): FORCE
+	$(Q)$(MAKE) $(build)=libperf
 
-$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES)
-# In the second step, we make a rule to actually create these directories
-$(OUTPUT_DIRECTORIES):
-	$(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
+$(LIB_FILE): $(LIBPERF_IN)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
 
-$(LIB_FILE): $(LIB_OBJS)
-	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
-
-# libtraceevent.a
-TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch])
-
-LIBTRACEEVENT_FLAGS  = $(QUIET_SUBDIR1) O=$(OUTPUT)
-LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)"
 LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
 
-$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS
-	$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins
+$(LIBTRACEEVENT): FORCE
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins
 
 $(LIBTRACEEVENT)-clean:
 	$(call QUIET_CLEAN, libtraceevent)
-	@$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
 
 install-traceevent-plugins: $(LIBTRACEEVENT)
-	$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
 
-LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch])
+$(LIBAPI): FORCE
+	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
 
-# if subdir is set, we've been called from above so target has been built
-# already
-$(LIBAPIKFS): $(LIBAPIKFS_SOURCES)
-ifeq ($(subdir),)
-	$(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a
-endif
-
-$(LIBAPIKFS)-clean:
-ifeq ($(subdir),)
-	$(call QUIET_CLEAN, libapikfs)
-	@$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
-endif
+$(LIBAPI)-clean:
+	$(call QUIET_CLEAN, libapi)
+	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
 help:
 	@echo 'Perf make targets:'
@@ -888,17 +441,6 @@
 	$(QUIET_GEN)$(RM) cscope*; \
 	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
 
-### Detect prefix changes
-TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
-             $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ)
-
-$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
-	@FLAGS='$(TRACK_CFLAGS)'; \
-	    if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
-		echo 1>&2 "  FLAGS:   * new build flags or prefix"; \
-		echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
-            fi
-
 ### Testing rules
 
 # GNU make supports exporting all variables by "export" without parameters.
@@ -981,12 +523,14 @@
 #
 config-clean:
 	$(call QUIET_CLEAN, config)
-	@$(MAKE) -C config/feature-checks clean >/dev/null
+	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
 
-clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean
-	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS)
+clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean
+	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
+	$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+	$(Q)$(RM) .config-detected
 	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
-	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
+	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
 	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
 	$(python-clean)
 
@@ -1000,7 +544,9 @@
     GIT-HEAD-PHONY =
 endif
 
+FORCE:
+
 .PHONY: all install clean config-clean strip install-gtk
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
-.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep
 
diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build
new file mode 100644
index 0000000..109eb75
--- /dev/null
+++ b/tools/perf/arch/Build
@@ -0,0 +1,2 @@
+libperf-y += common.o
+libperf-y += $(ARCH)/
diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build
new file mode 100644
index 0000000..41bf61d
--- /dev/null
+++ b/tools/perf/arch/arm/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index 09d6215..7fbca17 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -1,14 +1,3 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
-endif
-ifndef NO_LIBUNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
-endif
-ifndef NO_LIBDW_DWARF_UNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
-endif
-ifndef NO_DWARF_UNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
 endif
diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build
new file mode 100644
index 0000000..b30eff9
--- /dev/null
+++ b/tools/perf/arch/arm/tests/Build
@@ -0,0 +1,2 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
new file mode 100644
index 0000000..d22e3d0
--- /dev/null
+++ b/tools/perf/arch/arm/util/Build
@@ -0,0 +1,4 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+
+libperf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/tools/perf/arch/arm64/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index 67e9b3d..7fbca17 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -1,7 +1,3 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
-endif
-ifndef NO_LIBUNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
 endif
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
new file mode 100644
index 0000000..e58123a8
--- /dev/null
+++ b/tools/perf/arch/arm64/util/Build
@@ -0,0 +1,2 @@
+libperf-$(CONFIG_DWARF)     += dwarf-regs.o
+libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/tools/perf/arch/powerpc/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 6f7782b..7fbca17 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,6 +1,3 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o
 endif
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
new file mode 100644
index 0000000..0af6e9b
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/Build
@@ -0,0 +1,4 @@
+libperf-y += header.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/tools/perf/arch/s390/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
index 798ac73..21322e0 100644
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -1,7 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
 endif
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
 HAVE_KVM_STAT_SUPPORT := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
new file mode 100644
index 0000000..8a61372
--- /dev/null
+++ b/tools/perf/arch/s390/util/Build
@@ -0,0 +1,4 @@
+libperf-y += header.o
+libperf-y += kvm-stat.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/tools/perf/arch/sh/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile
index 15130b50..7fbca17 100644
--- a/tools/perf/arch/sh/Makefile
+++ b/tools/perf/arch/sh/Makefile
@@ -1,4 +1,3 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
 endif
diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build
new file mode 100644
index 0000000..954e287
--- /dev/null
+++ b/tools/perf/arch/sh/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/tools/perf/arch/sparc/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
index 15130b50..7fbca17 100644
--- a/tools/perf/arch/sparc/Makefile
+++ b/tools/perf/arch/sparc/Makefile
@@ -1,4 +1,3 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
 endif
diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build
new file mode 100644
index 0000000..954e287
--- /dev/null
+++ b/tools/perf/arch/sparc/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build
new file mode 100644
index 0000000..41bf61d
--- /dev/null
+++ b/tools/perf/arch/x86/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 9b21881..21322e0 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -1,19 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
 endif
-ifndef NO_LIBUNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
-endif
-ifndef NO_LIBDW_DWARF_UNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
-endif
-ifndef NO_DWARF_UNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
-endif
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o
-LIB_H += arch/$(ARCH)/util/tsc.h
 HAVE_KVM_STAT_SUPPORT := 1
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
new file mode 100644
index 0000000..b30eff9
--- /dev/null
+++ b/tools/perf/arch/x86/tests/Build
@@ -0,0 +1,2 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
new file mode 100644
index 0000000..cfbccc4
--- /dev/null
+++ b/tools/perf/arch/x86/util/Build
@@ -0,0 +1,8 @@
+libperf-y += header.o
+libperf-y += tsc.o
+libperf-y += kvm-stat.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+
+libperf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
new file mode 100644
index 0000000..5ce9802
--- /dev/null
+++ b/tools/perf/bench/Build
@@ -0,0 +1,11 @@
+perf-y += sched-messaging.o
+perf-y += sched-pipe.o
+perf-y += mem-memcpy.o
+perf-y += futex-hash.o
+perf-y += futex-wake.o
+perf-y += futex-requeue.o
+
+perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
+perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
+
+perf-$(CONFIG_NUMA) += numa.o
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 747f861..71bf745 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -208,7 +208,7 @@
 			goto out;
 	}
 
-	ret = perf_session__process_events(session, &ann->tool);
+	ret = perf_session__process_events(session);
 	if (ret)
 		goto out;
 
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 50e6b66..d47a0cd 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -125,8 +125,7 @@
 	return ret;
 }
 
-static int build_id_cache__add_kcore(const char *filename, const char *debugdir,
-				     bool force)
+static int build_id_cache__add_kcore(const char *filename, bool force)
 {
 	char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1];
 	char from_dir[PATH_MAX], to_dir[PATH_MAX];
@@ -143,7 +142,7 @@
 		return -1;
 
 	scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s",
-		  debugdir, sbuildid);
+		  buildid_dir, sbuildid);
 
 	if (!force &&
 	    !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) {
@@ -155,7 +154,7 @@
 		return -1;
 
 	scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s",
-		  debugdir, sbuildid, dir);
+		  buildid_dir, sbuildid, dir);
 
 	if (mkdir_p(to_dir, 0755))
 		return -1;
@@ -183,7 +182,7 @@
 	return 0;
 }
 
-static int build_id_cache__add_file(const char *filename, const char *debugdir)
+static int build_id_cache__add_file(const char *filename)
 {
 	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 	u8 build_id[BUILD_ID_SIZE];
@@ -195,16 +194,14 @@
 	}
 
 	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
-	err = build_id_cache__add_s(sbuild_id, debugdir, filename,
+	err = build_id_cache__add_s(sbuild_id, filename,
 				    false, false);
-	if (verbose)
-		pr_info("Adding %s %s: %s\n", sbuild_id, filename,
-			err ? "FAIL" : "Ok");
+	pr_debug("Adding %s %s: %s\n", sbuild_id, filename,
+		 err ? "FAIL" : "Ok");
 	return err;
 }
 
-static int build_id_cache__remove_file(const char *filename,
-				       const char *debugdir)
+static int build_id_cache__remove_file(const char *filename)
 {
 	u8 build_id[BUILD_ID_SIZE];
 	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
@@ -217,10 +214,34 @@
 	}
 
 	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
-	err = build_id_cache__remove_s(sbuild_id, debugdir);
-	if (verbose)
-		pr_info("Removing %s %s: %s\n", sbuild_id, filename,
-			err ? "FAIL" : "Ok");
+	err = build_id_cache__remove_s(sbuild_id);
+	pr_debug("Removing %s %s: %s\n", sbuild_id, filename,
+		 err ? "FAIL" : "Ok");
+
+	return err;
+}
+
+static int build_id_cache__purge_path(const char *pathname)
+{
+	struct strlist *list;
+	struct str_node *pos;
+	int err;
+
+	err = build_id_cache__list_build_ids(pathname, &list);
+	if (err)
+		goto out;
+
+	strlist__for_each(pos, list) {
+		err = build_id_cache__remove_s(pos->s);
+		pr_debug("Removing %s %s: %s\n", pos->s, pathname,
+			 err ? "FAIL" : "Ok");
+		if (err)
+			break;
+	}
+	strlist__delete(list);
+
+out:
+	pr_debug("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok");
 
 	return err;
 }
@@ -252,13 +273,12 @@
 	return 0;
 }
 
-static int build_id_cache__update_file(const char *filename,
-				       const char *debugdir)
+static int build_id_cache__update_file(const char *filename)
 {
 	u8 build_id[BUILD_ID_SIZE];
 	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
-	int err;
+	int err = 0;
 
 	if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
 		pr_debug("Couldn't read a build-id in %s\n", filename);
@@ -266,14 +286,14 @@
 	}
 
 	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
-	err = build_id_cache__remove_s(sbuild_id, debugdir);
-	if (!err) {
-		err = build_id_cache__add_s(sbuild_id, debugdir, filename,
-					    false, false);
-	}
-	if (verbose)
-		pr_info("Updating %s %s: %s\n", sbuild_id, filename,
-			err ? "FAIL" : "Ok");
+	if (build_id_cache__cached(sbuild_id))
+		err = build_id_cache__remove_s(sbuild_id);
+
+	if (!err)
+		err = build_id_cache__add_s(sbuild_id, filename, false, false);
+
+	pr_debug("Updating %s %s: %s\n", sbuild_id, filename,
+		 err ? "FAIL" : "Ok");
 
 	return err;
 }
@@ -287,6 +307,7 @@
 	bool force = false;
 	char const *add_name_list_str = NULL,
 		   *remove_name_list_str = NULL,
+		   *purge_name_list_str = NULL,
 		   *missing_filename = NULL,
 		   *update_name_list_str = NULL,
 		   *kcore_filename = NULL;
@@ -304,6 +325,8 @@
 		   "file", "kcore file to add"),
 	OPT_STRING('r', "remove", &remove_name_list_str, "file list",
 		    "file(s) to remove"),
+	OPT_STRING('p', "purge", &purge_name_list_str, "path list",
+		    "path(s) to remove (remove old caches too)"),
 	OPT_STRING('M', "missing", &missing_filename, "file",
 		   "to find missing build ids in the cache"),
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -320,6 +343,11 @@
 	argc = parse_options(argc, argv, buildid_cache_options,
 			     buildid_cache_usage, 0);
 
+	if (argc || (!add_name_list_str && !kcore_filename &&
+		     !remove_name_list_str && !purge_name_list_str &&
+		     !missing_filename && !update_name_list_str))
+		usage_with_options(buildid_cache_usage, buildid_cache_options);
+
 	if (missing_filename) {
 		file.path = missing_filename;
 		file.force = force;
@@ -338,7 +366,7 @@
 		list = strlist__new(true, add_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__add_file(pos->s, buildid_dir)) {
+				if (build_id_cache__add_file(pos->s)) {
 					if (errno == EEXIST) {
 						pr_debug("%s already in the cache\n",
 							 pos->s);
@@ -356,7 +384,25 @@
 		list = strlist__new(true, remove_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__remove_file(pos->s, buildid_dir)) {
+				if (build_id_cache__remove_file(pos->s)) {
+					if (errno == ENOENT) {
+						pr_debug("%s wasn't in the cache\n",
+							 pos->s);
+						continue;
+					}
+					pr_warning("Couldn't remove %s: %s\n",
+						   pos->s, strerror_r(errno, sbuf, sizeof(sbuf)));
+				}
+
+			strlist__delete(list);
+		}
+	}
+
+	if (purge_name_list_str) {
+		list = strlist__new(true, purge_name_list_str);
+		if (list) {
+			strlist__for_each(pos, list)
+				if (build_id_cache__purge_path(pos->s)) {
 					if (errno == ENOENT) {
 						pr_debug("%s wasn't in the cache\n",
 							 pos->s);
@@ -377,7 +423,7 @@
 		list = strlist__new(true, update_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__update_file(pos->s, buildid_dir)) {
+				if (build_id_cache__update_file(pos->s)) {
 					if (errno == ENOENT) {
 						pr_debug("%s wasn't in the cache\n",
 							 pos->s);
@@ -391,8 +437,7 @@
 		}
 	}
 
-	if (kcore_filename &&
-	    build_id_cache__add_kcore(kcore_filename, buildid_dir, force))
+	if (kcore_filename && build_id_cache__add_kcore(kcore_filename, force))
 		pr_warning("Couldn't add %s\n", kcore_filename);
 
 out:
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index ed3873b..feb420f 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -74,7 +74,7 @@
 	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
 	 */
 	if (with_hits || perf_data_file__is_pipe(&file))
-		perf_session__process_events(session, &build_id__mark_dso_hit_ops);
+		perf_session__process_events(session);
 
 	perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
 	perf_session__delete(session);
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
new file mode 100644
index 0000000..d6525bc
--- /dev/null
+++ b/tools/perf/builtin-data.c
@@ -0,0 +1,123 @@
+#include <linux/compiler.h>
+#include "builtin.h"
+#include "perf.h"
+#include "debug.h"
+#include "parse-options.h"
+#include "data-convert-bt.h"
+
+typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix);
+
+struct data_cmd {
+	const char	*name;
+	const char	*summary;
+	data_cmd_fn_t	fn;
+};
+
+static struct data_cmd data_cmds[];
+
+#define for_each_cmd(cmd) \
+	for (cmd = data_cmds; cmd && cmd->name; cmd++)
+
+static const struct option data_options[] = {
+	OPT_END()
+};
+
+static const char * const data_subcommands[] = { "convert", NULL };
+
+static const char *data_usage[] = {
+	"perf data [<common options>] <command> [<options>]",
+	NULL
+};
+
+static void print_usage(void)
+{
+	struct data_cmd *cmd;
+
+	printf("Usage:\n");
+	printf("\t%s\n\n", data_usage[0]);
+	printf("\tAvailable commands:\n");
+
+	for_each_cmd(cmd) {
+		printf("\t %s\t- %s\n", cmd->name, cmd->summary);
+	}
+
+	printf("\n");
+}
+
+static const char * const data_convert_usage[] = {
+	"perf data convert [<options>]",
+	NULL
+};
+
+static int cmd_data_convert(int argc, const char **argv,
+			    const char *prefix __maybe_unused)
+{
+	const char *to_ctf     = NULL;
+	bool force = false;
+	const struct option options[] = {
+		OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+		OPT_STRING('i', "input", &input_name, "file", "input file name"),
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+		OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
+#endif
+		OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+		OPT_END()
+	};
+
+#ifndef HAVE_LIBBABELTRACE_SUPPORT
+	pr_err("No conversion support compiled in.\n");
+	return -1;
+#endif
+
+	argc = parse_options(argc, argv, options,
+			     data_convert_usage, 0);
+	if (argc) {
+		usage_with_options(data_convert_usage, options);
+		return -1;
+	}
+
+	if (to_ctf) {
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+		return bt_convert__perf2ctf(input_name, to_ctf, force);
+#else
+		pr_err("The libbabeltrace support is not compiled in.\n");
+		return -1;
+#endif
+	}
+
+	return 0;
+}
+
+static struct data_cmd data_cmds[] = {
+	{ "convert", "converts data file between formats", cmd_data_convert },
+	{ .name = NULL, },
+};
+
+int cmd_data(int argc, const char **argv, const char *prefix)
+{
+	struct data_cmd *cmd;
+	const char *cmdstr;
+
+	/* No command specified. */
+	if (argc < 2)
+		goto usage;
+
+	argc = parse_options_subcommand(argc, argv, data_options, data_subcommands, data_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	if (argc < 1)
+		goto usage;
+
+	cmdstr = argv[0];
+
+	for_each_cmd(cmd) {
+		if (strcmp(cmd->name, cmdstr))
+			continue;
+
+		return cmd->fn(argc, argv, prefix);
+	}
+
+	pr_err("Unknown command: %s\n", cmdstr);
+usage:
+	print_usage();
+	return -1;
+}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 74aada5..df6307b 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -747,7 +747,7 @@
 			goto out_delete;
 		}
 
-		ret = perf_session__process_events(d->session, &tool);
+		ret = perf_session__process_events(d->session);
 		if (ret) {
 			pr_err("Failed to process %s\n", d->file.path);
 			goto out_delete;
@@ -791,6 +791,8 @@
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+		   "file", "kallsyms pathname"),
 	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
@@ -802,7 +804,7 @@
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
 		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
 		   " Please refer the man page for the complete list."),
-	OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
+	OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator",
 		   "separator for columns, no spaces will be added between "
 		   "columns '.' is reserved."),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 0f93f85..695ec5a 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -24,6 +24,7 @@
 	struct perf_data_file file = {
 		.path = file_name,
 		.mode = PERF_DATA_MODE_READ,
+		.force = details->force,
 	};
 
 	session = perf_session__new(&file, 0, NULL);
@@ -47,6 +48,7 @@
 		    "Show all event attr details"),
 	OPT_BOOLEAN('g', "group", &details.event_group,
 		    "Show event group information"),
+	OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"),
 	OPT_END()
 	};
 	const char * const evlist_usage[] = {
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 25d2062..36486ea 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -437,7 +437,18 @@
 			HELP_FORMAT_INFO),
 	OPT_END(),
 	};
-	const char * const builtin_help_usage[] = {
+	const char * const builtin_help_subcommands[] = {
+		"buildid-cache", "buildid-list", "diff", "evlist", "help", "list",
+		"record", "report", "bench", "stat", "timechart", "top", "annotate",
+		"script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data",
+#ifdef HAVE_LIBELF_SUPPORT
+		"probe",
+#endif
+#ifdef HAVE_LIBAUDIT_SUPPORT
+		"trace",
+#endif
+	NULL };
+	const char *builtin_help_usage[] = {
 		"perf help [--all] [--man|--web|--info] [command]",
 		NULL
 	};
@@ -448,8 +459,8 @@
 
 	perf_config(perf_help_config, &help_format);
 
-	argc = parse_options(argc, argv, builtin_help_options,
-			builtin_help_usage, 0);
+	argc = parse_options_subcommand(argc, argv, builtin_help_options,
+			builtin_help_subcommands, builtin_help_usage, 0);
 
 	if (show_all) {
 		printf("\n usage: %s\n\n", perf_usage_string);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index a13641e..40a33d7 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -53,6 +53,13 @@
 	return 0;
 }
 
+static int perf_event__repipe_oe_synth(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct ordered_events *oe __maybe_unused)
+{
+	return perf_event__repipe_synth(tool, event);
+}
+
 static int perf_event__repipe_op2_synth(struct perf_tool *tool,
 					union perf_event *event,
 					struct perf_session *session
@@ -359,8 +366,6 @@
 	} else if (inject->sched_stat) {
 		struct perf_evsel *evsel;
 
-		inject->tool.ordered_events = true;
-
 		evlist__for_each(session->evlist, evsel) {
 			const char *name = perf_evsel__name(evsel);
 
@@ -379,7 +384,7 @@
 	if (!file_out->is_pipe)
 		lseek(fd, session->header.data_offset, SEEK_SET);
 
-	ret = perf_session__process_events(session, &inject->tool);
+	ret = perf_session__process_events(session);
 
 	if (!file_out->is_pipe) {
 		if (inject->build_ids)
@@ -408,7 +413,7 @@
 			.unthrottle	= perf_event__repipe,
 			.attr		= perf_event__repipe_attr,
 			.tracing_data	= perf_event__repipe_op2_synth,
-			.finished_round	= perf_event__repipe_op2_synth,
+			.finished_round	= perf_event__repipe_oe_synth,
 			.build_id	= perf_event__repipe_op2_synth,
 			.id_index	= perf_event__repipe_op2_synth,
 		},
@@ -438,6 +443,7 @@
 			 "be more verbose (show build ids, etc)"),
 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
 			   "kallsyms pathname"),
+		OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
 		OPT_END()
 	};
 	const char * const inject_usage[] = {
@@ -458,6 +464,8 @@
 		return -1;
 	}
 
+	inject.tool.ordered_events = inject.sched_stat;
+
 	file.path = inject.input_name;
 	inject.session = perf_session__new(&file, true, &inject.tool);
 	if (inject.session == NULL)
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index f295141..4ebf65c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -20,6 +20,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/string.h>
+#include <locale.h>
 
 struct alloc_stat;
 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
@@ -275,10 +276,10 @@
 	struct rb_node *next;
 	struct machine *machine = &session->machines.host;
 
-	printf("%.102s\n", graph_dotted_line);
+	printf("%.105s\n", graph_dotted_line);
 	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
 	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
-	printf("%.102s\n", graph_dotted_line);
+	printf("%.105s\n", graph_dotted_line);
 
 	next = rb_first(root);
 
@@ -304,7 +305,7 @@
 			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
 		printf(" %-34s |", buf);
 
-		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
+		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
 		       (unsigned long long)data->bytes_alloc,
 		       (unsigned long)data->bytes_alloc / data->hit,
 		       (unsigned long long)data->bytes_req,
@@ -317,21 +318,21 @@
 	}
 
 	if (n_lines == -1)
-		printf(" ...                                | ...             | ...             | ...    | ...      | ...   \n");
+		printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");
 
-	printf("%.102s\n", graph_dotted_line);
+	printf("%.105s\n", graph_dotted_line);
 }
 
 static void print_summary(void)
 {
 	printf("\nSUMMARY\n=======\n");
-	printf("Total bytes requested: %lu\n", total_requested);
-	printf("Total bytes allocated: %lu\n", total_allocated);
-	printf("Total bytes wasted on internal fragmentation: %lu\n",
+	printf("Total bytes requested: %'lu\n", total_requested);
+	printf("Total bytes allocated: %'lu\n", total_allocated);
+	printf("Total bytes wasted on internal fragmentation: %'lu\n",
 	       total_allocated - total_requested);
 	printf("Internal fragmentation: %f%%\n",
 	       fragmentation(total_requested, total_allocated));
-	printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
+	printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
 }
 
 static void print_result(struct perf_session *session)
@@ -426,7 +427,7 @@
 	}
 
 	setup_pager();
-	err = perf_session__process_events(session, &perf_kmem);
+	err = perf_session__process_events(session);
 	if (err != 0)
 		goto out;
 	sort_result();
@@ -559,6 +560,7 @@
 {
 	char *tok;
 	char *str = strdup(arg);
+	char *pos = str;
 
 	if (!str) {
 		pr_err("%s: strdup failed\n", __func__);
@@ -566,7 +568,7 @@
 	}
 
 	while (true) {
-		tok = strsep(&str, ",");
+		tok = strsep(&pos, ",");
 		if (!tok)
 			break;
 		if (sort_dimension__add(tok, sort_list) < 0) {
@@ -660,8 +662,13 @@
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const default_sort_order = "frag,hit,bytes";
+	struct perf_data_file file = {
+		.mode = PERF_DATA_MODE_READ,
+	};
 	const struct option kmem_options[] = {
 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
 	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
 			   "show per-callsite statistics", parse_caller_opt),
 	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
@@ -671,6 +678,7 @@
 		     parse_sort_opt),
 	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
 	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
+	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
 	OPT_END()
 	};
 	const char *const kmem_subcommands[] = { "record", "stat", NULL };
@@ -679,10 +687,6 @@
 		NULL
 	};
 	struct perf_session *session;
-	struct perf_data_file file = {
-		.path = input_name,
-		.mode = PERF_DATA_MODE_READ,
-	};
 	int ret = -1;
 
 	argc = parse_options_subcommand(argc, argv, kmem_options,
@@ -696,6 +700,8 @@
 		return __cmd_record(argc, argv);
 	}
 
+	file.path = input_name;
+
 	session = perf_session__new(&file, false, &perf_kmem);
 	if (session == NULL)
 		return -1;
@@ -703,6 +709,8 @@
 	symbol__init(&session->header.env);
 
 	if (!strcmp(argv[0], "stat")) {
+		setlocale(LC_ALL, "");
+
 		if (cpu__setup_cpunode_map())
 			goto out_delete;
 
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 0894a81..1f9338f 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -18,6 +18,7 @@
 #include "util/stat.h"
 #include "util/top.h"
 #include "util/data.h"
+#include "util/ordered-events.h"
 
 #include <sys/prctl.h>
 #ifdef HAVE_TIMERFD_SUPPORT
@@ -730,9 +731,9 @@
 			return -1;
 		}
 
-		err = perf_session_queue_event(kvm->session, event, &kvm->tool, &sample, 0);
+		err = perf_session__queue_event(kvm->session, event, &sample, 0);
 		/*
-		 * FIXME: Here we can't consume the event, as perf_session_queue_event will
+		 * FIXME: Here we can't consume the event, as perf_session__queue_event will
 		 *        point to it, and it'll get possibly overwritten by the kernel.
 		 */
 		perf_evlist__mmap_consume(kvm->evlist, idx);
@@ -783,8 +784,10 @@
 
 	/* flush queue after each round in which we processed events */
 	if (ntotal) {
-		kvm->session->ordered_events.next_flush = flush_time;
-		err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session);
+		struct ordered_events *oe = &kvm->session->ordered_events;
+
+		oe->next_flush = flush_time;
+		err = ordered_events__flush(oe, OE_FLUSH__ROUND);
 		if (err) {
 			if (kvm->lost_events)
 				pr_info("\nLost events: %" PRIu64 "\n\n",
@@ -1044,6 +1047,7 @@
 	struct perf_data_file file = {
 		.path = kvm->file_name,
 		.mode = PERF_DATA_MODE_READ,
+		.force = kvm->force,
 	};
 
 	kvm->tool = eops;
@@ -1066,7 +1070,7 @@
 	if (ret < 0)
 		return ret;
 
-	return perf_session__process_events(kvm->session, &kvm->tool);
+	return perf_session__process_events(kvm->session);
 }
 
 static int parse_target_str(struct perf_kvm_stat *kvm)
@@ -1201,6 +1205,7 @@
 			    " time (sort by avg time)"),
 		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
 			   "analyze events only for given process id(s)"),
+		OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"),
 		OPT_END()
 	};
 
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 198f3c3..af5bd05 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -36,38 +36,36 @@
 
 	setup_pager();
 
-	if (raw_dump) {
-		print_events(NULL, true);
-		return 0;
-	}
+	if (!raw_dump)
+		printf("\nList of pre-defined events (to be used in -e):\n\n");
 
 	if (argc == 0) {
-		print_events(NULL, false);
+		print_events(NULL, raw_dump);
 		return 0;
 	}
 
 	for (i = 0; i < argc; ++i) {
-		if (i)
-			putchar('\n');
-		if (strncmp(argv[i], "tracepoint", 10) == 0)
-			print_tracepoint_events(NULL, NULL, false);
+		if (strcmp(argv[i], "tracepoint") == 0)
+			print_tracepoint_events(NULL, NULL, raw_dump);
 		else if (strcmp(argv[i], "hw") == 0 ||
 			 strcmp(argv[i], "hardware") == 0)
-			print_events_type(PERF_TYPE_HARDWARE);
+			print_symbol_events(NULL, PERF_TYPE_HARDWARE,
+					event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
 		else if (strcmp(argv[i], "sw") == 0 ||
 			 strcmp(argv[i], "software") == 0)
-			print_events_type(PERF_TYPE_SOFTWARE);
+			print_symbol_events(NULL, PERF_TYPE_SOFTWARE,
+					event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
 		else if (strcmp(argv[i], "cache") == 0 ||
 			 strcmp(argv[i], "hwcache") == 0)
-			print_hwcache_events(NULL, false);
+			print_hwcache_events(NULL, raw_dump);
 		else if (strcmp(argv[i], "pmu") == 0)
-			print_pmu_events(NULL, false);
+			print_pmu_events(NULL, raw_dump);
 		else {
 			char *sep = strchr(argv[i], ':'), *s;
 			int sep_idx;
 
 			if (sep == NULL) {
-				print_events(argv[i], false);
+				print_events(argv[i], raw_dump);
 				continue;
 			}
 			sep_idx = sep - argv[i];
@@ -76,7 +74,7 @@
 				return -1;
 
 			s[sep_idx] = '\0';
-			print_tracepoint_events(s, s + sep_idx + 1, false);
+			print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
 			free(s);
 		}
 	}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index e7ec715..d49c2ab 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -846,6 +846,8 @@
 	{ "lock:lock_release",	 perf_evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
 };
 
+static bool force;
+
 static int __cmd_report(bool display_info)
 {
 	int err = -EINVAL;
@@ -857,6 +859,7 @@
 	struct perf_data_file file = {
 		.path = input_name,
 		.mode = PERF_DATA_MODE_READ,
+		.force = force,
 	};
 
 	session = perf_session__new(&file, false, &eops);
@@ -878,7 +881,7 @@
 	if (select_key())
 		goto out_delete;
 
-	err = perf_session__process_events(session, &eops);
+	err = perf_session__process_events(session);
 	if (err)
 		goto out_delete;
 
@@ -945,6 +948,7 @@
 		    "dump thread list in perf.data"),
 	OPT_BOOLEAN('m', "map", &info_map,
 		    "map of lock instances (address:name table)"),
+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
 	OPT_END()
 	};
 	const struct option lock_options[] = {
@@ -956,6 +960,7 @@
 	const struct option report_options[] = {
 	OPT_STRING('k', "key", &sort_key, "acquired",
 		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
 	/* TODO: type */
 	OPT_END()
 	};
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 9b56639..675216e 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -15,6 +15,7 @@
 	char const		*input_name;
 	bool			hide_unresolved;
 	bool			dump_raw;
+	bool			force;
 	int			operation;
 	const char		*cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -120,6 +121,7 @@
 	struct perf_data_file file = {
 		.path = input_name,
 		.mode = PERF_DATA_MODE_READ,
+		.force = mem->force,
 	};
 	int err = -EINVAL;
 	int ret;
@@ -141,7 +143,7 @@
 
 	printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
 
-	err = perf_session__process_events(session, &mem->tool);
+	err = perf_session__process_events(session);
 	if (err)
 		return err;
 
@@ -286,10 +288,11 @@
 		   "input file name"),
 	OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
 		   "list of cpus to profile"),
-	OPT_STRING('x', "field-separator", &symbol_conf.field_sep,
+	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep,
 		   "separator",
 		   "separator for columns, no spaces will be added"
 		   " between columns '.' is reserved."),
+	OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
 	OPT_END()
 	};
 	const char *const mem_subcommands[] = { "record", "report", NULL };
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 921bb69..f7b1af6 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -56,6 +56,7 @@
 	bool mod_events;
 	bool uprobes;
 	bool quiet;
+	bool target_used;
 	int nevents;
 	struct perf_probe_event events[MAX_PROBES];
 	struct strlist *dellist;
@@ -78,6 +79,12 @@
 	}
 
 	pev->uprobes = params.uprobes;
+	if (params.target) {
+		pev->target = strdup(params.target);
+		if (!pev->target)
+			return -ENOMEM;
+		params.target_used = true;
+	}
 
 	/* Parse a perf-probe command into event */
 	ret = parse_perf_probe_command(str, pev);
@@ -102,6 +109,7 @@
 		params.target = strdup(ptr);
 		if (!params.target)
 			return -ENOMEM;
+		params.target_used = false;
 
 		found = 1;
 		buf = ptr + (strlen(ptr) - 3);
@@ -178,7 +186,7 @@
 	int ret = -ENOENT;
 	char *tmp;
 
-	if  (str && !params.target) {
+	if  (str) {
 		if (!strcmp(opt->long_name, "exec"))
 			params.uprobes = true;
 #ifdef HAVE_DWARF_SUPPORT
@@ -200,7 +208,9 @@
 			if (!tmp)
 				return -ENOMEM;
 		}
+		free(params.target);
 		params.target = tmp;
+		params.target_used = false;
 		ret = 0;
 	}
 
@@ -485,9 +495,14 @@
 	}
 
 	if (params.nevents) {
+		/* Ensure the last given target is used */
+		if (params.target && !params.target_used) {
+			pr_warning("  Error: -x/-m must follow the probe definitions.\n");
+			usage_with_options(probe_usage, options);
+		}
+
 		ret = add_perf_probe_events(params.events, params.nevents,
 					    params.max_probe_points,
-					    params.target,
 					    params.force_add);
 		if (ret < 0) {
 			pr_err_with_code("  Error: Failed to add events.", ret);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 404ab34..c3efdfb 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -70,8 +70,8 @@
 static int record__mmap_read(struct record *rec, int idx)
 {
 	struct perf_mmap *md = &rec->evlist->mmap[idx];
-	unsigned int head = perf_mmap__read_head(md);
-	unsigned int old = md->prev;
+	u64 head = perf_mmap__read_head(md);
+	u64 old = md->prev;
 	unsigned char *data = md->base + page_size;
 	unsigned long size;
 	void *buf;
@@ -161,8 +161,9 @@
 		}
 	}
 
-	if (perf_evlist__apply_filters(evlist)) {
-		error("failed to set filter with %d (%s)\n", errno,
+	if (perf_evlist__apply_filters(evlist, &pos)) {
+		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
+			pos->filter, perf_evsel__name(pos), errno,
 			strerror_r(errno, msg, sizeof(msg)));
 		rc = -1;
 		goto out;
@@ -225,7 +226,7 @@
 	 */
 	symbol_conf.ignore_vmlinux_buildid = true;
 
-	return perf_session__process_events(session, &rec->tool);
+	return perf_session__process_events(session);
 }
 
 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
@@ -343,7 +344,7 @@
 	signal(SIGINT, sig_handler);
 	signal(SIGTERM, sig_handler);
 
-	session = perf_session__new(file, false, NULL);
+	session = perf_session__new(file, false, tool);
 	if (session == NULL) {
 		pr_err("Perf session creation failed.\n");
 		return -1;
@@ -658,7 +659,7 @@
 
 static void callchain_debug(void)
 {
-	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
+	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
 
 	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
 
@@ -710,6 +711,90 @@
 	return perf_default_config(var, value, cb);
 }
 
+struct clockid_map {
+	const char *name;
+	int clockid;
+};
+
+#define CLOCKID_MAP(n, c)	\
+	{ .name = n, .clockid = (c), }
+
+#define CLOCKID_END	{ .name = NULL, }
+
+
+/*
+ * Add the missing ones, we need to build on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+static const struct clockid_map clockids[] = {
+	/* available for all events, NMI safe */
+	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+	/* available for some events */
+	CLOCKID_MAP("realtime", CLOCK_REALTIME),
+	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+	CLOCKID_MAP("tai", CLOCK_TAI),
+
+	/* available for the lazy */
+	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+	CLOCKID_MAP("real", CLOCK_REALTIME),
+	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+	CLOCKID_END,
+};
+
+static int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+	struct record_opts *opts = (struct record_opts *)opt->value;
+	const struct clockid_map *cm;
+	const char *ostr = str;
+
+	if (unset) {
+		opts->use_clockid = 0;
+		return 0;
+	}
+
+	/* no arg passed */
+	if (!str)
+		return 0;
+
+	/* no setting it twice */
+	if (opts->use_clockid)
+		return -1;
+
+	opts->use_clockid = true;
+
+	/* if its a number, we're done */
+	if (sscanf(str, "%d", &opts->clockid) == 1)
+		return 0;
+
+	/* allow a "CLOCK_" prefix to the name */
+	if (!strncasecmp(str, "CLOCK_", 6))
+		str += 6;
+
+	for (cm = clockids; cm->name; cm++) {
+		if (!strcasecmp(str, cm->name)) {
+			opts->clockid = cm->clockid;
+			return 0;
+		}
+	}
+
+	opts->use_clockid = false;
+	ui__warning("unknown clockid %s, check man page\n", ostr);
+	return -1;
+}
+
 static const char * const __record_usage[] = {
 	"perf record [<options>] [<command>]",
 	"perf record [<options>] -- <command> [<options>]",
@@ -751,9 +836,9 @@
 #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr";
 #else
-const char record_callchain_help[] = CALLCHAIN_HELP "fp";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr";
 #endif
 
 /*
@@ -839,6 +924,11 @@
 		    "use per-thread mmaps"),
 	OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
 		    "Sample machine registers on interrupt"),
+	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
+		    "Record running/enabled time of read (:S) events"),
+	OPT_CALLBACK('k', "clockid", &record.opts,
+	"clockid", "clockid to use for events, see clock_gettime()",
+	parse_clockid),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2f91094..476cdf7 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -249,6 +249,8 @@
 		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
 		    (sample_type & PERF_SAMPLE_STACK_USER))
 			callchain_param.record_mode = CALLCHAIN_DWARF;
+		else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+			callchain_param.record_mode = CALLCHAIN_LBR;
 		else
 			callchain_param.record_mode = CALLCHAIN_FP;
 	}
@@ -302,7 +304,7 @@
 
 	if (rep->mem_mode) {
 		ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
-		ret += fprintf(fp, "\n# Sort order   : %s", sort_order);
+		ret += fprintf(fp, "\n# Sort order   : %s", sort_order ? : default_mem_sort_order);
 	} else
 		ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
 	return ret + fprintf(fp, "\n#\n");
@@ -345,7 +347,7 @@
 static void report__warn_kptr_restrict(const struct report *rep)
 {
 	struct map *kernel_map = rep->session->machines.host.vmlinux_maps[MAP__FUNCTION];
-	struct kmap *kernel_kmap = map__kmap(kernel_map);
+	struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL;
 
 	if (kernel_map == NULL ||
 	    (kernel_map->dso->hit &&
@@ -480,7 +482,7 @@
 	if (ret)
 		return ret;
 
-	ret = perf_session__process_events(session, &rep->tool);
+	ret = perf_session__process_events(session);
 	if (ret)
 		return ret;
 
@@ -667,6 +669,10 @@
 		   "only consider symbols in these dsos"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
 		   "only consider symbols in these comms"),
+	OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+		   "only consider symbols in these pids"),
+	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+		   "only consider symbols in these tids"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
 		   "only consider these symbols"),
 	OPT_STRING(0, "symbol-filter", &report.symbol_filter_str, "filter",
@@ -674,7 +680,7 @@
 	OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
 		   "width[,width...]",
 		   "don't try to adjust column width, use these fixed values"),
-	OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
+	OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator",
 		   "separator for columns, no spaces will be added between "
 		   "columns '.' is reserved."),
 	OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved,
@@ -766,7 +772,7 @@
 	 * 0/1 means the user chose a mode.
 	 */
 	if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) &&
-	    branch_call_mode == -1) {
+	    !branch_call_mode) {
 		sort__mode = SORT_MODE__BRANCH;
 		symbol_conf.cumulate_callchain = false;
 	}
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 891c393..5275bab 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -23,12 +23,13 @@
 #include <semaphore.h>
 #include <pthread.h>
 #include <math.h>
+#include <api/fs/fs.h>
 
 #define PR_SET_NAME		15               /* Set process name */
 #define MAX_CPUS		4096
 #define COMM_LEN		20
 #define SYM_LEN			129
-#define MAX_PID			65536
+#define MAX_PID			1024000
 
 struct sched_atom;
 
@@ -124,7 +125,7 @@
 	struct perf_tool tool;
 	const char	 *sort_order;
 	unsigned long	 nr_tasks;
-	struct task_desc *pid_to_task[MAX_PID];
+	struct task_desc **pid_to_task;
 	struct task_desc **tasks;
 	const struct trace_sched_handler *tp_handler;
 	pthread_mutex_t	 start_work_mutex;
@@ -169,6 +170,7 @@
 	u64		 cpu_last_switched[MAX_CPUS];
 	struct rb_root	 atom_root, sorted_atom_root;
 	struct list_head sort_list, cmp_pid;
+	bool force;
 };
 
 static u64 get_nsecs(void)
@@ -326,8 +328,19 @@
 				      unsigned long pid, const char *comm)
 {
 	struct task_desc *task;
+	static int pid_max;
 
-	BUG_ON(pid >= MAX_PID);
+	if (sched->pid_to_task == NULL) {
+		if (sysctl__read_int("kernel/pid_max", &pid_max) < 0)
+			pid_max = MAX_PID;
+		BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL);
+	}
+	if (pid >= (unsigned long)pid_max) {
+		BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) *
+			sizeof(struct task_desc *))) == NULL);
+		while (pid >= (unsigned long)pid_max)
+			sched->pid_to_task[pid_max++] = NULL;
+	}
 
 	task = sched->pid_to_task[pid];
 
@@ -346,7 +359,7 @@
 
 	sched->pid_to_task[pid] = task;
 	sched->nr_tasks++;
-	sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_task *));
+	sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *));
 	BUG_ON(!sched->tasks);
 	sched->tasks[task->nr] = task;
 
@@ -425,24 +438,45 @@
 	return sum;
 }
 
-static int self_open_counters(void)
+static int self_open_counters(struct perf_sched *sched, unsigned long cur_task)
 {
 	struct perf_event_attr attr;
-	char sbuf[STRERR_BUFSIZE];
+	char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE];
 	int fd;
+	struct rlimit limit;
+	bool need_privilege = false;
 
 	memset(&attr, 0, sizeof(attr));
 
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.config = PERF_COUNT_SW_TASK_CLOCK;
 
+force_again:
 	fd = sys_perf_event_open(&attr, 0, -1, -1,
 				 perf_event_open_cloexec_flag());
 
-	if (fd < 0)
+	if (fd < 0) {
+		if (errno == EMFILE) {
+			if (sched->force) {
+				BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1);
+				limit.rlim_cur += sched->nr_tasks - cur_task;
+				if (limit.rlim_cur > limit.rlim_max) {
+					limit.rlim_max = limit.rlim_cur;
+					need_privilege = true;
+				}
+				if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
+					if (need_privilege && errno == EPERM)
+						strcpy(info, "Need privilege\n");
+				} else
+					goto force_again;
+			} else
+				strcpy(info, "Have a try with -f option\n");
+		}
 		pr_err("Error: sys_perf_event_open() syscall returned "
-		       "with %d (%s)\n", fd,
-		       strerror_r(errno, sbuf, sizeof(sbuf)));
+		       "with %d (%s)\n%s", fd,
+		       strerror_r(errno, sbuf, sizeof(sbuf)), info);
+		exit(EXIT_FAILURE);
+	}
 	return fd;
 }
 
@@ -460,6 +494,7 @@
 struct sched_thread_parms {
 	struct task_desc  *task;
 	struct perf_sched *sched;
+	int fd;
 };
 
 static void *thread_func(void *ctx)
@@ -470,13 +505,12 @@
 	u64 cpu_usage_0, cpu_usage_1;
 	unsigned long i, ret;
 	char comm2[22];
-	int fd;
+	int fd = parms->fd;
 
 	zfree(&parms);
 
 	sprintf(comm2, ":%s", this_task->comm);
 	prctl(PR_SET_NAME, comm2);
-	fd = self_open_counters();
 	if (fd < 0)
 		return NULL;
 again:
@@ -528,6 +562,7 @@
 		BUG_ON(parms == NULL);
 		parms->task = task = sched->tasks[i];
 		parms->sched = sched;
+		parms->fd = self_open_counters(sched, i);
 		sem_init(&task->sleep_sem, 0, 0);
 		sem_init(&task->ready_for_work, 0, 0);
 		sem_init(&task->work_done_sem, 0, 0);
@@ -572,13 +607,13 @@
 	cpu_usage_1 = get_cpu_usage_nsec_parent();
 	if (!sched->runavg_cpu_usage)
 		sched->runavg_cpu_usage = sched->cpu_usage;
-	sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10;
+	sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat;
 
 	sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
 	if (!sched->runavg_parent_cpu_usage)
 		sched->runavg_parent_cpu_usage = sched->parent_cpu_usage;
-	sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 +
-					 sched->parent_cpu_usage)/10;
+	sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) +
+					 sched->parent_cpu_usage)/sched->replay_repeat;
 
 	ret = pthread_mutex_lock(&sched->start_work_mutex);
 	BUG_ON(ret);
@@ -610,7 +645,7 @@
 	sched->sum_fluct += fluct;
 	if (!sched->run_avg)
 		sched->run_avg = delta;
-	sched->run_avg = (sched->run_avg * 9 + delta) / 10;
+	sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat;
 
 	printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0);
 
@@ -831,7 +866,7 @@
 		return -1;
 	}
 
-	atoms->thread = thread;
+	atoms->thread = thread__get(thread);
 	INIT_LIST_HEAD(&atoms->work_list);
 	__thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
 	return 0;
@@ -1439,8 +1474,7 @@
 	return err;
 }
 
-static int perf_sched__read_events(struct perf_sched *sched,
-				   struct perf_session **psession)
+static int perf_sched__read_events(struct perf_sched *sched)
 {
 	const struct perf_evsel_str_handler handlers[] = {
 		{ "sched:sched_switch",	      process_sched_switch_event, },
@@ -1453,7 +1487,9 @@
 	struct perf_data_file file = {
 		.path = input_name,
 		.mode = PERF_DATA_MODE_READ,
+		.force = sched->force,
 	};
+	int rc = -1;
 
 	session = perf_session__new(&file, false, &sched->tool);
 	if (session == NULL) {
@@ -1467,27 +1503,21 @@
 		goto out_delete;
 
 	if (perf_session__has_traces(session, "record -R")) {
-		int err = perf_session__process_events(session, &sched->tool);
+		int err = perf_session__process_events(session);
 		if (err) {
 			pr_err("Failed to process events, error %d", err);
 			goto out_delete;
 		}
 
-		sched->nr_events      = session->stats.nr_events[0];
-		sched->nr_lost_events = session->stats.total_lost;
-		sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST];
+		sched->nr_events      = session->evlist->stats.nr_events[0];
+		sched->nr_lost_events = session->evlist->stats.total_lost;
+		sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
 	}
 
-	if (psession)
-		*psession = session;
-	else
-		perf_session__delete(session);
-
-	return 0;
-
+	rc = 0;
 out_delete:
 	perf_session__delete(session);
-	return -1;
+	return rc;
 }
 
 static void print_bad_events(struct perf_sched *sched)
@@ -1515,12 +1545,10 @@
 static int perf_sched__lat(struct perf_sched *sched)
 {
 	struct rb_node *next;
-	struct perf_session *session;
 
 	setup_pager();
 
-	/* save session -- references to threads are held in work_list */
-	if (perf_sched__read_events(sched, &session))
+	if (perf_sched__read_events(sched))
 		return -1;
 
 	perf_sched__sort_lat(sched);
@@ -1537,6 +1565,7 @@
 		work_list = rb_entry(next, struct work_atoms, node);
 		output_lat_thread(sched, work_list);
 		next = rb_next(next);
+		thread__zput(work_list->thread);
 	}
 
 	printf(" -----------------------------------------------------------------------------------------------------------------\n");
@@ -1548,7 +1577,6 @@
 	print_bad_events(sched);
 	printf("\n");
 
-	perf_session__delete(session);
 	return 0;
 }
 
@@ -1557,7 +1585,7 @@
 	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
 
 	setup_pager();
-	if (perf_sched__read_events(sched, NULL))
+	if (perf_sched__read_events(sched))
 		return -1;
 	print_bad_events(sched);
 	return 0;
@@ -1572,7 +1600,7 @@
 
 	test_calibrations(sched);
 
-	if (perf_sched__read_events(sched, NULL))
+	if (perf_sched__read_events(sched))
 		return -1;
 
 	printf("nr_run_events:        %ld\n", sched->nr_run_events);
@@ -1693,6 +1721,7 @@
 		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
 	OPT_END()
 	};
 	const struct option sched_options[] = {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ce304df..58f10b8 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -446,9 +446,9 @@
 }
 
 static void process_event(union perf_event *event, struct perf_sample *sample,
-			  struct perf_evsel *evsel, struct thread *thread,
-			  struct addr_location *al)
+			  struct perf_evsel *evsel, struct addr_location *al)
 {
+	struct thread *thread = al->thread;
 	struct perf_event_attr *attr = &evsel->attr;
 
 	if (output[attr->type].fields == 0)
@@ -549,14 +549,6 @@
 				struct machine *machine)
 {
 	struct addr_location al;
-	struct thread *thread = machine__findnew_thread(machine, sample->pid,
-							sample->tid);
-
-	if (thread == NULL) {
-		pr_debug("problem processing %d event, skipping it.\n",
-			 event->header.type);
-		return -1;
-	}
 
 	if (debug_mode) {
 		if (sample->time < last_timestamp) {
@@ -581,7 +573,7 @@
 	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
 		return 0;
 
-	scripting_ops->process_event(event, sample, evsel, thread, &al);
+	scripting_ops->process_event(event, sample, evsel, &al);
 
 	return 0;
 }
@@ -800,7 +792,7 @@
 		script->tool.mmap2 = process_mmap2_event;
 	}
 
-	ret = perf_session__process_events(script->session, &script->tool);
+	ret = perf_session__process_events(script->session);
 
 	if (debug_mode)
 		pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
@@ -1523,6 +1515,9 @@
 			.ordering_requires_timestamps = true,
 		},
 	};
+	struct perf_data_file file = {
+		.mode = PERF_DATA_MODE_READ,
+	};
 	const struct option options[] = {
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
@@ -1550,7 +1545,7 @@
 		    "When printing symbols do not display call chain"),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		    "Look for files with symbols relative to this directory"),
-	OPT_CALLBACK('f', "fields", NULL, "str",
+	OPT_CALLBACK('F', "fields", NULL, "str",
 		     "comma separated output fields prepend with 'type:'. "
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
@@ -1562,6 +1557,10 @@
 	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
 		   "only display events for these comms"),
+	OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+		   "only consider symbols in these pids"),
+	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+		   "only consider symbols in these tids"),
 	OPT_BOOLEAN('I', "show-info", &show_full_info,
 		    "display extended information from perf.data file"),
 	OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -1570,9 +1569,11 @@
 		    "Show the fork/comm/exit events"),
 	OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
 		    "Show the mmap events"),
+	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
 	OPT_END()
 	};
-	const char * const script_usage[] = {
+	const char * const script_subcommands[] = { "record", "report", NULL };
+	const char *script_usage[] = {
 		"perf script [<options>]",
 		"perf script [<options>] record <script> [<record-options>] <command>",
 		"perf script [<options>] report <script> [script-args]",
@@ -1580,13 +1581,10 @@
 		"perf script [<options>] <top-script> [script-args]",
 		NULL
 	};
-	struct perf_data_file file = {
-		.mode = PERF_DATA_MODE_READ,
-	};
 
 	setup_scripting();
 
-	argc = parse_options(argc, argv, options, script_usage,
+	argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	file.path = input_name;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e598e4e..f7b8218 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -353,39 +353,40 @@
  * more semantic information such as miss/hit ratios,
  * instruction rates, etc:
  */
-static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
+static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
+				int cpu)
 {
 	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
-		update_stats(&runtime_nsecs_stats[0], count[0]);
+		update_stats(&runtime_nsecs_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[0], count[0]);
+		update_stats(&runtime_cycles_stats[cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
-		update_stats(&runtime_cycles_in_tx_stats[0], count[0]);
+		update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
-		update_stats(&runtime_transaction_stats[0], count[0]);
+		update_stats(&runtime_transaction_stats[cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
-		update_stats(&runtime_elision_stats[0], count[0]);
+		update_stats(&runtime_elision_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
+		update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
+		update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_stats(&runtime_branches_stats[0], count[0]);
+		update_stats(&runtime_branches_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_stats(&runtime_cacherefs_stats[0], count[0]);
+		update_stats(&runtime_cacherefs_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_stats(&runtime_l1_dcache_stats[0], count[0]);
+		update_stats(&runtime_l1_dcache_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_stats(&runtime_l1_icache_stats[0], count[0]);
+		update_stats(&runtime_l1_icache_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_stats(&runtime_ll_cache_stats[0], count[0]);
+		update_stats(&runtime_ll_cache_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
+		update_stats(&runtime_dtlb_cache_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_stats(&runtime_itlb_cache_stats[0], count[0]);
+		update_stats(&runtime_itlb_cache_stats[cpu], count[0]);
 }
 
 static void zero_per_pkg(struct perf_evsel *counter)
@@ -447,7 +448,8 @@
 			perf_evsel__compute_deltas(evsel, cpu, count);
 		perf_counts_values__scale(count, scale, NULL);
 		evsel->counts->cpu[cpu] = *count;
-		update_shadow_stats(evsel, count->values);
+		if (aggr_mode == AGGR_NONE)
+			update_shadow_stats(evsel, count->values, cpu);
 		break;
 	case AGGR_GLOBAL:
 		aggr->val += count->val;
@@ -495,7 +497,7 @@
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	update_shadow_stats(counter, count);
+	update_shadow_stats(counter, count, 0);
 
 	return 0;
 }
@@ -510,6 +512,9 @@
 	int ncpus = perf_evsel__nr_cpus(counter);
 	int cpu, thread;
 
+	if (!counter->supported)
+		return -ENOENT;
+
 	if (counter->system_wide)
 		nthreads = 1;
 
@@ -679,8 +684,9 @@
 			unit_width = l;
 	}
 
-	if (perf_evlist__apply_filters(evsel_list)) {
-		error("failed to set filter with %d (%s)\n", errno,
+	if (perf_evlist__apply_filters(evsel_list, &counter)) {
+		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
+			counter->filter, perf_evsel__name(counter), errno,
 			strerror_r(errno, msg, sizeof(msg)));
 		return -1;
 	}
@@ -766,6 +772,19 @@
 	return ret;
 }
 
+static void print_running(u64 run, u64 ena)
+{
+	if (csv_output) {
+		fprintf(output, "%s%" PRIu64 "%s%.2f",
+					csv_sep,
+					run,
+					csv_sep,
+					ena ? 100.0 * run / ena : 100.0);
+	} else if (run != ena) {
+		fprintf(output, "  (%.2f%%)", 100.0 * run / ena);
+	}
+}
+
 static void print_noise_pct(double total, double avg)
 {
 	double pct = rel_stddev_stats(total, avg);
@@ -1076,6 +1095,8 @@
 		if (total) {
 			ratio = avg / total;
 			fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
+		} else {
+			fprintf(output, "                                   ");
 		}
 		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
@@ -1145,6 +1166,8 @@
 		if (total) {
 			ratio = avg / total;
 			fprintf(output, " # %8.3f GHz                    ", ratio);
+		} else {
+			fprintf(output, "                                   ");
 		}
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
@@ -1249,6 +1272,7 @@
 					fprintf(output, "%s%s",
 						csv_sep, counter->cgrp->name);
 
+				print_running(run, ena);
 				fputc('\n', output);
 				continue;
 			}
@@ -1259,13 +1283,10 @@
 			else
 				abs_printout(id, nr, counter, uval);
 
-			if (!csv_output) {
+			if (!csv_output)
 				print_noise(counter, 1.0);
 
-				if (run != ena)
-					fprintf(output, "  (%.2f%%)",
-						100.0 * run / ena);
-			}
+			print_running(run, ena);
 			fputc('\n', output);
 		}
 	}
@@ -1281,11 +1302,15 @@
 	double avg = avg_stats(&ps->res_stats[0]);
 	int scaled = counter->counts->scaled;
 	double uval;
+	double avg_enabled, avg_running;
+
+	avg_enabled = avg_stats(&ps->res_stats[1]);
+	avg_running = avg_stats(&ps->res_stats[2]);
 
 	if (prefix)
 		fprintf(output, "%s", prefix);
 
-	if (scaled == -1) {
+	if (scaled == -1 || !counter->supported) {
 		fprintf(output, "%*s%s",
 			csv_output ? 0 : 18,
 			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
@@ -1300,6 +1325,7 @@
 		if (counter->cgrp)
 			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
 
+		print_running(avg_running, avg_enabled);
 		fputc('\n', output);
 		return;
 	}
@@ -1313,19 +1339,7 @@
 
 	print_noise(counter, avg);
 
-	if (csv_output) {
-		fputc('\n', output);
-		return;
-	}
-
-	if (scaled) {
-		double avg_enabled, avg_running;
-
-		avg_enabled = avg_stats(&ps->res_stats[1]);
-		avg_running = avg_stats(&ps->res_stats[2]);
-
-		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
-	}
+	print_running(avg_running, avg_enabled);
 	fprintf(output, "\n");
 }
 
@@ -1367,6 +1381,7 @@
 				fprintf(output, "%s%s",
 					csv_sep, counter->cgrp->name);
 
+			print_running(run, ena);
 			fputc('\n', output);
 			continue;
 		}
@@ -1378,13 +1393,10 @@
 		else
 			abs_printout(cpu, 0, counter, uval);
 
-		if (!csv_output) {
+		if (!csv_output)
 			print_noise(counter, 1.0);
+		print_running(run, ena);
 
-			if (run != ena)
-				fprintf(output, "  (%.2f%%)",
-					100.0 * run / ena);
-		}
 		fputc('\n', output);
 	}
 }
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index f3bb1a4..e50fe11 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -67,6 +67,7 @@
 				skip_eagain;
 	u64			min_time,
 				merge_dist;
+	bool			force;
 };
 
 struct per_pidcomm;
@@ -1598,6 +1599,7 @@
 	struct perf_data_file file = {
 		.path = input_name,
 		.mode = PERF_DATA_MODE_READ,
+		.force = tchart->force,
 	};
 
 	struct perf_session *session = perf_session__new(&file, false,
@@ -1623,7 +1625,7 @@
 		goto out_delete;
 	}
 
-	ret = perf_session__process_events(session, &tchart->tool);
+	ret = perf_session__process_events(session);
 	if (ret)
 		goto out_delete;
 
@@ -1956,9 +1958,11 @@
 	OPT_CALLBACK(0, "io-merge-dist", &tchart.merge_dist, "time",
 		     "merge events that are merge-dist us apart",
 		     parse_time),
+	OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"),
 	OPT_END()
 	};
-	const char * const timechart_usage[] = {
+	const char * const timechart_subcommands[] = { "record", NULL };
+	const char *timechart_usage[] = {
 		"perf timechart [<options>] {record}",
 		NULL
 	};
@@ -1976,8 +1980,8 @@
 		"perf timechart record [<options>]",
 		NULL
 	};
-	argc = parse_options(argc, argv, timechart_options, timechart_usage,
-			PARSE_OPT_STOP_AT_NON_OPTION);
+	argc = parse_options_subcommand(argc, argv, timechart_options, timechart_subcommands,
+			timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
 	if (tchart.power_only && tchart.tasks_only) {
 		pr_err("-P and -T options cannot be used at the same time.\n");
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c4c7eac..1cb3436 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -716,7 +716,7 @@
 
 	if (!machine) {
 		pr_err("%u unprocessable samples recorded.\r",
-		       top->session->stats.nr_unprocessable_samples++);
+		       top->session->evlist->stats.nr_unprocessable_samples++);
 		return;
 	}
 
@@ -757,8 +757,10 @@
 		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
 		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
 			if (symbol_conf.vmlinux_name) {
-				ui__warning("The %s file can't be used.\n%s",
-					    symbol_conf.vmlinux_name, msg);
+				char serr[256];
+				dso__strerror_load(al.map->dso, serr, sizeof(serr));
+				ui__warning("The %s file can't be used: %s\n%s",
+					    symbol_conf.vmlinux_name, serr, msg);
 			} else {
 				ui__warning("A vmlinux file was not found.\n%s",
 					    msg);
@@ -856,7 +858,7 @@
 			hists__inc_nr_events(evsel__hists(evsel), event->header.type);
 			machine__process_event(machine, event, &sample);
 		} else
-			++session->stats.nr_unknown_events;
+			++session->evlist->stats.nr_unknown_events;
 next_event:
 		perf_evlist__mmap_consume(top->evlist, idx);
 	}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 7e935f10..e124741 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -52,7 +52,9 @@
 #define TP_UINT_FIELD(bits) \
 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
 { \
-	return *(u##bits *)(sample->raw_data + field->offset); \
+	u##bits value; \
+	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
+	return value;  \
 }
 
 TP_UINT_FIELD(8);
@@ -63,7 +65,8 @@
 #define TP_UINT_FIELD__SWAPPED(bits) \
 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
 { \
-	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
+	u##bits value; \
+	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
 	return bswap_##bits(value);\
 }
 
@@ -1132,6 +1135,8 @@
 
 struct syscall {
 	struct event_format *tp_format;
+	int		    nr_args;
+	struct format_field *args;
 	const char	    *name;
 	bool		    filtered;
 	bool		    is_exit;
@@ -1219,7 +1224,9 @@
 		struct syscall  *table;
 	} syscalls;
 	struct record_opts	opts;
+	struct perf_evlist	*evlist;
 	struct machine		*host;
+	struct thread		*current;
 	u64			base_time;
 	FILE			*output;
 	unsigned long		nr_events;
@@ -1227,6 +1234,10 @@
 	const char 		*last_vfs_getname;
 	struct intlist		*tid_list;
 	struct intlist		*pid_list;
+	struct {
+		size_t		nr;
+		pid_t		*entries;
+	}			filter_pids;
 	double			duration_filter;
 	double			runtime_ms;
 	struct {
@@ -1243,6 +1254,7 @@
 	bool			show_comm;
 	bool			show_tool_stats;
 	bool			trace_syscalls;
+	bool			force;
 	int			trace_pgfaults;
 };
 
@@ -1433,14 +1445,14 @@
 	struct format_field *field;
 	int idx = 0;
 
-	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
+	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
 	if (sc->arg_scnprintf == NULL)
 		return -1;
 
 	if (sc->fmt)
 		sc->arg_parm = sc->fmt->arg_parm;
 
-	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
+	for (field = sc->args; field; field = field->next) {
 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
 		else if (field->flags & FIELD_IS_POINTER)
@@ -1506,18 +1518,37 @@
 	if (sc->tp_format == NULL)
 		return -1;
 
+	sc->args = sc->tp_format->format.fields;
+	sc->nr_args = sc->tp_format->format.nr_fields;
+	/* drop nr field - not relevant here; does not exist on older kernels */
+	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
+		sc->args = sc->args->next;
+		--sc->nr_args;
+	}
+
 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
 
 	return syscall__set_arg_fmts(sc);
 }
 
+/*
+ * args is to be interpreted as a series of longs but we need to handle
+ * 8-byte unaligned accesses. args points to raw_data within the event
+ * and raw_data is guaranteed to be 8-byte unaligned because it is
+ * preceded by raw_size which is a u32. So we need to copy args to a temp
+ * variable to read it. Most notably this avoids extended load instructions
+ * on unaligned addresses
+ */
+
 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
-				      unsigned long *args, struct trace *trace,
+				      unsigned char *args, struct trace *trace,
 				      struct thread *thread)
 {
 	size_t printed = 0;
+	unsigned char *p;
+	unsigned long val;
 
-	if (sc->tp_format != NULL) {
+	if (sc->args != NULL) {
 		struct format_field *field;
 		u8 bit = 1;
 		struct syscall_arg arg = {
@@ -1527,16 +1558,21 @@
 			.thread = thread,
 		};
 
-		for (field = sc->tp_format->format.fields->next; field;
+		for (field = sc->args; field;
 		     field = field->next, ++arg.idx, bit <<= 1) {
 			if (arg.mask & bit)
 				continue;
+
+			/* special care for unaligned accesses */
+			p = args + sizeof(unsigned long) * arg.idx;
+			memcpy(&val, p, sizeof(val));
+
 			/*
  			 * Suppress this argument if its value is zero and
  			 * and we don't have a string associated in an
  			 * strarray for it.
  			 */
-			if (args[arg.idx] == 0 &&
+			if (val == 0 &&
 			    !(sc->arg_scnprintf &&
 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
 			      sc->arg_parm[arg.idx]))
@@ -1545,23 +1581,26 @@
 			printed += scnprintf(bf + printed, size - printed,
 					     "%s%s: ", printed ? ", " : "", field->name);
 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
-				arg.val = args[arg.idx];
+				arg.val = val;
 				if (sc->arg_parm)
 					arg.parm = sc->arg_parm[arg.idx];
 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
 								      size - printed, &arg);
 			} else {
 				printed += scnprintf(bf + printed, size - printed,
-						     "%ld", args[arg.idx]);
+						     "%ld", val);
 			}
 		}
 	} else {
 		int i = 0;
 
 		while (i < 6) {
+			/* special care for unaligned accesses */
+			p = args + sizeof(unsigned long) * i;
+			memcpy(&val, p, sizeof(val));
 			printed += scnprintf(bf + printed, size - printed,
 					     "%sarg%d: %ld",
-					     printed ? ", " : "", i, args[i]);
+					     printed ? ", " : "", i, val);
 			++i;
 		}
 	}
@@ -1642,6 +1681,29 @@
 	update_stats(stats, duration);
 }
 
+static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
+{
+	struct thread_trace *ttrace;
+	u64 duration;
+	size_t printed;
+
+	if (trace->current == NULL)
+		return 0;
+
+	ttrace = thread__priv(trace->current);
+
+	if (!ttrace->entry_pending)
+		return 0;
+
+	duration = sample->time - ttrace->entry_time;
+
+	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
+	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
+	ttrace->entry_pending = false;
+
+	return printed;
+}
+
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 			    union perf_event *event __maybe_unused,
 			    struct perf_sample *sample)
@@ -1673,6 +1735,9 @@
 			return -1;
 	}
 
+	if (!trace->summary_only)
+		trace__printf_interrupted_entry(trace, sample);
+
 	ttrace->entry_time = sample->time;
 	msg = ttrace->entry_str;
 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
@@ -1688,6 +1753,11 @@
 	} else
 		ttrace->entry_pending = true;
 
+	if (trace->current != thread) {
+		thread__put(trace->current);
+		trace->current = thread__get(thread);
+	}
+
 	return 0;
 }
 
@@ -1805,6 +1875,28 @@
 	return 0;
 }
 
+static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample)
+{
+	trace__printf_interrupted_entry(trace, sample);
+	trace__fprintf_tstamp(trace, sample->time, trace->output);
+
+	if (trace->trace_syscalls)
+		fprintf(trace->output, "(         ): ");
+
+	fprintf(trace->output, "%s:", evsel->name);
+
+	if (evsel->tp_format) {
+		event_format__fprintf(evsel->tp_format, sample->cpu,
+				      sample->raw_data, sample->raw_size,
+				      trace->output);
+	}
+
+	fprintf(trace->output, ")\n");
+	return 0;
+}
+
 static void print_location(FILE *f, struct perf_sample *sample,
 			   struct addr_location *al,
 			   bool print_dso, bool print_sym)
@@ -2037,10 +2129,39 @@
 	return 0;
 }
 
+static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
+{
+	const u32 type = event->header.type;
+	struct perf_evsel *evsel;
+
+	if (!trace->full_time && trace->base_time == 0)
+		trace->base_time = sample->time;
+
+	if (type != PERF_RECORD_SAMPLE) {
+		trace__process_event(trace, trace->host, event, sample);
+		return;
+	}
+
+	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
+	if (evsel == NULL) {
+		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
+		return;
+	}
+
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+	    sample->raw_data == NULL) {
+		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
+		       perf_evsel__name(evsel), sample->tid,
+		       sample->cpu, sample->raw_size);
+	} else {
+		tracepoint_handler handler = evsel->handler;
+		handler(trace, evsel, event, sample);
+	}
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
-	struct perf_evlist *evlist = perf_evlist__new();
-	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = trace->evlist;
 	int err = -1, i;
 	unsigned long before;
 	const bool forks = argc > 0;
@@ -2048,11 +2169,6 @@
 
 	trace->live = true;
 
-	if (evlist == NULL) {
-		fprintf(trace->output, "Not enough memory to run!\n");
-		goto out;
-	}
-
 	if (trace->trace_syscalls &&
 	    perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
 					   trace__sys_exit))
@@ -2105,16 +2221,34 @@
 	if (err < 0)
 		goto out_error_open;
 
+	/*
+	 * Better not use !target__has_task() here because we need to cover the
+	 * case where no threads were specified in the command line, but a
+	 * workload was, and in that case we will fill in the thread_map when
+	 * we fork the workload in perf_evlist__prepare_workload.
+	 */
+	if (trace->filter_pids.nr > 0)
+		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
+	else if (evlist->threads->map[0] == -1)
+		err = perf_evlist__set_filter_pid(evlist, getpid());
+
+	if (err < 0) {
+		printf("err=%d,%s\n", -err, strerror(-err));
+		exit(1);
+	}
+
 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
 	if (err < 0)
 		goto out_error_mmap;
 
-	perf_evlist__enable(evlist);
-
 	if (forks)
 		perf_evlist__start_workload(evlist);
+	else
+		perf_evlist__enable(evlist);
 
-	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
+	trace->multiple_threads = evlist->threads->map[0] == -1 ||
+				  evlist->threads->nr > 1 ||
+				  perf_evlist__first(evlist)->attr.inherit;
 again:
 	before = trace->nr_events;
 
@@ -2122,8 +2256,6 @@
 		union perf_event *event;
 
 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
-			const u32 type = event->header.type;
-			tracepoint_handler handler;
 			struct perf_sample sample;
 
 			++trace->nr_events;
@@ -2134,30 +2266,7 @@
 				goto next_event;
 			}
 
-			if (!trace->full_time && trace->base_time == 0)
-				trace->base_time = sample.time;
-
-			if (type != PERF_RECORD_SAMPLE) {
-				trace__process_event(trace, trace->host, event, &sample);
-				continue;
-			}
-
-			evsel = perf_evlist__id2evsel(evlist, sample.id);
-			if (evsel == NULL) {
-				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
-				goto next_event;
-			}
-
-			if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
-			    sample.raw_data == NULL) {
-				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
-				       perf_evsel__name(evsel), sample.tid,
-				       sample.cpu, sample.raw_size);
-				goto next_event;
-			}
-
-			handler = evsel->handler;
-			handler(trace, evsel, event, &sample);
+			trace__handle_event(trace, event, &sample);
 next_event:
 			perf_evlist__mmap_consume(evlist, i);
 
@@ -2180,6 +2289,8 @@
 	}
 
 out_disable:
+	thread__zput(trace->current);
+
 	perf_evlist__disable(evlist);
 
 	if (!err) {
@@ -2197,7 +2308,7 @@
 
 out_delete_evlist:
 	perf_evlist__delete(evlist);
-out:
+	trace->evlist = NULL;
 	trace->live = false;
 	return err;
 {
@@ -2235,6 +2346,7 @@
 	struct perf_data_file file = {
 		.path  = input_name,
 		.mode  = PERF_DATA_MODE_READ,
+		.force = trace->force,
 	};
 	struct perf_session *session;
 	struct perf_evsel *evsel;
@@ -2309,7 +2421,7 @@
 
 	setup_pager();
 
-	err = perf_session__process_events(session, &trace->tool);
+	err = perf_session__process_events(session);
 	if (err)
 		pr_err("Failed to process events, error %d", err);
 
@@ -2434,6 +2546,38 @@
 	return 0;
 }
 
+static int trace__set_filter_pids(const struct option *opt, const char *str,
+				  int unset __maybe_unused)
+{
+	int ret = -1;
+	size_t i;
+	struct trace *trace = opt->value;
+	/*
+	 * FIXME: introduce a intarray class, plain parse csv and create a
+	 * { int nr, int entries[] } struct...
+	 */
+	struct intlist *list = intlist__new(str);
+
+	if (list == NULL)
+		return -1;
+
+	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
+	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
+
+	if (trace->filter_pids.entries == NULL)
+		goto out;
+
+	trace->filter_pids.entries[0] = getpid();
+
+	for (i = 1; i < trace->filter_pids.nr; ++i)
+		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
+
+	intlist__delete(list);
+	ret = 0;
+out:
+	return ret;
+}
+
 static int trace__open_output(struct trace *trace, const char *filename)
 {
 	struct stat st;
@@ -2468,9 +2612,17 @@
 	return 0;
 }
 
+static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each(evlist, evsel)
+		evsel->handler = handler;
+}
+
 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	const char * const trace_usage[] = {
+	const char *trace_usage[] = {
 		"perf trace [<options>] [<command>]",
 		"perf trace [<options>] -- <command> [<options>]",
 		"perf trace record [<options>] [<command>]",
@@ -2502,6 +2654,9 @@
 	const char *output_name = NULL;
 	const char *ev_qualifier_str = NULL;
 	const struct option trace_options[] = {
+	OPT_CALLBACK(0, "event", &trace.evlist, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events_option),
 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
 		    "show the thread COMM next to its id"),
 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
@@ -2513,6 +2668,8 @@
 		    "trace events on existing process id"),
 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
 		    "trace events on existing thread id"),
+	OPT_CALLBACK(0, "filter-pids", &trace, "float",
+		     "show only events with duration > N.M ms", trace__set_filter_pids),
 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
@@ -2538,19 +2695,36 @@
 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
 		     "Trace pagefaults", parse_pagefaults, "maj"),
 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
+	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
 	OPT_END()
 	};
+	const char * const trace_subcommands[] = { "record", NULL };
 	int err;
 	char bf[BUFSIZ];
 
-	argc = parse_options(argc, argv, trace_options, trace_usage,
-			     PARSE_OPT_STOP_AT_NON_OPTION);
+	signal(SIGSEGV, sighandler_dump_stack);
+	signal(SIGFPE, sighandler_dump_stack);
+
+	trace.evlist = perf_evlist__new();
+	if (trace.evlist == NULL)
+		return -ENOMEM;
+
+	if (trace.evlist == NULL) {
+		pr_err("Not enough memory to run!\n");
+		goto out;
+	}
+
+	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
+				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
 	if (trace.trace_pgfaults) {
 		trace.opts.sample_address = true;
 		trace.opts.sample_time = true;
 	}
 
+	if (trace.evlist->nr_entries > 0)
+		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
+
 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
 		return trace__record(&trace, argc-1, &argv[1]);
 
@@ -2558,7 +2732,8 @@
 	if (trace.summary_only)
 		trace.summary = trace.summary_only;
 
-	if (!trace.trace_syscalls && !trace.trace_pgfaults) {
+	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
+	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
 		pr_err("Please specify something to trace.\n");
 		return -1;
 	}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index b210d62..3688ad2 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -37,6 +37,7 @@
 extern int cmd_trace(int argc, const char **argv, const char *prefix);
 extern int cmd_inject(int argc, const char **argv, const char *prefix);
 extern int cmd_mem(int argc, const char **argv, const char *prefix);
+extern int cmd_data(int argc, const char **argv, const char *prefix);
 
 extern int find_scripts(char **scripts_array, char **scripts_path_array);
 #endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 0906fc4..00fcaf8 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -7,6 +7,7 @@
 perf-bench			mainporcelain common
 perf-buildid-cache		mainporcelain common
 perf-buildid-list		mainporcelain common
+perf-data			mainporcelain common
 perf-diff			mainporcelain common
 perf-evlist			mainporcelain common
 perf-inject			mainporcelain common
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index cc22408..59a98c6 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -11,19 +11,26 @@
 obj-perf := $(abspath $(obj-perf))/
 endif
 
-LIB_INCLUDE := $(srctree)/tools/lib/
+$(shell echo -n > .config-detected)
+detected     = $(shell echo "$(1)=y"       >> .config-detected)
+detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected)
+
 CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
 
 include $(src-perf)/config/Makefile.arch
 
+$(call detected_var,ARCH)
+
 NO_PERF_REGS := 1
 
 # Additional ARCH settings for x86
 ifeq ($(ARCH),x86)
+  $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
     CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
     LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
+    $(call detected,CONFIG_X86_64)
   else
     LIBUNWIND_LIBS = -lunwind -lunwind-x86
   endif
@@ -40,6 +47,10 @@
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
 
+ifeq ($(NO_PERF_REGS),0)
+  $(call detected,CONFIG_PERF_REGS)
+endif
+
 # So far there's only x86 and arm libdw unwind support merged in perf.
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
@@ -84,6 +95,17 @@
   FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
 endif
 
+ifdef LIBBABELTRACE
+  # for linking with debug library, run like:
+  # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
+  ifdef LIBBABELTRACE_DIR
+    LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
+    LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
+  endif
+  FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
+  FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+endif
+
 # include ARCH specific config
 -include $(src-perf)/arch/$(ARCH)/Makefile
 
@@ -114,6 +136,8 @@
   PARSER_DEBUG_BISON := -t
   PARSER_DEBUG_FLEX  := -d
   CFLAGS             += -DPARSER_DEBUG
+  $(call detected_var,PARSER_DEBUG_BISON)
+  $(call detected_var,PARSER_DEBUG_FLEX)
 endif
 
 ifndef NO_LIBPYTHON
@@ -152,121 +176,7 @@
 
 EXTLIBS = -lpthread -lrt -lm -ldl
 
-ifneq ($(OUTPUT),)
-  OUTPUT_FEATURES = $(OUTPUT)config/feature-checks/
-  $(shell mkdir -p $(OUTPUT_FEATURES))
-endif
-
-feature_check = $(eval $(feature_check_code))
-define feature_check_code
-  feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
-endef
-
-feature_set = $(eval $(feature_set_code))
-define feature_set_code
-  feature-$(1) := 1
-endef
-
-#
-# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output:
-#
-
-#
-# Note that this is not a complete list of all feature tests, just
-# those that are typically built on a fully configured system.
-#
-# [ Feature tests not mentioned here have to be built explicitly in
-#   the rule that uses them - an example for that is the 'bionic'
-#   feature check. ]
-#
-CORE_FEATURE_TESTS =			\
-	backtrace			\
-	dwarf				\
-	fortify-source			\
-	sync-compare-and-swap		\
-	glibc				\
-	gtk2				\
-	gtk2-infobar			\
-	libaudit			\
-	libbfd				\
-	libelf				\
-	libelf-getphdrnum		\
-	libelf-mmap			\
-	libnuma				\
-	libperl				\
-	libpython			\
-	libpython-version		\
-	libslang			\
-	libunwind			\
-	pthread-attr-setaffinity-np	\
-	stackprotector-all		\
-	timerfd				\
-	libdw-dwarf-unwind		\
-	zlib
-
-LIB_FEATURE_TESTS =			\
-	dwarf				\
-	glibc				\
-	gtk2				\
-	libaudit			\
-	libbfd				\
-	libelf				\
-	libnuma				\
-	libperl				\
-	libpython			\
-	libslang			\
-	libunwind			\
-	libdw-dwarf-unwind		\
-	zlib
-
-VF_FEATURE_TESTS =			\
-	backtrace			\
-	fortify-source			\
-	sync-compare-and-swap		\
-	gtk2-infobar			\
-	libelf-getphdrnum		\
-	libelf-mmap			\
-	libpython-version		\
-	pthread-attr-setaffinity-np	\
-	stackprotector-all		\
-	timerfd				\
-	libunwind-debug-frame		\
-	bionic				\
-	liberty				\
-	liberty-z			\
-	cplus-demangle			\
-	compile-32			\
-	compile-x32
-
-# Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features.
-# If in the future we need per-feature checks/flags for features not
-# mentioned in this list we need to refactor this ;-).
-set_test_all_flags = $(eval $(set_test_all_flags_code))
-define set_test_all_flags_code
-  FEATURE_CHECK_CFLAGS-all  += $(FEATURE_CHECK_CFLAGS-$(1))
-  FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1))
-endef
-
-$(foreach feat,$(CORE_FEATURE_TESTS),$(call set_test_all_flags,$(feat)))
-
-#
-# Special fast-path for the 'all features are available' case:
-#
-$(call feature_check,all,$(MSG))
-
-#
-# Just in case the build freshly failed, make sure we print the
-# feature matrix:
-#
-ifeq ($(feature-all), 1)
-  #
-  # test-all.c passed - just set all the core feature flags to 1:
-  #
-  $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat)))
-else
-  $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(addsuffix .bin,$(CORE_FEATURE_TESTS)) >/dev/null 2>&1)
-  $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat)))
-endif
+include $(srctree)/tools/build/Makefile.feature
 
 ifeq ($(feature-stackprotector-all), 1)
   CFLAGS += -fstack-protector-all
@@ -295,7 +205,7 @@
 
 CFLAGS += -I$(src-perf)/util
 CFLAGS += -I$(src-perf)
-CFLAGS += -I$(LIB_INCLUDE)
+CFLAGS += -I$(srctree)/tools/lib/
 
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 
@@ -361,6 +271,7 @@
 ifndef NO_LIBELF
   CFLAGS += -DHAVE_LIBELF_SUPPORT
   EXTLIBS += -lelf
+  $(call detected,CONFIG_LIBELF)
 
   ifeq ($(feature-libelf-mmap), 1)
     CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
@@ -381,6 +292,7 @@
       CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
       LDFLAGS += $(LIBDW_LDFLAGS)
       EXTLIBS += -ldw
+      $(call detected,CONFIG_DWARF)
     endif # PERF_HAVE_DWARF_REGS
   endif # NO_DWARF
 endif # NO_LIBELF
@@ -408,9 +320,11 @@
     dwarf-post-unwind := 0
   else
     dwarf-post-unwind-text := libdw
+    $(call detected,CONFIG_LIBDW_DWARF_UNWIND)
   endif
 else
   dwarf-post-unwind-text := libunwind
+  $(call detected,CONFIG_LIBUNWIND)
   # Enable libunwind support by default.
   ifndef NO_LIBDW_DWARF_UNWIND
     NO_LIBDW_DWARF_UNWIND := 1
@@ -419,6 +333,7 @@
 
 ifeq ($(dwarf-post-unwind),1)
   CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT
+  $(call detected,CONFIG_DWARF_UNWIND)
 else
   NO_DWARF_UNWIND := 1
 endif
@@ -447,6 +362,7 @@
   else
     CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
     EXTLIBS += -laudit
+    $(call detected,CONFIG_AUDIT)
   endif
 endif
 
@@ -463,6 +379,7 @@
     CFLAGS += -I/usr/include/slang
     CFLAGS += -DHAVE_SLANG_SUPPORT
     EXTLIBS += -lslang
+    $(call detected,CONFIG_SLANG)
   endif
 endif
 
@@ -497,10 +414,11 @@
   ifneq ($(feature-libperl), 1)
     CFLAGS += -DNO_LIBPERL
     NO_LIBPERL := 1
-    msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed);
+    msg := $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev);
   else
     LDFLAGS += $(PERL_EMBED_LDFLAGS)
     EXTLIBS += $(PERL_EMBED_LIBADD)
+    $(call detected,CONFIG_LIBPERL)
   endif
 endif
 
@@ -513,22 +431,21 @@
 disable-python = $(eval $(disable-python_code))
 define disable-python_code
   CFLAGS += -DNO_LIBPYTHON
-  $(if $(1),$(warning No $(1) was found))
-  $(warning Python support will not be built)
+  $(warning $1)
   NO_LIBPYTHON := 1
 endef
 
 ifdef NO_LIBPYTHON
-  $(call disable-python)
+  $(call disable-python,Python support disabled by user)
 else
 
   ifndef PYTHON
-    $(call disable-python,python interpreter)
+    $(call disable-python,No python interpreter was found: disables Python support - please install python-devel/python-dev)
   else
     PYTHON_WORD := $(call shell-wordify,$(PYTHON))
 
     ifndef PYTHON_CONFIG
-      $(call disable-python,python-config tool)
+      $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev)
     else
 
       PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
@@ -540,7 +457,7 @@
       FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 
       ifneq ($(feature-libpython), 1)
-        $(call disable-python,Python.h (for Python 2.x))
+        $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev)
       else
 
         ifneq ($(feature-libpython-version), 1)
@@ -560,6 +477,7 @@
           LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
           EXTLIBS += $(PYTHON_EMBED_LIBADD)
           LANG_BINDINGS += $(obj-perf)python/perf.so
+          $(call detected,CONFIG_LIBPYTHON)
         endif
       endif
     endif
@@ -600,7 +518,7 @@
             EXTLIBS += -liberty
             CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
           else
-            msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
+            msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
             CFLAGS += -DNO_DEMANGLE
           endif
         endif
@@ -617,11 +535,23 @@
   ifeq ($(feature-zlib), 1)
     CFLAGS += -DHAVE_ZLIB_SUPPORT
     EXTLIBS += -lz
+    $(call detected,CONFIG_ZLIB)
   else
     NO_ZLIB := 1
   endif
 endif
 
+ifndef NO_LZMA
+  ifeq ($(feature-lzma), 1)
+    CFLAGS += -DHAVE_LZMA_SUPPORT
+    EXTLIBS += -llzma
+    $(call detected,CONFIG_LZMA)
+  else
+    msg := $(warning No liblzma found, disables xz kernel module decompression, please install xz-devel/liblzma-dev);
+    NO_LZMA := 1
+  endif
+endif
+
 ifndef NO_BACKTRACE
   ifeq ($(feature-backtrace), 1)
     CFLAGS += -DHAVE_BACKTRACE_SUPPORT
@@ -635,6 +565,7 @@
   else
     CFLAGS += -DHAVE_LIBNUMA_SUPPORT
     EXTLIBS += -lnuma
+    $(call detected,CONFIG_NUMA)
   endif
 endif
 
@@ -651,7 +582,7 @@
       NO_PERF_READ_VDSO32 := 1
     endif
   endif
-  ifneq (${IS_X86_64}, 1)
+  ifneq ($(ARCH), x86)
     NO_PERF_READ_VDSOX32 := 1
   endif
   ifndef NO_PERF_READ_VDSOX32
@@ -667,6 +598,18 @@
   NO_PERF_READ_VDSOX32 := 1
 endif
 
+ifdef LIBBABELTRACE
+  $(call feature_check,libbabeltrace)
+  ifeq ($(feature-libbabeltrace), 1)
+    CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS)
+    LDFLAGS += $(LIBBABELTRACE_LDFLAGS)
+    EXTLIBS += -lbabeltrace-ctf
+    $(call detected,CONFIG_LIBBABELTRACE)
+  else
+    msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev);
+  endif
+endif
+
 # Among the variables below, these:
 #   perfexecdir
 #   template_dir
@@ -699,7 +642,7 @@
 ETC_PERFCONFIG = etc/perfconfig
 endif
 ifndef lib
-ifeq ($(IS_X86_64),1)
+ifeq ($(ARCH)$(IS_64_BIT), x861)
 lib = lib64
 else
 lib = lib
@@ -735,83 +678,33 @@
 plugindir_SQ= $(subst ','\'',$(plugindir))
 endif
 
-#
-# Print the result of the feature test:
-#
-feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG))
-
-define feature_print_status_code
-  ifeq ($(feature-$(1)), 1)
-    MSG = $(shell printf '...%30s: [ \033[32mon\033[m  ]' $(1))
-  else
-    MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1))
-  endif
-endef
-
-feature_print_var = $(eval $(feature_print_var_code)) $(info $(MSG))
-define feature_print_var_code
+print_var = $(eval $(print_var_code)) $(info $(MSG))
+define print_var_code
     MSG = $(shell printf '...%30s: %s' $(1) $($(1)))
 endef
 
-feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG))
-define feature_print_text_code
-    MSG = $(shell printf '...%30s: %s' $(1) $(2))
-endef
-
-PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat))))
-PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES)
-
-ifeq ($(dwarf-post-unwind),1)
-  PERF_FEATURES += dwarf-post-unwind($(dwarf-post-unwind-text))
-endif
-
-# The $(display_lib) controls the default detection message
-# output. It's set if:
-# - detected features differes from stored features from
-#   last build (in PERF-FEATURES file)
-# - one of the $(LIB_FEATURE_TESTS) is not detected
-# - VF is enabled
-
-ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)")
-  $(shell echo "$(PERF_FEATURES)" > $(OUTPUT)PERF-FEATURES)
-  display_lib := 1
-endif
-
-feature_check = $(eval $(feature_check_code))
-define feature_check_code
-  ifneq ($(feature-$(1)), 1)
-    display_lib := 1
-  endif
-endef
-
-$(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_check,$(feat)))
-
 ifeq ($(VF),1)
-  display_lib := 1
-  display_vf := 1
-endif
-
-ifeq ($(display_lib),1)
-  $(info )
-  $(info Auto-detecting system features:)
-  $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print_status,$(feat),))
-
-  ifeq ($(dwarf-post-unwind),1)
-    $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
-  endif
-endif
-
-ifeq ($(display_vf),1)
-  $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print_status,$(feat),))
-  $(info )
-  $(call feature_print_var,prefix)
-  $(call feature_print_var,bindir)
-  $(call feature_print_var,libdir)
-  $(call feature_print_var,sysconfdir)
-  $(call feature_print_var,LIBUNWIND_DIR)
-  $(call feature_print_var,LIBDW_DIR)
-endif
-
-ifeq ($(display_lib),1)
+  $(call print_var,prefix)
+  $(call print_var,bindir)
+  $(call print_var,libdir)
+  $(call print_var,sysconfdir)
+  $(call print_var,LIBUNWIND_DIR)
+  $(call print_var,LIBDW_DIR)
   $(info )
 endif
+
+$(call detected_var,bindir_SQ)
+$(call detected_var,PYTHON_WORD)
+ifneq ($(OUTPUT),)
+$(call detected_var,OUTPUT)
+endif
+$(call detected_var,htmldir_SQ)
+$(call detected_var,infodir_SQ)
+$(call detected_var,mandir_SQ)
+$(call detected_var,ETC_PERFCONFIG_SQ)
+$(call detected_var,prefix_SQ)
+$(call detected_var,perfexecdir_SQ)
+$(call detected_var,LIBDIR)
+$(call detected_var,GTK_CFLAGS)
+$(call detected_var,PERL_EMBED_CCOPTS)
+$(call detected_var,PYTHON_EMBED_CCOPTS)
diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch
index ac8721f..e11fbd6 100644
--- a/tools/perf/config/Makefile.arch
+++ b/tools/perf/config/Makefile.arch
@@ -1,32 +1,15 @@
+ifndef ARCH
+ARCH := $(shell uname -m 2>/dev/null || echo not)
+endif
 
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-
-RAW_ARCH := $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
-                                  -e s/arm.*/arm/ -e s/sa110/arm/ \
+ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
+                                  -e s/sun4u/sparc/ -e s/sparc64/sparc/ \
+                                  -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \
                                   -e s/s390x/s390/ -e s/parisc64/parisc/ \
                                   -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
                                   -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
                                   -e s/tile.*/tile/ )
 
-# Additional ARCH settings for x86
-ifeq ($(RAW_ARCH),i386)
-  ARCH ?= x86
-endif
-
-ifeq ($(RAW_ARCH),x86_64)
-  ARCH ?= x86
-
-  ifneq (, $(findstring m32,$(CFLAGS)))
-    RAW_ARCH := x86_32
-  endif
-endif
-
-ifeq ($(RAW_ARCH),sparc64)
-  ARCH ?= sparc
-endif
-
-ARCH ?= $(RAW_ARCH)
-
 LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
 ifeq ($(LP64), 1)
   IS_64_BIT := 1
diff --git a/tools/perf/config/feature-checks/.gitignore b/tools/perf/config/feature-checks/.gitignore
deleted file mode 100644
index 80f3da0..0000000
--- a/tools/perf/config/feature-checks/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.d
-*.bin
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index 7076a62..c16ce83 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -175,6 +175,5 @@
 define get-executable-or-default
 $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
 endef
-_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2)))
-_gea_warn = $(warning The path '$(1)' is not executable.)
+_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh
index 3356984..3ba80b2 100644
--- a/tools/perf/perf-completion.sh
+++ b/tools/perf/perf-completion.sh
@@ -47,8 +47,16 @@
 	done
 }
 
-type _get_comp_words_by_ref &>/dev/null ||
-_get_comp_words_by_ref()
+# Define preload_get_comp_words_by_ref="false", if the function
+# __perf_get_comp_words_by_ref() is required instead.
+preload_get_comp_words_by_ref="true"
+
+if [ $preload_get_comp_words_by_ref = "true" ]; then
+	type _get_comp_words_by_ref &>/dev/null ||
+	preload_get_comp_words_by_ref="false"
+fi
+[ $preload_get_comp_words_by_ref = "true" ] ||
+__perf_get_comp_words_by_ref()
 {
 	local exclude cur_ words_ cword_
 	if [ "$1" = "-n" ]; then
@@ -76,8 +84,16 @@
 	done
 }
 
-type __ltrim_colon_completions &>/dev/null ||
-__ltrim_colon_completions()
+# Define preload__ltrim_colon_completions="false", if the function
+# __perf__ltrim_colon_completions() is required instead.
+preload__ltrim_colon_completions="true"
+
+if [ $preload__ltrim_colon_completions = "true" ]; then
+	type __ltrim_colon_completions &>/dev/null ||
+	preload__ltrim_colon_completions="false"
+fi
+[ $preload__ltrim_colon_completions = "true" ] ||
+__perf__ltrim_colon_completions()
 {
 	if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then
 		# Remove colon-word prefix from COMPREPLY items
@@ -97,7 +113,32 @@
 __perfcomp_colon ()
 {
 	__perfcomp "$1" "$2"
-	__ltrim_colon_completions $cur
+	if [ $preload__ltrim_colon_completions = "true" ]; then
+		__ltrim_colon_completions $cur
+	else
+		__perf__ltrim_colon_completions $cur
+	fi
+}
+
+__perf_prev_skip_opts ()
+{
+	local i cmd_ cmds_
+
+	let i=cword-1
+	cmds_=$($cmd $1 --list-cmds)
+	prev_skip_opts=()
+	while [ $i -ge 0 ]; do
+		if [[ ${words[i]} == $1 ]]; then
+			return
+		fi
+		for cmd_ in $cmds_; do
+			if [[ ${words[i]} == $cmd_ ]]; then
+				prev_skip_opts=${words[i]}
+				return
+			fi
+		done
+		((i--))
+	done
 }
 
 __perf_main ()
@@ -107,29 +148,36 @@
 	cmd=${words[0]}
 	COMPREPLY=()
 
+	# Skip options backward and find the last perf command
+	__perf_prev_skip_opts
 	# List perf subcommands or long options
-	if [ $cword -eq 1 ]; then
+	if [ -z $prev_skip_opts ]; then
 		if [[ $cur == --* ]]; then
-			__perfcomp '--help --version \
-			--exec-path --html-path --paginate --no-pager \
-			--perf-dir --work-tree --debugfs-dir' -- "$cur"
+			cmds=$($cmd --list-opts)
 		else
 			cmds=$($cmd --list-cmds)
-			__perfcomp "$cmds" "$cur"
 		fi
+		__perfcomp "$cmds" "$cur"
 	# List possible events for -e option
-	elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then
+	elif [[ $prev == @("-e"|"--event") &&
+		$prev_skip_opts == @(record|stat|top) ]]; then
 		evts=$($cmd list --raw-dump)
 		__perfcomp_colon "$evts" "$cur"
-	# List subcommands for perf commands
-	elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then
-		subcmds=$($cmd $prev --list-cmds)
-		__perfcomp_colon "$subcmds" "$cur"
-	# List long option names
-	elif [[ $cur == --* ]];  then
-		subcmd=${words[1]}
-		opts=$($cmd $subcmd --list-opts)
-		__perfcomp "$opts" "$cur"
+	else
+		# List subcommands for perf commands
+		if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched|
+			|data|help|script|test|timechart|trace) ]]; then
+			subcmds=$($cmd $prev_skip_opts --list-cmds)
+			__perfcomp_colon "$subcmds" "$cur"
+		fi
+		# List long option names
+		if [[ $cur == --* ]];  then
+			subcmd=$prev_skip_opts
+			__perf_prev_skip_opts $subcmd
+			subcmd=$subcmd" "$prev_skip_opts
+			opts=$($cmd $subcmd --list-opts)
+			__perfcomp "$opts" "$cur"
+		fi
 	fi
 }
 
@@ -198,7 +246,11 @@
 _perf()
 {
 	local cur words cword prev
-	_get_comp_words_by_ref -n =: cur words cword prev
+	if [ $preload_get_comp_words_by_ref = "true" ]; then
+		_get_comp_words_by_ref -n =: cur words cword prev
+	else
+		__perf_get_comp_words_by_ref -n =: cur words cword prev
+	fi
 	__perf_main
 } &&
 
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 3700a7f..b857fcb 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -13,6 +13,7 @@
 #include "util/quote.h"
 #include "util/run-command.h"
 #include "util/parse-events.h"
+#include "util/parse-options.h"
 #include "util/debug.h"
 #include <api/fs/debugfs.h>
 #include <pthread.h>
@@ -62,6 +63,7 @@
 #endif
 	{ "inject",	cmd_inject,	0 },
 	{ "mem",	cmd_mem,	0 },
+	{ "data",	cmd_data,	0 },
 };
 
 struct pager_config {
@@ -124,6 +126,23 @@
 	}
 }
 
+struct option options[] = {
+	OPT_ARGUMENT("help", "help"),
+	OPT_ARGUMENT("version", "version"),
+	OPT_ARGUMENT("exec-path", "exec-path"),
+	OPT_ARGUMENT("html-path", "html-path"),
+	OPT_ARGUMENT("paginate", "paginate"),
+	OPT_ARGUMENT("no-pager", "no-pager"),
+	OPT_ARGUMENT("perf-dir", "perf-dir"),
+	OPT_ARGUMENT("work-tree", "work-tree"),
+	OPT_ARGUMENT("debugfs-dir", "debugfs-dir"),
+	OPT_ARGUMENT("buildid-dir", "buildid-dir"),
+	OPT_ARGUMENT("list-cmds", "list-cmds"),
+	OPT_ARGUMENT("list-opts", "list-opts"),
+	OPT_ARGUMENT("debug", "debug"),
+	OPT_END()
+};
+
 static int handle_options(const char ***argv, int *argc, int *envchanged)
 {
 	int handled = 0;
@@ -222,6 +241,16 @@
 				struct cmd_struct *p = commands+i;
 				printf("%s ", p->cmd);
 			}
+			putchar('\n');
+			exit(0);
+		} else if (!strcmp(cmd, "--list-opts")) {
+			unsigned int i;
+
+			for (i = 0; i < ARRAY_SIZE(options)-1; i++) {
+				struct option *p = options+i;
+				printf("--%s ", p->long_name);
+			}
+			putchar('\n');
 			exit(0);
 		} else if (!strcmp(cmd, "--debug")) {
 			if (*argc < 2) {
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 1dabb85..e14bb63 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -29,7 +29,7 @@
 	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
 }
 
-#define MAX_NR_CPUS			256
+#define MAX_NR_CPUS			1024
 
 extern const char *input_name;
 extern bool perf_host, perf_guest;
@@ -53,6 +53,7 @@
 	bool	     sample_time;
 	bool	     period;
 	bool	     sample_intr_regs;
+	bool	     running_time;
 	unsigned int freq;
 	unsigned int mmap_pages;
 	unsigned int user_freq;
@@ -61,6 +62,8 @@
 	u64	     user_interval;
 	bool	     sample_transaction;
 	unsigned     initial_delay;
+	bool         use_clockid;
+	clockid_t    clockid;
 };
 
 struct option;
diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build
new file mode 100644
index 0000000..41efd7e
--- /dev/null
+++ b/tools/perf/scripts/Build
@@ -0,0 +1,2 @@
+libperf-$(CONFIG_LIBPERL)   += perl/Perf-Trace-Util/
+libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build
new file mode 100644
index 0000000..928e110
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build
@@ -0,0 +1,3 @@
+libperf-y += Context.o
+
+CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build
new file mode 100644
index 0000000..aefc15c
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Build
@@ -0,0 +1,3 @@
+libperf-y += Context.o
+
+CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
new file mode 100644
index 0000000..6a8801b
--- /dev/null
+++ b/tools/perf/tests/Build
@@ -0,0 +1,43 @@
+perf-y += builtin-test.o
+perf-y += parse-events.o
+perf-y += dso-data.o
+perf-y += attr.o
+perf-y += vmlinux-kallsyms.o
+perf-y += open-syscall.o
+perf-y += open-syscall-all-cpus.o
+perf-y += open-syscall-tp-fields.o
+perf-y += mmap-basic.o
+perf-y += perf-record.o
+perf-y += rdpmc.o
+perf-y += evsel-roundtrip-name.o
+perf-y += evsel-tp-sched.o
+perf-y += fdarray.o
+perf-y += pmu.o
+perf-y += hists_common.o
+perf-y += hists_link.o
+perf-y += hists_filter.o
+perf-y += hists_output.o
+perf-y += hists_cumulate.o
+perf-y += python-use.o
+perf-y += bp_signal.o
+perf-y += bp_signal_overflow.o
+perf-y += task-exit.o
+perf-y += sw-clock.o
+perf-y += mmap-thread-lookup.o
+perf-y += thread-mg-share.o
+perf-y += switch-tracking.o
+perf-y += keep-tracking.o
+perf-y += code-reading.o
+perf-y += sample-parsing.o
+perf-y += parse-no-sample-id-all.o
+perf-y += kmod-path.o
+
+perf-$(CONFIG_X86) += perf-time-to-tsc.o
+
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
+perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+endif
+
+CFLAGS_attr.o         += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
+CFLAGS_python-use.o   += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
+CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index d3095da..7e6d749 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -5,7 +5,7 @@
 flags=0|8
 cpu=*
 type=0|1
-size=104
+size=112
 config=0
 sample_period=4000
 sample_type=263
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
index 872ed7e..f4cf148 100644
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat
@@ -5,7 +5,7 @@
 flags=0|8
 cpu=*
 type=0
-size=104
+size=112
 config=0
 sample_period=0
 sample_type=0
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 4b7d9ab..4f40981 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -167,6 +167,10 @@
 		.func = test__fdarray__add,
 	},
 	{
+		.desc = "Test kmod_path__parse function",
+		.func = test__kmod_path__parse,
+	},
+	{
 		.func = NULL,
 	},
 };
@@ -291,7 +295,7 @@
 
 int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	const char * const test_usage[] = {
+	const char *test_usage[] = {
 	"perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
 	NULL,
 	};
@@ -302,13 +306,14 @@
 		    "be more verbose (show symbol address, etc)"),
 	OPT_END()
 	};
+	const char * const test_subcommands[] = { "list", NULL };
 	struct intlist *skiplist = NULL;
         int ret = hists__init();
 
         if (ret < 0)
                 return ret;
 
-	argc = parse_options(argc, argv, test_options, test_usage, 0);
+	argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
 	if (argc >= 1 && !strcmp(argv[0], "list"))
 		return perf_test__list(argc, argv);
 
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index caaf37f..513e5fe 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -112,6 +112,9 @@
 
 	dso = dso__new((const char *)file);
 
+	TEST_ASSERT_VAL("Failed to access to dso",
+			dso__data_fd(dso, &machine) >= 0);
+
 	/* Basic 10 bytes tests. */
 	for (i = 0; i < ARRAY_SIZE(offsets); i++) {
 		struct test_data_offset *data = &offsets[i];
@@ -243,8 +246,8 @@
 	limit = nr * 4;
 	TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit));
 
-	/* and this is now our dso open FDs limit + 1 extra */
-	dso_cnt = limit / 2 + 1;
+	/* and this is now our dso open FDs limit */
+	dso_cnt = limit / 2;
 	TEST_ASSERT_VAL("failed to create dsos\n",
 		!dsos__create(dso_cnt, TEST_FILE_SIZE));
 
@@ -252,13 +255,13 @@
 		struct dso *dso = dsos[i];
 
 		/*
-		 * Open dsos via dso__data_fd or dso__data_read_offset.
-		 * Both opens the data file and keep it open.
+		 * Open dsos via dso__data_fd(), it opens the data
+		 * file and keep it open (unless open file limit).
 		 */
+		fd = dso__data_fd(dso, &machine);
+		TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
 		if (i % 2) {
-			fd = dso__data_fd(dso, &machine);
-			TEST_ASSERT_VAL("failed to get fd", fd > 0);
-		} else {
 			#define BUFSIZE 10
 			u8 buf[BUFSIZE];
 			ssize_t n;
@@ -268,7 +271,10 @@
 		}
 	}
 
-	/* open +1 dso over the allowed limit */
+	/* verify the first one is already open */
+	TEST_ASSERT_VAL("dsos[0] is not open", dsos[0]->data.fd != -1);
+
+	/* open +1 dso to reach the allowed limit */
 	fd = dso__data_fd(dsos[i], &machine);
 	TEST_ASSERT_VAL("failed to get fd", fd > 0);
 
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
new file mode 100644
index 0000000..e8d7cbb
--- /dev/null
+++ b/tools/perf/tests/kmod-path.c
@@ -0,0 +1,73 @@
+#include <stdbool.h>
+#include "tests.h"
+#include "dso.h"
+#include "debug.h"
+
+static int test(const char *path, bool alloc_name, bool alloc_ext,
+		bool kmod, bool comp, const char *name, const char *ext)
+{
+	struct kmod_path m;
+
+	memset(&m, 0x0, sizeof(m));
+
+	TEST_ASSERT_VAL("kmod_path__parse",
+			!__kmod_path__parse(&m, path, alloc_name, alloc_ext));
+
+	pr_debug("%s - alloc name %d, alloc ext %d, kmod %d, comp %d, name '%s', ext '%s'\n",
+		 path, alloc_name, alloc_ext, m.kmod, m.comp, m.name, m.ext);
+
+	TEST_ASSERT_VAL("wrong kmod", m.kmod == kmod);
+	TEST_ASSERT_VAL("wrong comp", m.comp == comp);
+
+	if (ext)
+		TEST_ASSERT_VAL("wrong ext", m.ext && !strcmp(ext, m.ext));
+	else
+		TEST_ASSERT_VAL("wrong ext", !m.ext);
+
+	if (name)
+		TEST_ASSERT_VAL("wrong name", m.name && !strcmp(name, m.name));
+	else
+		TEST_ASSERT_VAL("wrong name", !m.name);
+
+	free(m.name);
+	free(m.ext);
+	return 0;
+}
+
+#define T(path, an, ae, k, c, n, e) \
+	TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e))
+
+int test__kmod_path__parse(void)
+{
+	/* path                alloc_name  alloc_ext   kmod  comp   name     ext */
+	T("/xxxx/xxxx/x-x.ko", true      , true      , true, false, "[x_x]", NULL);
+	T("/xxxx/xxxx/x-x.ko", false     , true      , true, false, NULL   , NULL);
+	T("/xxxx/xxxx/x-x.ko", true      , false     , true, false, "[x_x]", NULL);
+	T("/xxxx/xxxx/x-x.ko", false     , false     , true, false, NULL   , NULL);
+
+	/* path                alloc_name  alloc_ext   kmod  comp  name   ext */
+	T("/xxxx/xxxx/x.ko.gz", true     , true      , true, true, "[x]", "gz");
+	T("/xxxx/xxxx/x.ko.gz", false    , true      , true, true, NULL , "gz");
+	T("/xxxx/xxxx/x.ko.gz", true     , false     , true, true, "[x]", NULL);
+	T("/xxxx/xxxx/x.ko.gz", false    , false     , true, true, NULL , NULL);
+
+	/* path              alloc_name  alloc_ext  kmod   comp  name    ext */
+	T("/xxxx/xxxx/x.gz", true      , true     , false, true, "x.gz" ,"gz");
+	T("/xxxx/xxxx/x.gz", false     , true     , false, true, NULL   ,"gz");
+	T("/xxxx/xxxx/x.gz", true      , false    , false, true, "x.gz" , NULL);
+	T("/xxxx/xxxx/x.gz", false     , false    , false, true, NULL   , NULL);
+
+	/* path   alloc_name  alloc_ext  kmod   comp  name     ext */
+	T("x.gz", true      , true     , false, true, "x.gz", "gz");
+	T("x.gz", false     , true     , false, true, NULL  , "gz");
+	T("x.gz", true      , false    , false, true, "x.gz", NULL);
+	T("x.gz", false     , false    , false, true, NULL  , NULL);
+
+	/* path      alloc_name  alloc_ext  kmod  comp  name  ext */
+	T("x.ko.gz", true      , true     , true, true, "[x]", "gz");
+	T("x.ko.gz", false     , true     , true, true, NULL , "gz");
+	T("x.ko.gz", true      , false    , true, true, "[x]", NULL);
+	T("x.ko.gz", false     , false    , true, true, NULL , NULL);
+
+	return 0;
+}
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 75709d2..bff8532 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -5,7 +5,7 @@
 
 # FIXME looks like x86 is the only arch running tests ;-)
 # we need some IS_(32/64) flag to make this generic
-ifeq ($(IS_X86_64),1)
+ifeq ($(ARCH)$(IS_64_BIT), x861)
 lib = lib64
 else
 lib = lib
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c
index 8fa82d1..3ec885c 100644
--- a/tools/perf/tests/open-syscall-all-cpus.c
+++ b/tools/perf/tests/open-syscall-all-cpus.c
@@ -29,7 +29,12 @@
 
 	evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
 	if (evsel == NULL) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		if (tracefs_configured())
+			pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
+		else if (debugfs_configured())
+			pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		else
+			pr_debug("Neither tracefs or debugfs is enabled in this kernel\n");
 		goto out_thread_map_delete;
 	}
 
diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c
index a33b2da..07aa319 100644
--- a/tools/perf/tests/open-syscall.c
+++ b/tools/perf/tests/open-syscall.c
@@ -18,7 +18,12 @@
 
 	evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
 	if (evsel == NULL) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		if (tracefs_configured())
+			pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
+		else if (debugfs_configured())
+			pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		else
+			pr_debug("Neither tracefs or debugfs is enabled in this kernel\n");
 		goto out_thread_map_delete;
 	}
 
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 1cdab0c..3de7449 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -3,6 +3,7 @@
 #include "evsel.h"
 #include "evlist.h"
 #include <api/fs/fs.h>
+#include <api/fs/tracefs.h>
 #include <api/fs/debugfs.h>
 #include "tests.h"
 #include "debug.h"
@@ -294,6 +295,36 @@
 	return test__checkevent_genhw(evlist);
 }
 
+static int test__checkevent_exclude_idle_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__checkevent_exclude_idle_modifier_1(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
 static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1192,11 +1223,19 @@
 {
 	char events_path[PATH_MAX];
 	struct dirent *events_ent;
+	const char *mountpoint;
 	DIR *events_dir;
 	int cnt = 0;
 
-	scnprintf(events_path, PATH_MAX, "%s/tracing/events",
-		  debugfs_find_mountpoint());
+	mountpoint = tracefs_find_mountpoint();
+	if (mountpoint) {
+		scnprintf(events_path, PATH_MAX, "%s/events",
+			  mountpoint);
+	} else {
+		mountpoint = debugfs_find_mountpoint();
+		scnprintf(events_path, PATH_MAX, "%s/tracing/events",
+			  mountpoint);
+	}
 
 	events_dir = opendir(events_path);
 
@@ -1485,6 +1524,16 @@
 		.id    = 100,
 	},
 #endif
+	{
+		.name  = "instructions:I",
+		.check = test__checkevent_exclude_idle_modifier,
+		.id    = 45,
+	},
+	{
+		.name  = "instructions:kIG",
+		.check = test__checkevent_exclude_idle_modifier_1,
+		.id    = 46,
+	},
 };
 
 static struct evlist_test test__events_pmu[] = {
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 00e776a..52758a3 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -51,6 +51,7 @@
 int test__switch_tracking(void);
 int test__fdarray__filter(void);
 int test__fdarray__add(void);
+int test__kmod_path__parse(void);
 
 #if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build
new file mode 100644
index 0000000..0a73538
--- /dev/null
+++ b/tools/perf/ui/Build
@@ -0,0 +1,14 @@
+libperf-y += setup.o
+libperf-y += helpline.o
+libperf-y += progress.o
+libperf-y += util.o
+libperf-y += hist.o
+libperf-y += stdio/hist.o
+
+CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))"
+
+libperf-$(CONFIG_SLANG) += browser.o
+libperf-$(CONFIG_SLANG) += browsers/
+libperf-$(CONFIG_SLANG) += tui/
+
+CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST
diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build
new file mode 100644
index 0000000..de223f5
--- /dev/null
+++ b/tools/perf/ui/browsers/Build
@@ -0,0 +1,10 @@
+libperf-y += annotate.o
+libperf-y += hists.o
+libperf-y += map.o
+libperf-y += scripts.o
+libperf-y += header.o
+
+CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST
+CFLAGS_hists.o    += -DENABLE_SLFUTURE_CONST
+CFLAGS_map.o      += -DENABLE_SLFUTURE_CONST
+CFLAGS_scripts.o  += -DENABLE_SLFUTURE_CONST
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 9d32e3c..e5250eb 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -829,10 +829,16 @@
 	return key;
 }
 
+int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt)
+{
+	return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt);
+}
+
 int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
 			     struct hist_browser_timer *hbt)
 {
-	return symbol__tui_annotate(he->ms.sym, he->ms.map, evsel, hbt);
+	return map_symbol__tui_annotate(&he->ms, evsel, hbt);
 }
 
 static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 788506e..995b7a8 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -48,6 +48,24 @@
 	return hists__has_filter(hb->hists) || hb->min_pcnt;
 }
 
+static int hist_browser__get_folding(struct hist_browser *browser)
+{
+	struct rb_node *nd;
+	struct hists *hists = browser->hists;
+	int unfolded_rows = 0;
+
+	for (nd = rb_first(&hists->entries);
+	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
+	     nd = rb_next(nd)) {
+		struct hist_entry *he =
+			rb_entry(nd, struct hist_entry, rb_node);
+
+		if (he->ms.unfolded)
+			unfolded_rows += he->nr_rows;
+	}
+	return unfolded_rows;
+}
+
 static u32 hist_browser__nr_entries(struct hist_browser *hb)
 {
 	u32 nr_entries;
@@ -57,6 +75,7 @@
 	else
 		nr_entries = hb->hists->nr_entries;
 
+	hb->nr_callchain_rows = hist_browser__get_folding(hb);
 	return nr_entries + hb->nr_callchain_rows;
 }
 
@@ -492,6 +511,7 @@
 {
 	int color, width;
 	char folded_sign = callchain_list__folded(chain);
+	bool show_annotated = browser->show_dso && chain->ms.sym && symbol__annotation(chain->ms.sym)->src;
 
 	color = HE_COLORSET_NORMAL;
 	width = browser->b.width - (offset + 2);
@@ -504,7 +524,8 @@
 	ui_browser__set_color(&browser->b, color);
 	hist_browser__gotorc(browser, row, 0);
 	slsmg_write_nstring(" ", offset);
-	slsmg_printf("%c ", folded_sign);
+	slsmg_printf("%c", folded_sign);
+	ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' ');
 	slsmg_write_nstring(str, width);
 }
 
@@ -1467,7 +1488,7 @@
 		perf_hpp__set_user_width(symbol_conf.col_width_list_str);
 
 	while (1) {
-		const struct thread *thread = NULL;
+		struct thread *thread = NULL;
 		const struct dso *dso = NULL;
 		int choice = 0,
 		    annotate = -2, zoom_dso = -2, zoom_thread = -2,
@@ -1593,28 +1614,30 @@
 		if (!sort__has_sym)
 			goto add_exit_option;
 
+		if (browser->selection == NULL)
+			goto skip_annotation;
+
 		if (sort__mode == SORT_MODE__BRANCH) {
 			bi = browser->he_selection->branch_info;
-			if (browser->selection != NULL &&
-			    bi &&
-			    bi->from.sym != NULL &&
-			    !bi->from.map->dso->annotate_warned &&
-				asprintf(&options[nr_options], "Annotate %s",
-					 bi->from.sym->name) > 0)
-				annotate_f = nr_options++;
 
-			if (browser->selection != NULL &&
-			    bi &&
-			    bi->to.sym != NULL &&
+			if (bi == NULL)
+				goto skip_annotation;
+
+			if (bi->from.sym != NULL &&
+			    !bi->from.map->dso->annotate_warned &&
+			    asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) {
+				annotate_f = nr_options++;
+			}
+
+			if (bi->to.sym != NULL &&
 			    !bi->to.map->dso->annotate_warned &&
 			    (bi->to.sym != bi->from.sym ||
 			     bi->to.map->dso != bi->from.map->dso) &&
-				asprintf(&options[nr_options], "Annotate %s",
-					 bi->to.sym->name) > 0)
+			    asprintf(&options[nr_options], "Annotate %s", bi->to.sym->name) > 0) {
 				annotate_t = nr_options++;
+			}
 		} else {
-			if (browser->selection != NULL &&
-			    browser->selection->sym != NULL &&
+			if (browser->selection->sym != NULL &&
 			    !browser->selection->map->dso->annotate_warned) {
 				struct annotation *notes;
 
@@ -1622,11 +1645,12 @@
 
 				if (notes->src &&
 				    asprintf(&options[nr_options], "Annotate %s",
-						 browser->selection->sym->name) > 0)
+						 browser->selection->sym->name) > 0) {
 					annotate = nr_options++;
+				}
 			}
 		}
-
+skip_annotation:
 		if (thread != NULL &&
 		    asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
 			     (browser->hists->thread_filter ? "out of" : "into"),
@@ -1682,6 +1706,7 @@
 		if (choice == annotate || choice == annotate_t || choice == annotate_f) {
 			struct hist_entry *he;
 			struct annotation *notes;
+			struct map_symbol ms;
 			int err;
 do_annotate:
 			if (!objdump_path && perf_session_env__lookup_objdump(env))
@@ -1691,30 +1716,21 @@
 			if (he == NULL)
 				continue;
 
-			/*
-			 * we stash the branch_info symbol + map into the
-			 * the ms so we don't have to rewrite all the annotation
-			 * code to use branch_info.
-			 * in branch mode, the ms struct is not used
-			 */
 			if (choice == annotate_f) {
-				he->ms.sym = he->branch_info->from.sym;
-				he->ms.map = he->branch_info->from.map;
-			}  else if (choice == annotate_t) {
-				he->ms.sym = he->branch_info->to.sym;
-				he->ms.map = he->branch_info->to.map;
+				ms.map = he->branch_info->from.map;
+				ms.sym = he->branch_info->from.sym;
+			} else if (choice == annotate_t) {
+				ms.map = he->branch_info->to.map;
+				ms.sym = he->branch_info->to.sym;
+			} else {
+				ms = *browser->selection;
 			}
 
-			notes = symbol__annotation(he->ms.sym);
+			notes = symbol__annotation(ms.sym);
 			if (!notes->src)
 				continue;
 
-			/*
-			 * Don't let this be freed, say, by hists__decay_entry.
-			 */
-			he->used = true;
-			err = hist_entry__tui_annotate(he, evsel, hbt);
-			he->used = false;
+			err = map_symbol__tui_annotate(&ms, evsel, hbt);
 			/*
 			 * offer option to annotate the other branch source or target
 			 * (if they exists) when returning from annotate
@@ -1754,13 +1770,13 @@
 				pstack__remove(fstack, &browser->hists->thread_filter);
 zoom_out_thread:
 				ui_helpline__pop();
-				browser->hists->thread_filter = NULL;
+				thread__zput(browser->hists->thread_filter);
 				perf_hpp__set_elide(HISTC_THREAD, false);
 			} else {
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
 						   thread->comm_set ? thread__comm_str(thread) : "",
 						   thread->tid);
-				browser->hists->thread_filter = thread;
+				browser->hists->thread_filter = thread__get(thread);
 				perf_hpp__set_elide(HISTC_THREAD, false);
 				pstack__push(fstack, &browser->hists->thread_filter);
 			}
diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build
new file mode 100644
index 0000000..ec22e89
--- /dev/null
+++ b/tools/perf/ui/gtk/Build
@@ -0,0 +1,9 @@
+CFLAGS_gtk += -fPIC $(GTK_CFLAGS)
+
+gtk-y += browser.o
+gtk-y += hists.o
+gtk-y += setup.o
+gtk-y += util.o
+gtk-y += helpline.o
+gtk-y += progress.o
+gtk-y += annotate.o
diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build
new file mode 100644
index 0000000..9e4c6ca
--- /dev/null
+++ b/tools/perf/ui/tui/Build
@@ -0,0 +1,4 @@
+libperf-y += setup.o
+libperf-y += util.o
+libperf-y += helpline.o
+libperf-y += progress.o
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
new file mode 100644
index 0000000..797490a
--- /dev/null
+++ b/tools/perf/util/Build
@@ -0,0 +1,145 @@
+libperf-y += abspath.o
+libperf-y += alias.o
+libperf-y += annotate.o
+libperf-y += build-id.o
+libperf-y += config.o
+libperf-y += ctype.o
+libperf-y += db-export.o
+libperf-y += environment.o
+libperf-y += event.o
+libperf-y += evlist.o
+libperf-y += evsel.o
+libperf-y += exec_cmd.o
+libperf-y += find_next_bit.o
+libperf-y += help.o
+libperf-y += kallsyms.o
+libperf-y += levenshtein.o
+libperf-y += parse-options.o
+libperf-y += parse-events.o
+libperf-y += path.o
+libperf-y += rbtree.o
+libperf-y += bitmap.o
+libperf-y += hweight.o
+libperf-y += run-command.o
+libperf-y += quote.o
+libperf-y += strbuf.o
+libperf-y += string.o
+libperf-y += strlist.o
+libperf-y += strfilter.o
+libperf-y += top.o
+libperf-y += usage.o
+libperf-y += wrapper.o
+libperf-y += sigchain.o
+libperf-y += dso.o
+libperf-y += symbol.o
+libperf-y += color.o
+libperf-y += pager.o
+libperf-y += header.o
+libperf-y += callchain.o
+libperf-y += values.o
+libperf-y += debug.o
+libperf-y += machine.o
+libperf-y += map.o
+libperf-y += pstack.o
+libperf-y += session.o
+libperf-y += ordered-events.o
+libperf-y += comm.o
+libperf-y += thread.o
+libperf-y += thread_map.o
+libperf-y += trace-event-parse.o
+libperf-y += parse-events-flex.o
+libperf-y += parse-events-bison.o
+libperf-y += pmu.o
+libperf-y += pmu-flex.o
+libperf-y += pmu-bison.o
+libperf-y += trace-event-read.o
+libperf-y += trace-event-info.o
+libperf-y += trace-event-scripting.o
+libperf-y += trace-event.o
+libperf-y += svghelper.o
+libperf-y += sort.o
+libperf-y += hist.o
+libperf-y += util.o
+libperf-y += xyarray.o
+libperf-y += cpumap.o
+libperf-y += cgroup.o
+libperf-y += target.o
+libperf-y += rblist.o
+libperf-y += intlist.o
+libperf-y += vdso.o
+libperf-y += stat.o
+libperf-y += record.o
+libperf-y += srcline.o
+libperf-y += data.o
+libperf-$(CONFIG_X86) += tsc.o
+libperf-y += cloexec.o
+libperf-y += thread-stack.o
+
+libperf-$(CONFIG_LIBELF) += symbol-elf.o
+libperf-$(CONFIG_LIBELF) += probe-event.o
+
+ifndef CONFIG_LIBELF
+libperf-y += symbol-minimal.o
+endif
+
+libperf-$(CONFIG_DWARF) += probe-finder.o
+libperf-$(CONFIG_DWARF) += dwarf-aux.o
+
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+libperf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
+
+libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+
+libperf-y += scripting-engines/
+
+libperf-$(CONFIG_PERF_REGS) += perf_regs.o
+libperf-$(CONFIG_ZLIB) += zlib.o
+libperf-$(CONFIG_LZMA) += lzma.o
+
+CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))"
+
+$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+	$(call rule_mkdir)
+	@$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+
+$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+	$(call rule_mkdir)
+	@$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+
+$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+	$(call rule_mkdir)
+	@$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+
+$(OUTPUT)util/pmu-bison.c: util/pmu.y
+	$(call rule_mkdir)
+	@$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
+
+CFLAGS_parse-events-flex.o  += -w
+CFLAGS_pmu-flex.o           += -w
+CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+CFLAGS_pmu-bison.o          += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+
+CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_parse-events.o  += -Wno-redundant-decls
+
+$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 9d9db3b..7f5bdfc 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1010,6 +1010,32 @@
 			}
 			filename = symfs_filename;
 		}
+	} else if (dso__needs_decompress(dso)) {
+		char tmp[PATH_MAX];
+		struct kmod_path m;
+		int fd;
+		bool ret;
+
+		if (kmod_path__parse_ext(&m, symfs_filename))
+			goto out_free_filename;
+
+		snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX");
+
+		fd = mkstemp(tmp);
+		if (fd < 0) {
+			free(m.ext);
+			goto out_free_filename;
+		}
+
+		ret = decompress_to_file(m.ext, symfs_filename, fd);
+
+		free(m.ext);
+		close(fd);
+
+		if (!ret)
+			goto out_free_filename;
+
+		strcpy(symfs_filename, tmp);
 	}
 
 	snprintf(command, sizeof(command),
@@ -1029,7 +1055,7 @@
 
 	file = popen(command, "r");
 	if (!file)
-		goto out_free_filename;
+		goto out_remove_tmp;
 
 	while (!feof(file))
 		if (symbol__parse_objdump_line(sym, map, file, privsize,
@@ -1044,6 +1070,10 @@
 		delete_last_nop(sym);
 
 	pclose(file);
+
+out_remove_tmp:
+	if (dso__needs_decompress(dso))
+		unlink(symfs_filename);
 out_free_filename:
 	if (delete_extract)
 		kcore_extract__delete(&kce);
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 0c72680..61867df 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -59,11 +59,8 @@
 	dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
 		    event->fork.ppid, event->fork.ptid);
 
-	if (thread) {
-		rb_erase(&thread->rb_node, &machine->threads);
-		machine->last_match = NULL;
-		thread__delete(thread);
-	}
+	if (thread)
+		machine__remove_thread(machine, thread);
 
 	return 0;
 }
@@ -93,6 +90,35 @@
 	return raw - build_id;
 }
 
+/* asnprintf consolidates asprintf and snprintf */
+static int asnprintf(char **strp, size_t size, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	if (!strp)
+		return -EINVAL;
+
+	va_start(ap, fmt);
+	if (*strp)
+		ret = vsnprintf(*strp, size, fmt, ap);
+	else
+		ret = vasprintf(strp, fmt, ap);
+	va_end(ap);
+
+	return ret;
+}
+
+static char *build_id__filename(const char *sbuild_id, char *bf, size_t size)
+{
+	char *tmp = bf;
+	int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
+			    sbuild_id, sbuild_id + 2);
+	if (ret < 0 || (tmp && size < (unsigned int)ret))
+		return NULL;
+	return bf;
+}
+
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
 {
 	char build_id_hex[BUILD_ID_SIZE * 2 + 1];
@@ -101,14 +127,7 @@
 		return NULL;
 
 	build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex);
-	if (bf == NULL) {
-		if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir,
-			     build_id_hex, build_id_hex + 2) < 0)
-			return NULL;
-	} else
-		snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
-			 build_id_hex, build_id_hex + 2);
-	return bf;
+	return build_id__filename(build_id_hex, bf, size);
 }
 
 #define dsos__for_each_with_build_id(pos, head)	\
@@ -259,52 +278,113 @@
 	no_buildid_cache = true;
 }
 
-int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
-			  const char *name, bool is_kallsyms, bool is_vdso)
+static char *build_id_cache__dirname_from_path(const char *name,
+					       bool is_kallsyms, bool is_vdso)
 {
-	const size_t size = PATH_MAX;
-	char *realname, *filename = zalloc(size),
-	     *linkname = zalloc(size), *targetname;
-	int len, err = -1;
+	char *realname = (char *)name, *filename;
 	bool slash = is_kallsyms || is_vdso;
 
-	if (is_kallsyms) {
-		if (symbol_conf.kptr_restrict) {
-			pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
-			err = 0;
-			goto out_free;
-		}
-		realname = (char *) name;
-	} else
+	if (!slash) {
 		realname = realpath(name, NULL);
+		if (!realname)
+			return NULL;
+	}
 
-	if (realname == NULL || filename == NULL || linkname == NULL)
+	if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "",
+		     is_vdso ? DSO__NAME_VDSO : realname) < 0)
+		filename = NULL;
+
+	if (!slash)
+		free(realname);
+
+	return filename;
+}
+
+int build_id_cache__list_build_ids(const char *pathname,
+				   struct strlist **result)
+{
+	struct strlist *list;
+	char *dir_name;
+	DIR *dir;
+	struct dirent *d;
+	int ret = 0;
+
+	list = strlist__new(true, NULL);
+	dir_name = build_id_cache__dirname_from_path(pathname, false, false);
+	if (!list || !dir_name) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* List up all dirents */
+	dir = opendir(dir_name);
+	if (!dir) {
+		ret = -errno;
+		goto out;
+	}
+
+	while ((d = readdir(dir)) != NULL) {
+		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+			continue;
+		strlist__add(list, d->d_name);
+	}
+	closedir(dir);
+
+out:
+	free(dir_name);
+	if (ret)
+		strlist__delete(list);
+	else
+		*result = list;
+
+	return ret;
+}
+
+int build_id_cache__add_s(const char *sbuild_id, const char *name,
+			  bool is_kallsyms, bool is_vdso)
+{
+	const size_t size = PATH_MAX;
+	char *realname = NULL, *filename = NULL, *dir_name = NULL,
+	     *linkname = zalloc(size), *targetname, *tmp;
+	int err = -1;
+
+	if (!is_kallsyms) {
+		realname = realpath(name, NULL);
+		if (!realname)
+			goto out_free;
+	}
+
+	dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso);
+	if (!dir_name)
 		goto out_free;
 
-	len = scnprintf(filename, size, "%s%s%s",
-		       debugdir, slash ? "/" : "",
-		       is_vdso ? DSO__NAME_VDSO : realname);
-	if (mkdir_p(filename, 0755))
+	if (mkdir_p(dir_name, 0755))
 		goto out_free;
 
-	snprintf(filename + len, size - len, "/%s", sbuild_id);
+	if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) {
+		filename = NULL;
+		goto out_free;
+	}
 
 	if (access(filename, F_OK)) {
 		if (is_kallsyms) {
 			 if (copyfile("/proc/kallsyms", filename))
 				goto out_free;
-		} else if (link(realname, filename) && copyfile(name, filename))
+		} else if (link(realname, filename) && errno != EEXIST &&
+				copyfile(name, filename))
 			goto out_free;
 	}
 
-	len = scnprintf(linkname, size, "%s/.build-id/%.2s",
-		       debugdir, sbuild_id);
+	if (!build_id__filename(sbuild_id, linkname, size))
+		goto out_free;
+	tmp = strrchr(linkname, '/');
+	*tmp = '\0';
 
 	if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
 		goto out_free;
 
-	snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
-	targetname = filename + strlen(debugdir) - 5;
+	*tmp = '/';
+	targetname = filename + strlen(buildid_dir) - 5;
 	memcpy(targetname, "../..", 5);
 
 	if (symlink(targetname, linkname) == 0)
@@ -313,34 +393,46 @@
 	if (!is_kallsyms)
 		free(realname);
 	free(filename);
+	free(dir_name);
 	free(linkname);
 	return err;
 }
 
 static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
-				 const char *name, const char *debugdir,
-				 bool is_kallsyms, bool is_vdso)
+				 const char *name, bool is_kallsyms,
+				 bool is_vdso)
 {
 	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
 	build_id__sprintf(build_id, build_id_size, sbuild_id);
 
-	return build_id_cache__add_s(sbuild_id, debugdir, name,
-				     is_kallsyms, is_vdso);
+	return build_id_cache__add_s(sbuild_id, name, is_kallsyms, is_vdso);
 }
 
-int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
+bool build_id_cache__cached(const char *sbuild_id)
+{
+	bool ret = false;
+	char *filename = build_id__filename(sbuild_id, NULL, 0);
+
+	if (filename && !access(filename, F_OK))
+		ret = true;
+	free(filename);
+
+	return ret;
+}
+
+int build_id_cache__remove_s(const char *sbuild_id)
 {
 	const size_t size = PATH_MAX;
 	char *filename = zalloc(size),
-	     *linkname = zalloc(size);
+	     *linkname = zalloc(size), *tmp;
 	int err = -1;
 
 	if (filename == NULL || linkname == NULL)
 		goto out_free;
 
-	snprintf(linkname, size, "%s/.build-id/%.2s/%s",
-		 debugdir, sbuild_id, sbuild_id + 2);
+	if (!build_id__filename(sbuild_id, linkname, size))
+		goto out_free;
 
 	if (access(linkname, F_OK))
 		goto out_free;
@@ -354,8 +446,8 @@
 	/*
 	 * Since the link is relative, we must make it absolute:
 	 */
-	snprintf(linkname, size, "%s/.build-id/%.2s/%s",
-		 debugdir, sbuild_id, filename);
+	tmp = strrchr(linkname, '/') + 1;
+	snprintf(tmp, size - (tmp - linkname), "%s", filename);
 
 	if (unlink(linkname))
 		goto out_free;
@@ -367,8 +459,7 @@
 	return err;
 }
 
-static int dso__cache_build_id(struct dso *dso, struct machine *machine,
-			       const char *debugdir)
+static int dso__cache_build_id(struct dso *dso, struct machine *machine)
 {
 	bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
 	bool is_vdso = dso__is_vdso(dso);
@@ -381,28 +472,26 @@
 		name = nm;
 	}
 	return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
-				     debugdir, is_kallsyms, is_vdso);
+				     is_kallsyms, is_vdso);
 }
 
 static int __dsos__cache_build_ids(struct list_head *head,
-				   struct machine *machine, const char *debugdir)
+				   struct machine *machine)
 {
 	struct dso *pos;
 	int err = 0;
 
 	dsos__for_each_with_build_id(pos, head)
-		if (dso__cache_build_id(pos, machine, debugdir))
+		if (dso__cache_build_id(pos, machine))
 			err = -1;
 
 	return err;
 }
 
-static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
+static int machine__cache_build_ids(struct machine *machine)
 {
-	int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine,
-					  debugdir);
-	ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine,
-				       debugdir);
+	int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine);
+	ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine);
 	return ret;
 }
 
@@ -417,11 +506,11 @@
 	if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
 		return -1;
 
-	ret = machine__cache_build_ids(&session->machines.host, buildid_dir);
+	ret = machine__cache_build_ids(&session->machines.host);
 
 	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret |= machine__cache_build_ids(pos, buildid_dir);
+		ret |= machine__cache_build_ids(pos);
 	}
 	return ret ? -1 : 0;
 }
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 8236319..8501122 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -4,6 +4,7 @@
 #define BUILD_ID_SIZE 20
 
 #include "tool.h"
+#include "strlist.h"
 #include <linux/types.h>
 
 extern struct perf_tool build_id__mark_dso_hit_ops;
@@ -22,9 +23,12 @@
 int perf_session__write_buildid_table(struct perf_session *session, int fd);
 int perf_session__cache_build_ids(struct perf_session *session);
 
-int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
+int build_id_cache__list_build_ids(const char *pathname,
+				   struct strlist **result);
+bool build_id_cache__cached(const char *sbuild_id);
+int build_id_cache__add_s(const char *sbuild_id,
 			  const char *name, bool is_kallsyms, bool is_vdso);
-int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
+int build_id_cache__remove_s(const char *sbuild_id);
 void disable_buildid_cache(void);
 
 #endif
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index d04d770..fbcca21 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -17,6 +17,7 @@
 #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
 #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
 #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
+#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
 
 typedef int (*config_fn_t)(const char *, const char *, void *);
 extern int perf_default_config(const char *, const char *, void *);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 14e7a12..9f643ee 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -97,6 +97,14 @@
 				callchain_param.dump_size = size;
 			}
 #endif /* HAVE_DWARF_UNWIND_SUPPORT */
+		} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+			if (!strtok_r(NULL, ",", &saveptr)) {
+				callchain_param.record_mode = CALLCHAIN_LBR;
+				ret = 0;
+			} else
+				pr_err("callchain: No more arguments "
+					"needed for --call-graph lbr\n");
+			break;
 		} else {
 			pr_err("callchain: Unknown --call-graph option "
 			       "value: %s\n", arg);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index c0ec1ac..6033a0a 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -11,6 +11,7 @@
 	CALLCHAIN_NONE,
 	CALLCHAIN_FP,
 	CALLCHAIN_DWARF,
+	CALLCHAIN_LBR,
 	CALLCHAIN_MAX
 };
 
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index 6da965b..85b5238 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -7,6 +7,12 @@
 
 static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
 
+int __weak sched_getcpu(void)
+{
+	errno = ENOSYS;
+	return -1;
+}
+
 static int perf_flag_probe(void)
 {
 	/* use 'safest' configuration as used in perf_evsel__fallback() */
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
index 94a5a7d..68888c2 100644
--- a/tools/perf/util/cloexec.h
+++ b/tools/perf/util/cloexec.h
@@ -3,4 +3,10 @@
 
 unsigned long perf_event_open_cloexec_flag(void);
 
+#ifdef __GLIBC_PREREQ
+#if !__GLIBC_PREREQ(2, 6)
+extern int sched_getcpu(void) __THROW;
+#endif
+#endif
+
 #endif /* __PERF_CLOEXEC_H */
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
new file mode 100644
index 0000000..dd17c9a
--- /dev/null
+++ b/tools/perf/util/data-convert-bt.c
@@ -0,0 +1,857 @@
+/*
+ * CTF writing support via babeltrace.
+ *
+ * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com>
+ * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <linux/compiler.h>
+#include <babeltrace/ctf-writer/writer.h>
+#include <babeltrace/ctf-writer/clock.h>
+#include <babeltrace/ctf-writer/stream.h>
+#include <babeltrace/ctf-writer/event.h>
+#include <babeltrace/ctf-writer/event-types.h>
+#include <babeltrace/ctf-writer/event-fields.h>
+#include <babeltrace/ctf/events.h>
+#include <traceevent/event-parse.h>
+#include "asm/bug.h"
+#include "data-convert-bt.h"
+#include "session.h"
+#include "util.h"
+#include "debug.h"
+#include "tool.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+
+#define pr_N(n, fmt, ...) \
+	eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
+
+#define pr(fmt, ...)  pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+struct evsel_priv {
+	struct bt_ctf_event_class *event_class;
+};
+
+struct ctf_writer {
+	/* writer primitives */
+	struct bt_ctf_writer		*writer;
+	struct bt_ctf_stream		*stream;
+	struct bt_ctf_stream_class	*stream_class;
+	struct bt_ctf_clock		*clock;
+
+	/* data types */
+	union {
+		struct {
+			struct bt_ctf_field_type	*s64;
+			struct bt_ctf_field_type	*u64;
+			struct bt_ctf_field_type	*s32;
+			struct bt_ctf_field_type	*u32;
+			struct bt_ctf_field_type	*string;
+			struct bt_ctf_field_type	*u64_hex;
+		};
+		struct bt_ctf_field_type *array[6];
+	} data;
+};
+
+struct convert {
+	struct perf_tool	tool;
+	struct ctf_writer	writer;
+
+	u64			events_size;
+	u64			events_count;
+};
+
+static int value_set(struct bt_ctf_field_type *type,
+		     struct bt_ctf_event *event,
+		     const char *name, u64 val)
+{
+	struct bt_ctf_field *field;
+	bool sign = bt_ctf_field_type_integer_get_signed(type);
+	int ret;
+
+	field = bt_ctf_field_create(type);
+	if (!field) {
+		pr_err("failed to create a field %s\n", name);
+		return -1;
+	}
+
+	if (sign) {
+		ret = bt_ctf_field_signed_integer_set_value(field, val);
+		if (ret) {
+			pr_err("failed to set field value %s\n", name);
+			goto err;
+		}
+	} else {
+		ret = bt_ctf_field_unsigned_integer_set_value(field, val);
+		if (ret) {
+			pr_err("failed to set field value %s\n", name);
+			goto err;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, name, field);
+	if (ret) {
+		pr_err("failed to set payload %s\n", name);
+		goto err;
+	}
+
+	pr2("  SET [%s = %" PRIu64 "]\n", name, val);
+
+err:
+	bt_ctf_field_put(field);
+	return ret;
+}
+
+#define __FUNC_VALUE_SET(_name, _val_type)				\
+static __maybe_unused int value_set_##_name(struct ctf_writer *cw,	\
+			     struct bt_ctf_event *event,		\
+			     const char *name,				\
+			     _val_type val)				\
+{									\
+	struct bt_ctf_field_type *type = cw->data._name;		\
+	return value_set(type, event, name, (u64) val);			\
+}
+
+#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name)
+
+FUNC_VALUE_SET(s32)
+FUNC_VALUE_SET(u32)
+FUNC_VALUE_SET(s64)
+FUNC_VALUE_SET(u64)
+__FUNC_VALUE_SET(u64_hex, u64)
+
+static struct bt_ctf_field_type*
+get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
+{
+	unsigned long flags = field->flags;
+
+	if (flags & FIELD_IS_STRING)
+		return cw->data.string;
+
+	if (!(flags & FIELD_IS_SIGNED)) {
+		/* unsigned long are mostly pointers */
+		if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER)
+			return cw->data.u64_hex;
+	}
+
+	if (flags & FIELD_IS_SIGNED) {
+		if (field->size == 8)
+			return cw->data.s64;
+		else
+			return cw->data.s32;
+	}
+
+	if (field->size == 8)
+		return cw->data.u64;
+	else
+		return cw->data.u32;
+}
+
+static int add_tracepoint_field_value(struct ctf_writer *cw,
+				      struct bt_ctf_event_class *event_class,
+				      struct bt_ctf_event *event,
+				      struct perf_sample *sample,
+				      struct format_field *fmtf)
+{
+	struct bt_ctf_field_type *type;
+	struct bt_ctf_field *array_field;
+	struct bt_ctf_field *field;
+	const char *name = fmtf->name;
+	void *data = sample->raw_data;
+	unsigned long long value_int;
+	unsigned long flags = fmtf->flags;
+	unsigned int n_items;
+	unsigned int i;
+	unsigned int offset;
+	unsigned int len;
+	int ret;
+
+	offset = fmtf->offset;
+	len = fmtf->size;
+	if (flags & FIELD_IS_STRING)
+		flags &= ~FIELD_IS_ARRAY;
+
+	if (flags & FIELD_IS_DYNAMIC) {
+		unsigned long long tmp_val;
+
+		tmp_val = pevent_read_number(fmtf->event->pevent,
+				data + offset, len);
+		offset = tmp_val;
+		len = offset >> 16;
+		offset &= 0xffff;
+	}
+
+	if (flags & FIELD_IS_ARRAY) {
+
+		type = bt_ctf_event_class_get_field_by_name(
+				event_class, name);
+		array_field = bt_ctf_field_create(type);
+		bt_ctf_field_type_put(type);
+		if (!array_field) {
+			pr_err("Failed to create array type %s\n", name);
+			return -1;
+		}
+
+		len = fmtf->size / fmtf->arraylen;
+		n_items = fmtf->arraylen;
+	} else {
+		n_items = 1;
+		array_field = NULL;
+	}
+
+	type = get_tracepoint_field_type(cw, fmtf);
+
+	for (i = 0; i < n_items; i++) {
+		if (!(flags & FIELD_IS_STRING))
+			value_int = pevent_read_number(
+					fmtf->event->pevent,
+					data + offset + i * len, len);
+
+		if (flags & FIELD_IS_ARRAY)
+			field = bt_ctf_field_array_get_field(array_field, i);
+		else
+			field = bt_ctf_field_create(type);
+
+		if (!field) {
+			pr_err("failed to create a field %s\n", name);
+			return -1;
+		}
+
+		if (flags & FIELD_IS_STRING)
+			ret = bt_ctf_field_string_set_value(field,
+					data + offset + i * len);
+		else if (!(flags & FIELD_IS_SIGNED))
+			ret = bt_ctf_field_unsigned_integer_set_value(
+					field, value_int);
+		else
+			ret = bt_ctf_field_signed_integer_set_value(
+					field, value_int);
+		if (ret) {
+			pr_err("failed to set file value %s\n", name);
+			goto err_put_field;
+		}
+		if (!(flags & FIELD_IS_ARRAY)) {
+			ret = bt_ctf_event_set_payload(event, name, field);
+			if (ret) {
+				pr_err("failed to set payload %s\n", name);
+				goto err_put_field;
+			}
+		}
+		bt_ctf_field_put(field);
+	}
+	if (flags & FIELD_IS_ARRAY) {
+		ret = bt_ctf_event_set_payload(event, name, array_field);
+		if (ret) {
+			pr_err("Failed add payload array %s\n", name);
+			return -1;
+		}
+		bt_ctf_field_put(array_field);
+	}
+	return 0;
+
+err_put_field:
+	bt_ctf_field_put(field);
+	return -1;
+}
+
+static int add_tracepoint_fields_values(struct ctf_writer *cw,
+					struct bt_ctf_event_class *event_class,
+					struct bt_ctf_event *event,
+					struct format_field *fields,
+					struct perf_sample *sample)
+{
+	struct format_field *field;
+	int ret;
+
+	for (field = fields; field; field = field->next) {
+		ret = add_tracepoint_field_value(cw, event_class, event, sample,
+				field);
+		if (ret)
+			return -1;
+	}
+	return 0;
+}
+
+static int add_tracepoint_values(struct ctf_writer *cw,
+				 struct bt_ctf_event_class *event_class,
+				 struct bt_ctf_event *event,
+				 struct perf_evsel *evsel,
+				 struct perf_sample *sample)
+{
+	struct format_field *common_fields = evsel->tp_format->format.common_fields;
+	struct format_field *fields        = evsel->tp_format->format.fields;
+	int ret;
+
+	ret = add_tracepoint_fields_values(cw, event_class, event,
+					   common_fields, sample);
+	if (!ret)
+		ret = add_tracepoint_fields_values(cw, event_class, event,
+						   fields, sample);
+
+	return ret;
+}
+
+static int add_generic_values(struct ctf_writer *cw,
+			      struct bt_ctf_event *event,
+			      struct perf_evsel *evsel,
+			      struct perf_sample *sample)
+{
+	u64 type = evsel->attr.sample_type;
+	int ret;
+
+	/*
+	 * missing:
+	 *   PERF_SAMPLE_TIME         - not needed as we have it in
+	 *                              ctf event header
+	 *   PERF_SAMPLE_READ         - TODO
+	 *   PERF_SAMPLE_CALLCHAIN    - TODO
+	 *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+	 *   PERF_SAMPLE_BRANCH_STACK - TODO
+	 *   PERF_SAMPLE_REGS_USER    - TODO
+	 *   PERF_SAMPLE_STACK_USER   - TODO
+	 */
+
+	if (type & PERF_SAMPLE_IP) {
+		ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		ret = value_set_s32(cw, event, "perf_tid", sample->tid);
+		if (ret)
+			return -1;
+
+		ret = value_set_s32(cw, event, "perf_pid", sample->pid);
+		if (ret)
+			return -1;
+	}
+
+	if ((type & PERF_SAMPLE_ID) ||
+	    (type & PERF_SAMPLE_IDENTIFIER)) {
+		ret = value_set_u64(cw, event, "perf_id", sample->id);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+		ret = value_set_u32(cw, event, "perf_cpu", sample->cpu);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_PERIOD) {
+		ret = value_set_u64(cw, event, "perf_period", sample->period);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT) {
+		ret = value_set_u64(cw, event, "perf_weight", sample->weight);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_DATA_SRC) {
+		ret = value_set_u64(cw, event, "perf_data_src",
+				sample->data_src);
+		if (ret)
+			return -1;
+	}
+
+	if (type & PERF_SAMPLE_TRANSACTION) {
+		ret = value_set_u64(cw, event, "perf_transaction",
+				sample->transaction);
+		if (ret)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *_event __maybe_unused,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine __maybe_unused)
+{
+	struct convert *c = container_of(tool, struct convert, tool);
+	struct evsel_priv *priv = evsel->priv;
+	struct ctf_writer *cw = &c->writer;
+	struct bt_ctf_event_class *event_class;
+	struct bt_ctf_event *event;
+	int ret;
+
+	if (WARN_ONCE(!priv, "Failed to setup all events.\n"))
+		return 0;
+
+	event_class = priv->event_class;
+
+	/* update stats */
+	c->events_count++;
+	c->events_size += _event->header.size;
+
+	pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count);
+
+	event = bt_ctf_event_create(event_class);
+	if (!event) {
+		pr_err("Failed to create an CTF event\n");
+		return -1;
+	}
+
+	bt_ctf_clock_set_time(cw->clock, sample->time);
+
+	ret = add_generic_values(cw, event, evsel, sample);
+	if (ret)
+		return -1;
+
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+		ret = add_tracepoint_values(cw, event_class, event,
+					    evsel, sample);
+		if (ret)
+			return -1;
+	}
+
+	bt_ctf_stream_append_event(cw->stream, event);
+	bt_ctf_event_put(event);
+	return 0;
+}
+
+static int add_tracepoint_fields_types(struct ctf_writer *cw,
+				       struct format_field *fields,
+				       struct bt_ctf_event_class *event_class)
+{
+	struct format_field *field;
+	int ret;
+
+	for (field = fields; field; field = field->next) {
+		struct bt_ctf_field_type *type;
+		unsigned long flags = field->flags;
+
+		pr2("  field '%s'\n", field->name);
+
+		type = get_tracepoint_field_type(cw, field);
+		if (!type)
+			return -1;
+
+		/*
+		 * A string is an array of chars. For this we use the string
+		 * type and don't care that it is an array. What we don't
+		 * support is an array of strings.
+		 */
+		if (flags & FIELD_IS_STRING)
+			flags &= ~FIELD_IS_ARRAY;
+
+		if (flags & FIELD_IS_ARRAY)
+			type = bt_ctf_field_type_array_create(type, field->arraylen);
+
+		ret = bt_ctf_event_class_add_field(event_class, type,
+				field->name);
+
+		if (flags & FIELD_IS_ARRAY)
+			bt_ctf_field_type_put(type);
+
+		if (ret) {
+			pr_err("Failed to add field '%s\n", field->name);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int add_tracepoint_types(struct ctf_writer *cw,
+				struct perf_evsel *evsel,
+				struct bt_ctf_event_class *class)
+{
+	struct format_field *common_fields = evsel->tp_format->format.common_fields;
+	struct format_field *fields        = evsel->tp_format->format.fields;
+	int ret;
+
+	ret = add_tracepoint_fields_types(cw, common_fields, class);
+	if (!ret)
+		ret = add_tracepoint_fields_types(cw, fields, class);
+
+	return ret;
+}
+
+static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
+			     struct bt_ctf_event_class *event_class)
+{
+	u64 type = evsel->attr.sample_type;
+
+	/*
+	 * missing:
+	 *   PERF_SAMPLE_TIME         - not needed as we have it in
+	 *                              ctf event header
+	 *   PERF_SAMPLE_READ         - TODO
+	 *   PERF_SAMPLE_CALLCHAIN    - TODO
+	 *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+	 *   PERF_SAMPLE_BRANCH_STACK - TODO
+	 *   PERF_SAMPLE_REGS_USER    - TODO
+	 *   PERF_SAMPLE_STACK_USER   - TODO
+	 */
+
+#define ADD_FIELD(cl, t, n)						\
+	do {								\
+		pr2("  field '%s'\n", n);				\
+		if (bt_ctf_event_class_add_field(cl, t, n)) {		\
+			pr_err("Failed to add field '%s;\n", n);	\
+			return -1;					\
+		}							\
+	} while (0)
+
+	if (type & PERF_SAMPLE_IP)
+		ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip");
+
+	if (type & PERF_SAMPLE_TID) {
+		ADD_FIELD(event_class, cw->data.s32, "perf_tid");
+		ADD_FIELD(event_class, cw->data.s32, "perf_pid");
+	}
+
+	if ((type & PERF_SAMPLE_ID) ||
+	    (type & PERF_SAMPLE_IDENTIFIER))
+		ADD_FIELD(event_class, cw->data.u64, "perf_id");
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
+
+	if (type & PERF_SAMPLE_CPU)
+		ADD_FIELD(event_class, cw->data.u32, "perf_cpu");
+
+	if (type & PERF_SAMPLE_PERIOD)
+		ADD_FIELD(event_class, cw->data.u64, "perf_period");
+
+	if (type & PERF_SAMPLE_WEIGHT)
+		ADD_FIELD(event_class, cw->data.u64, "perf_weight");
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		ADD_FIELD(event_class, cw->data.u64, "perf_data_src");
+
+	if (type & PERF_SAMPLE_TRANSACTION)
+		ADD_FIELD(event_class, cw->data.u64, "perf_transaction");
+
+#undef ADD_FIELD
+	return 0;
+}
+
+static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel)
+{
+	struct bt_ctf_event_class *event_class;
+	struct evsel_priv *priv;
+	const char *name = perf_evsel__name(evsel);
+	int ret;
+
+	pr("Adding event '%s' (type %d)\n", name, evsel->attr.type);
+
+	event_class = bt_ctf_event_class_create(name);
+	if (!event_class)
+		return -1;
+
+	ret = add_generic_types(cw, evsel, event_class);
+	if (ret)
+		goto err;
+
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+		ret = add_tracepoint_types(cw, evsel, event_class);
+		if (ret)
+			goto err;
+	}
+
+	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
+	if (ret) {
+		pr("Failed to add event class into stream.\n");
+		goto err;
+	}
+
+	priv = malloc(sizeof(*priv));
+	if (!priv)
+		goto err;
+
+	priv->event_class = event_class;
+	evsel->priv       = priv;
+	return 0;
+
+err:
+	bt_ctf_event_class_put(event_class);
+	pr_err("Failed to add event '%s'.\n", name);
+	return -1;
+}
+
+static int setup_events(struct ctf_writer *cw, struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel;
+	int ret;
+
+	evlist__for_each(evlist, evsel) {
+		ret = add_event(cw, evsel);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int ctf_writer__setup_env(struct ctf_writer *cw,
+				 struct perf_session *session)
+{
+	struct perf_header *header = &session->header;
+	struct bt_ctf_writer *writer = cw->writer;
+
+#define ADD(__n, __v)							\
+do {									\
+	if (bt_ctf_writer_add_environment_field(writer, __n, __v))	\
+		return -1;						\
+} while (0)
+
+	ADD("host",    header->env.hostname);
+	ADD("sysname", "Linux");
+	ADD("release", header->env.os_release);
+	ADD("version", header->env.version);
+	ADD("machine", header->env.arch);
+	ADD("domain", "kernel");
+	ADD("tracer_name", "perf");
+
+#undef ADD
+	return 0;
+}
+
+static int ctf_writer__setup_clock(struct ctf_writer *cw)
+{
+	struct bt_ctf_clock *clock = cw->clock;
+
+	bt_ctf_clock_set_description(clock, "perf clock");
+
+#define SET(__n, __v)				\
+do {						\
+	if (bt_ctf_clock_set_##__n(clock, __v))	\
+		return -1;			\
+} while (0)
+
+	SET(frequency,   1000000000);
+	SET(offset_s,    0);
+	SET(offset,      0);
+	SET(precision,   10);
+	SET(is_absolute, 0);
+
+#undef SET
+	return 0;
+}
+
+static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
+{
+	struct bt_ctf_field_type *type;
+
+	type = bt_ctf_field_type_integer_create(size);
+	if (!type)
+		return NULL;
+
+	if (sign &&
+	    bt_ctf_field_type_integer_set_signed(type, 1))
+		goto err;
+
+	if (hex &&
+	    bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
+		goto err;
+
+	pr2("Created type: INTEGER %d-bit %ssigned %s\n",
+	    size, sign ? "un" : "", hex ? "hex" : "");
+	return type;
+
+err:
+	bt_ctf_field_type_put(type);
+	return NULL;
+}
+
+static void ctf_writer__cleanup_data(struct ctf_writer *cw)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(cw->data.array); i++)
+		bt_ctf_field_type_put(cw->data.array[i]);
+}
+
+static int ctf_writer__init_data(struct ctf_writer *cw)
+{
+#define CREATE_INT_TYPE(type, size, sign, hex)		\
+do {							\
+	(type) = create_int_type(size, sign, hex);	\
+	if (!(type))					\
+		goto err;				\
+} while (0)
+
+	CREATE_INT_TYPE(cw->data.s64, 64, true,  false);
+	CREATE_INT_TYPE(cw->data.u64, 64, false, false);
+	CREATE_INT_TYPE(cw->data.s32, 32, true,  false);
+	CREATE_INT_TYPE(cw->data.u32, 32, false, false);
+	CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true);
+
+	cw->data.string  = bt_ctf_field_type_string_create();
+	if (cw->data.string)
+		return 0;
+
+err:
+	ctf_writer__cleanup_data(cw);
+	pr_err("Failed to create data types.\n");
+	return -1;
+}
+
+static void ctf_writer__cleanup(struct ctf_writer *cw)
+{
+	ctf_writer__cleanup_data(cw);
+
+	bt_ctf_clock_put(cw->clock);
+	bt_ctf_stream_put(cw->stream);
+	bt_ctf_stream_class_put(cw->stream_class);
+	bt_ctf_writer_put(cw->writer);
+
+	/* and NULL all the pointers */
+	memset(cw, 0, sizeof(*cw));
+}
+
+static int ctf_writer__init(struct ctf_writer *cw, const char *path)
+{
+	struct bt_ctf_writer		*writer;
+	struct bt_ctf_stream_class	*stream_class;
+	struct bt_ctf_stream		*stream;
+	struct bt_ctf_clock		*clock;
+
+	/* CTF writer */
+	writer = bt_ctf_writer_create(path);
+	if (!writer)
+		goto err;
+
+	cw->writer = writer;
+
+	/* CTF clock */
+	clock = bt_ctf_clock_create("perf_clock");
+	if (!clock) {
+		pr("Failed to create CTF clock.\n");
+		goto err_cleanup;
+	}
+
+	cw->clock = clock;
+
+	if (ctf_writer__setup_clock(cw)) {
+		pr("Failed to setup CTF clock.\n");
+		goto err_cleanup;
+	}
+
+	/* CTF stream class */
+	stream_class = bt_ctf_stream_class_create("perf_stream");
+	if (!stream_class) {
+		pr("Failed to create CTF stream class.\n");
+		goto err_cleanup;
+	}
+
+	cw->stream_class = stream_class;
+
+	/* CTF clock stream setup */
+	if (bt_ctf_stream_class_set_clock(stream_class, clock)) {
+		pr("Failed to assign CTF clock to stream class.\n");
+		goto err_cleanup;
+	}
+
+	if (ctf_writer__init_data(cw))
+		goto err_cleanup;
+
+	/* CTF stream instance */
+	stream = bt_ctf_writer_create_stream(writer, stream_class);
+	if (!stream) {
+		pr("Failed to create CTF stream.\n");
+		goto err_cleanup;
+	}
+
+	cw->stream = stream;
+
+	/* CTF clock writer setup */
+	if (bt_ctf_writer_add_clock(writer, clock)) {
+		pr("Failed to assign CTF clock to writer.\n");
+		goto err_cleanup;
+	}
+
+	return 0;
+
+err_cleanup:
+	ctf_writer__cleanup(cw);
+err:
+	pr_err("Failed to setup CTF writer.\n");
+	return -1;
+}
+
+int bt_convert__perf2ctf(const char *input, const char *path, bool force)
+{
+	struct perf_session *session;
+	struct perf_data_file file = {
+		.path = input,
+		.mode = PERF_DATA_MODE_READ,
+		.force = force,
+	};
+	struct convert c = {
+		.tool = {
+			.sample          = process_sample_event,
+			.mmap            = perf_event__process_mmap,
+			.mmap2           = perf_event__process_mmap2,
+			.comm            = perf_event__process_comm,
+			.exit            = perf_event__process_exit,
+			.fork            = perf_event__process_fork,
+			.lost            = perf_event__process_lost,
+			.tracing_data    = perf_event__process_tracing_data,
+			.build_id        = perf_event__process_build_id,
+			.ordered_events  = true,
+			.ordering_requires_timestamps = true,
+		},
+	};
+	struct ctf_writer *cw = &c.writer;
+	int err = -1;
+
+	/* CTF writer */
+	if (ctf_writer__init(cw, path))
+		return -1;
+
+	/* perf.data session */
+	session = perf_session__new(&file, 0, &c.tool);
+	if (!session)
+		goto free_writer;
+
+	/* CTF writer env/clock setup  */
+	if (ctf_writer__setup_env(cw, session))
+		goto free_session;
+
+	/* CTF events setup */
+	if (setup_events(cw, session))
+		goto free_session;
+
+	err = perf_session__process_events(session);
+	if (!err)
+		err = bt_ctf_stream_flush(cw->stream);
+
+	fprintf(stderr,
+		"[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
+		file.path, path);
+
+	fprintf(stderr,
+		"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
+		(double) c.events_size / 1024.0 / 1024.0,
+		c.events_count);
+
+	/* its all good */
+free_session:
+	perf_session__delete(session);
+
+free_writer:
+	ctf_writer__cleanup(cw);
+	return err;
+}
diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h
new file mode 100644
index 0000000..4c20434
--- /dev/null
+++ b/tools/perf/util/data-convert-bt.h
@@ -0,0 +1,8 @@
+#ifndef __DATA_CONVERT_BT_H
+#define __DATA_CONVERT_BT_H
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, bool force);
+
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+#endif /* __DATA_CONVERT_BT_H */
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index c81dae3..bb39a3f 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -282,13 +282,13 @@
 
 int db_export__sample(struct db_export *dbe, union perf_event *event,
 		      struct perf_sample *sample, struct perf_evsel *evsel,
-		      struct thread *thread, struct addr_location *al)
+		      struct addr_location *al)
 {
+	struct thread* thread = al->thread;
 	struct export_sample es = {
 		.event = event,
 		.sample = sample,
 		.evsel = evsel,
-		.thread = thread,
 		.al = al,
 	};
 	struct thread *main_thread;
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index adbd22d..25e22fd 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -34,7 +34,6 @@
 	union perf_event	*event;
 	struct perf_sample	*sample;
 	struct perf_evsel	*evsel;
-	struct thread		*thread;
 	struct addr_location	*al;
 	u64			db_id;
 	u64			comm_db_id;
@@ -97,7 +96,7 @@
 			   const char *name);
 int db_export__sample(struct db_export *dbe, union perf_event *event,
 		      struct perf_sample *sample, struct perf_evsel *evsel,
-		      struct thread *thread, struct addr_location *al);
+		      struct addr_location *al);
 
 int db_export__branch_types(struct db_export *dbe);
 
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index ad60b2f..2da5581 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -20,6 +20,7 @@
 bool dump_trace = false, quiet = false;
 int debug_ordered_events;
 static int redirect_to_stderr;
+int debug_data_convert;
 
 static int _eprintf(int level, int var, const char *fmt, va_list args)
 {
@@ -147,6 +148,7 @@
 	{ .name = "verbose",		.ptr = &verbose },
 	{ .name = "ordered-events",	.ptr = &debug_ordered_events},
 	{ .name = "stderr",		.ptr = &redirect_to_stderr},
+	{ .name = "data-convert",	.ptr = &debug_data_convert },
 	{ .name = NULL, }
 };
 
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index be264d6..caac2fd 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -12,6 +12,7 @@
 extern int verbose;
 extern bool quiet, dump_trace;
 extern int debug_ordered_events;
+extern int debug_data_convert;
 
 #ifndef pr_fmt
 #define pr_fmt(fmt) fmt
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index c2f7d3b..fc0ddd5 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -45,13 +45,13 @@
 	case DSO_BINARY_TYPE__DEBUGLINK: {
 		char *debuglink;
 
-		strncpy(filename, dso->long_name, size);
-		debuglink = filename + dso->long_name_len;
+		len = __symbol__join_symfs(filename, size, dso->long_name);
+		debuglink = filename + len;
 		while (debuglink != filename && *debuglink != '/')
 			debuglink--;
 		if (*debuglink == '/')
 			debuglink++;
-		ret = filename__read_debuglink(dso->long_name, debuglink,
+		ret = filename__read_debuglink(filename, debuglink,
 					       size - (debuglink - filename));
 		}
 		break;
@@ -148,6 +148,9 @@
 #ifdef HAVE_ZLIB_SUPPORT
 	{ "gz", gzip_decompress_to_file },
 #endif
+#ifdef HAVE_LZMA_SUPPORT
+	{ "xz", lzma_decompress_to_file },
+#endif
 	{ NULL, NULL },
 };
 
@@ -162,32 +165,14 @@
 	return false;
 }
 
-bool is_kmodule_extension(const char *ext)
+bool is_kernel_module(const char *pathname)
 {
-	if (strncmp(ext, "ko", 2))
-		return false;
+	struct kmod_path m;
 
-	if (ext[2] == '\0' || (ext[2] == '.' && is_supported_compression(ext+3)))
-		return true;
+	if (kmod_path__parse(&m, pathname))
+		return NULL;
 
-	return false;
-}
-
-bool is_kernel_module(const char *pathname, bool *compressed)
-{
-	const char *ext = strrchr(pathname, '.');
-
-	if (ext == NULL)
-		return false;
-
-	if (is_supported_compression(ext + 1)) {
-		if (compressed)
-			*compressed = true;
-		ext -= 3;
-	} else if (compressed)
-		*compressed = false;
-
-	return is_kmodule_extension(ext + 1);
+	return m.kmod;
 }
 
 bool decompress_to_file(const char *ext, const char *filename, int output_fd)
@@ -209,6 +194,72 @@
 }
 
 /*
+ * Parses kernel module specified in @path and updates
+ * @m argument like:
+ *
+ *    @comp - true if @path contains supported compression suffix,
+ *            false otherwise
+ *    @kmod - true if @path contains '.ko' suffix in right position,
+ *            false otherwise
+ *    @name - if (@alloc_name && @kmod) is true, it contains strdup-ed base name
+ *            of the kernel module without suffixes, otherwise strudup-ed
+ *            base name of @path
+ *    @ext  - if (@alloc_ext && @comp) is true, it contains strdup-ed string
+ *            the compression suffix
+ *
+ * Returns 0 if there's no strdup error, -ENOMEM otherwise.
+ */
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+		       bool alloc_name, bool alloc_ext)
+{
+	const char *name = strrchr(path, '/');
+	const char *ext  = strrchr(path, '.');
+
+	memset(m, 0x0, sizeof(*m));
+	name = name ? name + 1 : path;
+
+	/* No extension, just return name. */
+	if (ext == NULL) {
+		if (alloc_name) {
+			m->name = strdup(name);
+			return m->name ? 0 : -ENOMEM;
+		}
+		return 0;
+	}
+
+	if (is_supported_compression(ext + 1)) {
+		m->comp = true;
+		ext -= 3;
+	}
+
+	/* Check .ko extension only if there's enough name left. */
+	if (ext > name)
+		m->kmod = !strncmp(ext, ".ko", 3);
+
+	if (alloc_name) {
+		if (m->kmod) {
+			if (asprintf(&m->name, "[%.*s]", (int) (ext - name), name) == -1)
+				return -ENOMEM;
+		} else {
+			if (asprintf(&m->name, "%s", name) == -1)
+				return -ENOMEM;
+		}
+
+		strxfrchar(m->name, '-', '_');
+	}
+
+	if (alloc_ext && m->comp) {
+		m->ext = strdup(ext + 4);
+		if (!m->ext) {
+			free((void *) m->name);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/*
  * Global list of open DSOs and the counter.
  */
 static LIST_HEAD(dso__data_open);
@@ -240,7 +291,7 @@
 		if (fd >= 0)
 			return fd;
 
-		pr_debug("dso open failed, mmap: %s\n",
+		pr_debug("dso open failed: %s\n",
 			 strerror_r(errno, sbuf, sizeof(sbuf)));
 		if (!dso__data_open_cnt || errno != EMFILE)
 			break;
@@ -1002,19 +1053,22 @@
 	return dso__find_by_longname(&dsos->root, name);
 }
 
+struct dso *dsos__addnew(struct dsos *dsos, const char *name)
+{
+	struct dso *dso = dso__new(name);
+
+	if (dso != NULL) {
+		dsos__add(dsos, dso);
+		dso__set_basename(dso);
+	}
+	return dso;
+}
+
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
 {
 	struct dso *dso = dsos__find(dsos, name, false);
 
-	if (!dso) {
-		dso = dso__new(name);
-		if (dso != NULL) {
-			dsos__add(dsos, dso);
-			dso__set_basename(dso);
-		}
-	}
-
-	return dso;
+	return dso ? dso : dsos__addnew(dsos, name);
 }
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
@@ -1083,3 +1137,36 @@
 
 	return dso__type_fd(fd);
 }
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
+{
+	int idx, errnum = dso->load_errno;
+	/*
+	 * This must have a same ordering as the enum dso_load_errno.
+	 */
+	static const char *dso_load__error_str[] = {
+	"Internal tools/perf/ library error",
+	"Invalid ELF file",
+	"Can not read build id",
+	"Mismatching build id",
+	"Decompression failure",
+	};
+
+	BUG_ON(buflen == 0);
+
+	if (errnum >= 0) {
+		const char *err = strerror_r(errnum, buf, buflen);
+
+		if (err != buf)
+			scnprintf(buf, buflen, "%s", err);
+
+		return 0;
+	}
+
+	if (errnum <  __DSO_LOAD_ERRNO__START || errnum >= __DSO_LOAD_ERRNO__END)
+		return -1;
+
+	idx = errnum - __DSO_LOAD_ERRNO__START;
+	scnprintf(buf, buflen, "%s", dso_load__error_str[idx]);
+	return 0;
+}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index ced9284..e0901b4 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -60,6 +60,31 @@
 	DSO__TYPE_X32BIT,
 };
 
+enum dso_load_errno {
+	DSO_LOAD_ERRNO__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__DSO_LOAD_ERRNO__START		= -10000,
+
+	DSO_LOAD_ERRNO__INTERNAL_ERROR	= __DSO_LOAD_ERRNO__START,
+
+	/* for symsrc__init() */
+	DSO_LOAD_ERRNO__INVALID_ELF,
+	DSO_LOAD_ERRNO__CANNOT_READ_BUILDID,
+	DSO_LOAD_ERRNO__MISMATCHING_BUILDID,
+
+	/* for decompress_kmodule */
+	DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE,
+
+	__DSO_LOAD_ERRNO__END,
+};
+
 #define DSO__SWAP(dso, type, val)			\
 ({							\
 	type ____r = val;				\
@@ -113,6 +138,7 @@
 	enum dso_swap_type	needs_swap;
 	enum dso_binary_type	symtab_type;
 	enum dso_binary_type	binary_type;
+	enum dso_load_errno	load_errno;
 	u8		 adjust_symbols:1;
 	u8		 has_build_id:1;
 	u8		 has_srcline:1;
@@ -139,7 +165,8 @@
 		u32		 status_seen;
 		size_t		 file_size;
 		struct list_head open_entry;
-		u64		 frame_offset;
+		u64		 debug_frame_offset;
+		u64		 eh_frame_hdr_offset;
 	} data;
 
 	union { /* Tool specific area */
@@ -189,11 +216,24 @@
 int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
 				   char *root_dir, char *filename, size_t size);
 bool is_supported_compression(const char *ext);
-bool is_kmodule_extension(const char *ext);
-bool is_kernel_module(const char *pathname, bool *compressed);
+bool is_kernel_module(const char *pathname);
 bool decompress_to_file(const char *ext, const char *filename, int output_fd);
 bool dso__needs_decompress(struct dso *dso);
 
+struct kmod_path {
+	char *name;
+	char *ext;
+	bool  comp;
+	bool  kmod;
+};
+
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+		     bool alloc_name, bool alloc_ext);
+
+#define kmod_path__parse(__m, __p)      __kmod_path__parse(__m, __p, false, false)
+#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false)
+#define kmod_path__parse_ext(__m, __p)  __kmod_path__parse(__m, __p, false, true)
+
 /*
  * The dso__data_* external interface provides following functions:
  *   dso__data_fd
@@ -249,6 +289,7 @@
 				const char *short_name, int dso_type);
 
 void dsos__add(struct dsos *dsos, struct dso *dso);
+struct dso *dsos__addnew(struct dsos *dsos, const char *name);
 struct dso *dsos__find(const struct dsos *dsos, const char *name,
 		       bool cmp_short);
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
@@ -279,4 +320,6 @@
 
 enum dso_type dso__type(struct dso *dso, struct machine *machine);
 
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen);
+
 #endif /* __PERF_DSO */
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index cc66c40..c34e024 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -278,6 +278,21 @@
 }
 
 /**
+ * die_is_func_instance - Ensure that this DIE is an instance of a subprogram
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is an instance (which has an entry address).
+ * This returns true if @dw_die is a function instance. If not, you need to
+ * call die_walk_instances() to find actual instances.
+ **/
+bool die_is_func_instance(Dwarf_Die *dw_die)
+{
+	Dwarf_Addr tmp;
+
+	/* Actually gcc optimizes non-inline as like as inlined */
+	return !dwarf_func_inline(dw_die) && dwarf_entrypc(dw_die, &tmp) == 0;
+}
+/**
  * die_get_data_member_location - Get the data-member offset
  * @mb_die: a DIE of a member of a data structure
  * @offs: The offset of the member in the data structure
@@ -786,10 +801,16 @@
 {
 	const char *name = data;
 
-	if ((dwarf_tag(die_mem) == DW_TAG_member) &&
-	    die_compare_name(die_mem, name))
-		return DIE_FIND_CB_END;
-
+	if (dwarf_tag(die_mem) == DW_TAG_member) {
+		if (die_compare_name(die_mem, name))
+			return DIE_FIND_CB_END;
+		else if (!dwarf_diename(die_mem)) {	/* Unnamed structure */
+			Dwarf_Die type_die, tmp_die;
+			if (die_get_type(die_mem, &type_die) &&
+			    die_find_member(&type_die, name, &tmp_die))
+				return DIE_FIND_CB_END;
+		}
+	}
 	return DIE_FIND_CB_SIBLING;
 }
 
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index b4fe90c..af7dbcd 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -41,6 +41,9 @@
 /* Ensure that this DIE is a subprogram and definition (not declaration) */
 extern bool die_is_func_def(Dwarf_Die *dw_die);
 
+/* Ensure that this DIE is an instance of a subprogram */
+extern bool die_is_func_instance(Dwarf_Die *dw_die);
+
 /* Compare diename and tname */
 extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
 
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6c6d044..ff866c4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -49,72 +49,103 @@
 	.period	   = 1,
 };
 
-static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len)
+/*
+ * Assumes that the first 4095 bytes of /proc/pid/stat contains
+ * the comm, tgid and ppid.
+ */
+static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
+				    pid_t *tgid, pid_t *ppid)
 {
 	char filename[PATH_MAX];
-	char bf[BUFSIZ];
-	FILE *fp;
-	size_t size = 0;
-	pid_t tgid = -1;
+	char bf[4096];
+	int fd;
+	size_t size = 0, n;
+	char *nl, *name, *tgids, *ppids;
+
+	*tgid = -1;
+	*ppid = -1;
 
 	snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
 
-	fp = fopen(filename, "r");
-	if (fp == NULL) {
+	fd = open(filename, O_RDONLY);
+	if (fd < 0) {
 		pr_debug("couldn't open %s\n", filename);
-		return 0;
+		return -1;
 	}
 
-	while (!comm[0] || (tgid < 0)) {
-		if (fgets(bf, sizeof(bf), fp) == NULL) {
-			pr_warning("couldn't get COMM and pgid, malformed %s\n",
-				   filename);
-			break;
-		}
+	n = read(fd, bf, sizeof(bf) - 1);
+	close(fd);
+	if (n <= 0) {
+		pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n",
+			   pid);
+		return -1;
+	}
+	bf[n] = '\0';
 
-		if (memcmp(bf, "Name:", 5) == 0) {
-			char *name = bf + 5;
-			while (*name && isspace(*name))
-				++name;
-			size = strlen(name) - 1;
-			if (size >= len)
-				size = len - 1;
-			memcpy(comm, name, size);
-			comm[size] = '\0';
+	name = strstr(bf, "Name:");
+	tgids = strstr(bf, "Tgid:");
+	ppids = strstr(bf, "PPid:");
 
-		} else if (memcmp(bf, "Tgid:", 5) == 0) {
-			char *tgids = bf + 5;
-			while (*tgids && isspace(*tgids))
-				++tgids;
-			tgid = atoi(tgids);
-		}
+	if (name) {
+		name += 5;  /* strlen("Name:") */
+
+		while (*name && isspace(*name))
+			++name;
+
+		nl = strchr(name, '\n');
+		if (nl)
+			*nl = '\0';
+
+		size = strlen(name);
+		if (size >= len)
+			size = len - 1;
+		memcpy(comm, name, size);
+		comm[size] = '\0';
+	} else {
+		pr_debug("Name: string not found for pid %d\n", pid);
 	}
 
-	fclose(fp);
+	if (tgids) {
+		tgids += 5;  /* strlen("Tgid:") */
+		*tgid = atoi(tgids);
+	} else {
+		pr_debug("Tgid: string not found for pid %d\n", pid);
+	}
 
-	return tgid;
+	if (ppids) {
+		ppids += 5;  /* strlen("PPid:") */
+		*ppid = atoi(ppids);
+	} else {
+		pr_debug("PPid: string not found for pid %d\n", pid);
+	}
+
+	return 0;
 }
 
-static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
-					 union perf_event *event, pid_t pid,
-					 perf_event__handler_t process,
-					 struct machine *machine)
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
+				    struct machine *machine,
+				    pid_t *tgid, pid_t *ppid)
 {
 	size_t size;
-	pid_t tgid;
+
+	*ppid = -1;
 
 	memset(&event->comm, 0, sizeof(event->comm));
 
-	if (machine__is_host(machine))
-		tgid = perf_event__get_comm_tgid(pid, event->comm.comm,
-						 sizeof(event->comm.comm));
-	else
-		tgid = machine->pid;
+	if (machine__is_host(machine)) {
+		if (perf_event__get_comm_ids(pid, event->comm.comm,
+					     sizeof(event->comm.comm),
+					     tgid, ppid) != 0) {
+			return -1;
+		}
+	} else {
+		*tgid = machine->pid;
+	}
 
-	if (tgid < 0)
-		goto out;
+	if (*tgid < 0)
+		return -1;
 
-	event->comm.pid = tgid;
+	event->comm.pid = *tgid;
 	event->comm.header.type = PERF_RECORD_COMM;
 
 	size = strlen(event->comm.comm) + 1;
@@ -125,23 +156,45 @@
 				machine->id_hdr_size);
 	event->comm.tid = pid;
 
+	return 0;
+}
+
+static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+					 union perf_event *event, pid_t pid,
+					 perf_event__handler_t process,
+					 struct machine *machine)
+{
+	pid_t tgid, ppid;
+
+	if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
+		return -1;
+
 	if (process(tool, event, &synth_sample, machine) != 0)
 		return -1;
 
-out:
 	return tgid;
 }
 
 static int perf_event__synthesize_fork(struct perf_tool *tool,
-				       union perf_event *event, pid_t pid,
-				       pid_t tgid, perf_event__handler_t process,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid, pid_t ppid,
+				       perf_event__handler_t process,
 				       struct machine *machine)
 {
 	memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
 
-	/* this is really a clone event but we use fork to synthesize it */
-	event->fork.ppid = tgid;
-	event->fork.ptid = tgid;
+	/*
+	 * for main thread set parent to ppid from status file. For other
+	 * threads set parent pid to main thread. ie., assume main thread
+	 * spawns all threads in a process
+	*/
+	if (tgid == pid) {
+		event->fork.ppid = ppid;
+		event->fork.ptid = ppid;
+	} else {
+		event->fork.ppid = tgid;
+		event->fork.ptid = tgid;
+	}
 	event->fork.pid  = tgid;
 	event->fork.tid  = pid;
 	event->fork.header.type = PERF_RECORD_FORK;
@@ -333,7 +386,8 @@
 	char filename[PATH_MAX];
 	DIR *tasks;
 	struct dirent dirent, *next;
-	pid_t tgid;
+	pid_t tgid, ppid;
+	int rc = 0;
 
 	/* special case: only send one comm event using passed in pid */
 	if (!full) {
@@ -361,34 +415,38 @@
 
 	while (!readdir_r(tasks, &dirent, &next) && next) {
 		char *end;
-		int rc = 0;
 		pid_t _pid;
 
 		_pid = strtol(dirent.d_name, &end, 10);
 		if (*end)
 			continue;
 
-		tgid = perf_event__synthesize_comm(tool, comm_event, _pid,
-						   process, machine);
-		if (tgid == -1)
-			return -1;
+		rc = -1;
+		if (perf_event__prepare_comm(comm_event, _pid, machine,
+					     &tgid, &ppid) != 0)
+			break;
 
+		if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
+						ppid, process, machine) < 0)
+			break;
+		/*
+		 * Send the prepared comm event
+		 */
+		if (process(tool, comm_event, &synth_sample, machine) != 0)
+			break;
+
+		rc = 0;
 		if (_pid == pid) {
 			/* process the parent's maps too */
 			rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
 						process, machine, mmap_data);
-		} else {
-			/* only fork the tid's map, to save time */
-			rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
-						 process, machine);
+			if (rc)
+				break;
 		}
-
-		if (rc)
-			return rc;
 	}
 
 	closedir(tasks);
-	return 0;
+	return rc;
 }
 
 int perf_event__synthesize_thread_map(struct perf_tool *tool,
@@ -615,7 +673,7 @@
 	else
 		s = "";
 
-	return fprintf(fp, "%s: %s:%d\n", s, event->comm.comm, event->comm.tid);
+	return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
 }
 
 int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c4ffe2b..09b9e8d 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -242,7 +242,6 @@
 	u32 nr_invalid_chains;
 	u32 nr_unknown_id;
 	u32 nr_unprocessable_samples;
-	u32 nr_unordered_events;
 };
 
 struct attr_event {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 28b8ce8..080be93 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -7,7 +7,6 @@
  * Released under the GPL v2. (and only v2, not any later version)
  */
 #include "util.h"
-#include <api/fs/debugfs.h>
 #include <api/fs/fs.h>
 #include <poll.h>
 #include "cpumap.h"
@@ -635,8 +634,8 @@
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
 	struct perf_mmap *md = &evlist->mmap[idx];
-	unsigned int head = perf_mmap__read_head(md);
-	unsigned int old = md->prev;
+	u64 head = perf_mmap__read_head(md);
+	u64 old = md->prev;
 	unsigned char *data = md->base + page_size;
 	union perf_event *event = NULL;
 
@@ -696,7 +695,7 @@
 
 static bool perf_mmap__empty(struct perf_mmap *md)
 {
-	return perf_mmap__read_head(md) != md->prev;
+	return perf_mmap__read_head(md) == md->prev;
 }
 
 static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
@@ -717,7 +716,7 @@
 	struct perf_mmap *md = &evlist->mmap[idx];
 
 	if (!evlist->overwrite) {
-		unsigned int old = md->prev;
+		u64 old = md->prev;
 
 		perf_mmap__write_tail(md, old);
 	}
@@ -1051,7 +1050,7 @@
 	return -1;
 }
 
-int perf_evlist__apply_filters(struct perf_evlist *evlist)
+int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
 {
 	struct perf_evsel *evsel;
 	int err = 0;
@@ -1063,8 +1062,10 @@
 			continue;
 
 		err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter);
-		if (err)
+		if (err) {
+			*err_evsel = evsel;
 			break;
+		}
 	}
 
 	return err;
@@ -1086,6 +1087,38 @@
 	return err;
 }
 
+int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
+{
+	char *filter;
+	int ret = -1;
+	size_t i;
+
+	for (i = 0; i < npids; ++i) {
+		if (i == 0) {
+			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
+				return -1;
+		} else {
+			char *tmp;
+
+			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
+				goto out_free;
+
+			free(filter);
+			filter = tmp;
+		}
+	}
+
+	ret = perf_evlist__set_filter(evlist, filter);
+out_free:
+	free(filter);
+	return ret;
+}
+
+int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
+{
+	return perf_evlist__set_filter_pids(evlist, 1, &pid);
+}
+
 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
 {
 	struct perf_evsel *pos;
@@ -1329,7 +1362,7 @@
 		 * writing exactly one byte, in workload.cork_fd, usually via
 		 * perf_evlist__start_workload().
 		 *
-		 * For cancelling the workload without actuallin running it,
+		 * For cancelling the workload without actually running it,
 		 * the parent will just close workload.cork_fd, without writing
 		 * anything, i.e. read will return zero and we just exit()
 		 * here.
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e99a676..b5cce95 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -27,7 +27,7 @@
 	void		 *base;
 	int		 mask;
 	int		 refcnt;
-	unsigned int	 prev;
+	u64		 prev;
 	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
 };
 
@@ -51,6 +51,7 @@
 	struct thread_map *threads;
 	struct cpu_map	  *cpus;
 	struct perf_evsel *selected;
+	struct events_stats stats;
 };
 
 struct perf_evsel_str_handler {
@@ -77,6 +78,8 @@
 			   const char *sys, const char *name, void *handler);
 
 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
+int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid);
+int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
 
 struct perf_evsel *
 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id);
@@ -149,7 +152,7 @@
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
-int perf_evlist__apply_filters(struct perf_evlist *evlist);
+int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel);
 
 void __perf_evlist__set_leader(struct list_head *list);
 void perf_evlist__set_leader(struct perf_evlist *evlist);
@@ -186,16 +189,15 @@
 int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
 
-static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
+static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->base;
-	int head = ACCESS_ONCE(pc->data_head);
+	u64 head = ACCESS_ONCE(pc->data_head);
 	rmb();
 	return head;
 }
 
-static inline void perf_mmap__write_tail(struct perf_mmap *md,
-					 unsigned long tail)
+static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 {
 	struct perf_event_mmap_page *pc = md->base;
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ea51a90..33e3fd8 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -32,8 +32,12 @@
 	bool exclude_guest;
 	bool mmap2;
 	bool cloexec;
+	bool clockid;
+	bool clockid_wrong;
 } perf_missing_features;
 
+static clockid_t clockid;
+
 static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused)
 {
 	return 0;
@@ -537,13 +541,30 @@
 }
 
 static void
-perf_evsel__config_callgraph(struct perf_evsel *evsel)
+perf_evsel__config_callgraph(struct perf_evsel *evsel,
+			     struct record_opts *opts)
 {
 	bool function = perf_evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->attr;
 
 	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
+	if (callchain_param.record_mode == CALLCHAIN_LBR) {
+		if (!opts->branch_stack) {
+			if (attr->exclude_user) {
+				pr_warning("LBR callstack option is only available "
+					   "to get user callchain information. "
+					   "Falling back to framepointers.\n");
+			} else {
+				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+							PERF_SAMPLE_BRANCH_CALL_STACK;
+			}
+		} else
+			 pr_warning("Cannot use LBR callstack with branch stack. "
+				    "Falling back to framepointers.\n");
+	}
+
 	if (callchain_param.record_mode == CALLCHAIN_DWARF) {
 		if (!function) {
 			perf_evsel__set_sample_bit(evsel, REGS_USER);
@@ -667,7 +688,7 @@
 		evsel->attr.exclude_callchain_user = 1;
 
 	if (callchain_param.enabled && !evsel->no_aux_samples)
-		perf_evsel__config_callgraph(evsel);
+		perf_evsel__config_callgraph(evsel, opts);
 
 	if (opts->sample_intr_regs) {
 		attr->sample_regs_intr = PERF_REGS_MASK;
@@ -717,6 +738,12 @@
 	if (opts->sample_transaction)
 		perf_evsel__set_sample_bit(evsel, TRANSACTION);
 
+	if (opts->running_time) {
+		evsel->attr.read_format |=
+			PERF_FORMAT_TOTAL_TIME_ENABLED |
+			PERF_FORMAT_TOTAL_TIME_RUNNING;
+	}
+
 	/*
 	 * XXX see the function comment above
 	 *
@@ -738,6 +765,12 @@
 		attr->disabled = 0;
 		attr->enable_on_exec = 0;
 	}
+
+	clockid = opts->clockid;
+	if (opts->use_clockid) {
+		attr->use_clockid = 1;
+		attr->clockid = opts->clockid;
+	}
 }
 
 static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -978,67 +1011,126 @@
 	return fd;
 }
 
-#define __PRINT_ATTR(fmt, cast, field)  \
-	fprintf(fp, "  %-19s "fmt"\n", #field, cast attr->field)
+struct bit_names {
+	int bit;
+	const char *name;
+};
 
-#define PRINT_ATTR_U32(field)  __PRINT_ATTR("%u" , , field)
-#define PRINT_ATTR_X32(field)  __PRINT_ATTR("%#x", , field)
-#define PRINT_ATTR_U64(field)  __PRINT_ATTR("%" PRIu64, (uint64_t), field)
-#define PRINT_ATTR_X64(field)  __PRINT_ATTR("%#"PRIx64, (uint64_t), field)
-
-#define PRINT_ATTR2N(name1, field1, name2, field2)	\
-	fprintf(fp, "  %-19s %u    %-19s %u\n",		\
-	name1, attr->field1, name2, attr->field2)
-
-#define PRINT_ATTR2(field1, field2) \
-	PRINT_ATTR2N(#field1, field1, #field2, field2)
-
-static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
+static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
 {
-	size_t ret = 0;
+	bool first_bit = true;
+	int i = 0;
 
-	ret += fprintf(fp, "%.60s\n", graph_dotted_line);
-	ret += fprintf(fp, "perf_event_attr:\n");
+	do {
+		if (value & bits[i].bit) {
+			buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
+			first_bit = false;
+		}
+	} while (bits[++i].name != NULL);
+}
 
-	ret += PRINT_ATTR_U32(type);
-	ret += PRINT_ATTR_U32(size);
-	ret += PRINT_ATTR_X64(config);
-	ret += PRINT_ATTR_U64(sample_period);
-	ret += PRINT_ATTR_U64(sample_freq);
-	ret += PRINT_ATTR_X64(sample_type);
-	ret += PRINT_ATTR_X64(read_format);
+static void __p_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
+		bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
+		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
+		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
+		bit_name(IDENTIFIER), bit_name(REGS_INTR),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
 
-	ret += PRINT_ATTR2(disabled, inherit);
-	ret += PRINT_ATTR2(pinned, exclusive);
-	ret += PRINT_ATTR2(exclude_user, exclude_kernel);
-	ret += PRINT_ATTR2(exclude_hv, exclude_idle);
-	ret += PRINT_ATTR2(mmap, comm);
-	ret += PRINT_ATTR2(mmap2, comm_exec);
-	ret += PRINT_ATTR2(freq, inherit_stat);
-	ret += PRINT_ATTR2(enable_on_exec, task);
-	ret += PRINT_ATTR2(watermark, precise_ip);
-	ret += PRINT_ATTR2(mmap_data, sample_id_all);
-	ret += PRINT_ATTR2(exclude_host, exclude_guest);
-	ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel,
-			    "excl.callchain_user", exclude_callchain_user);
+static void __p_read_format(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_FORMAT_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
+		bit_name(ID), bit_name(GROUP),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
 
-	ret += PRINT_ATTR_U32(wakeup_events);
-	ret += PRINT_ATTR_U32(wakeup_watermark);
-	ret += PRINT_ATTR_X32(bp_type);
-	ret += PRINT_ATTR_X64(bp_addr);
-	ret += PRINT_ATTR_X64(config1);
-	ret += PRINT_ATTR_U64(bp_len);
-	ret += PRINT_ATTR_X64(config2);
-	ret += PRINT_ATTR_X64(branch_sample_type);
-	ret += PRINT_ATTR_X64(sample_regs_user);
-	ret += PRINT_ATTR_U32(sample_stack_user);
-	ret += PRINT_ATTR_X64(sample_regs_intr);
+#define BUF_SIZE		1024
 
-	ret += fprintf(fp, "%.60s\n", graph_dotted_line);
+#define p_hex(val)		snprintf(buf, BUF_SIZE, "%"PRIx64, (uint64_t)(val))
+#define p_unsigned(val)		snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
+#define p_signed(val)		snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
+#define p_sample_type(val)	__p_sample_type(buf, BUF_SIZE, val)
+#define p_read_format(val)	__p_read_format(buf, BUF_SIZE, val)
+
+#define PRINT_ATTRn(_n, _f, _p)				\
+do {							\
+	if (attr->_f) {					\
+		_p(attr->_f);				\
+		ret += attr__fprintf(fp, _n, buf, priv);\
+	}						\
+} while (0)
+
+#define PRINT_ATTRf(_f, _p)	PRINT_ATTRn(#_f, _f, _p)
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+			     attr__fprintf_f attr__fprintf, void *priv)
+{
+	char buf[BUF_SIZE];
+	int ret = 0;
+
+	PRINT_ATTRf(type, p_unsigned);
+	PRINT_ATTRf(size, p_unsigned);
+	PRINT_ATTRf(config, p_hex);
+	PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
+	PRINT_ATTRf(sample_type, p_sample_type);
+	PRINT_ATTRf(read_format, p_read_format);
+
+	PRINT_ATTRf(disabled, p_unsigned);
+	PRINT_ATTRf(inherit, p_unsigned);
+	PRINT_ATTRf(pinned, p_unsigned);
+	PRINT_ATTRf(exclusive, p_unsigned);
+	PRINT_ATTRf(exclude_user, p_unsigned);
+	PRINT_ATTRf(exclude_kernel, p_unsigned);
+	PRINT_ATTRf(exclude_hv, p_unsigned);
+	PRINT_ATTRf(exclude_idle, p_unsigned);
+	PRINT_ATTRf(mmap, p_unsigned);
+	PRINT_ATTRf(comm, p_unsigned);
+	PRINT_ATTRf(freq, p_unsigned);
+	PRINT_ATTRf(inherit_stat, p_unsigned);
+	PRINT_ATTRf(enable_on_exec, p_unsigned);
+	PRINT_ATTRf(task, p_unsigned);
+	PRINT_ATTRf(watermark, p_unsigned);
+	PRINT_ATTRf(precise_ip, p_unsigned);
+	PRINT_ATTRf(mmap_data, p_unsigned);
+	PRINT_ATTRf(sample_id_all, p_unsigned);
+	PRINT_ATTRf(exclude_host, p_unsigned);
+	PRINT_ATTRf(exclude_guest, p_unsigned);
+	PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
+	PRINT_ATTRf(exclude_callchain_user, p_unsigned);
+	PRINT_ATTRf(mmap2, p_unsigned);
+	PRINT_ATTRf(comm_exec, p_unsigned);
+	PRINT_ATTRf(use_clockid, p_unsigned);
+
+	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
+	PRINT_ATTRf(bp_type, p_unsigned);
+	PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
+	PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
+	PRINT_ATTRf(sample_regs_user, p_hex);
+	PRINT_ATTRf(sample_stack_user, p_unsigned);
+	PRINT_ATTRf(clockid, p_signed);
+	PRINT_ATTRf(sample_regs_intr, p_hex);
 
 	return ret;
 }
 
+static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
+				void *priv __attribute__((unused)))
+{
+	return fprintf(fp, "  %-32s %s\n", name, val);
+}
+
 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 			      struct thread_map *threads)
 {
@@ -1062,6 +1154,12 @@
 	}
 
 fallback_missing_features:
+	if (perf_missing_features.clockid_wrong)
+		evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */
+	if (perf_missing_features.clockid) {
+		evsel->attr.use_clockid = 0;
+		evsel->attr.clockid = 0;
+	}
 	if (perf_missing_features.cloexec)
 		flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
 	if (perf_missing_features.mmap2)
@@ -1072,8 +1170,12 @@
 	if (perf_missing_features.sample_id_all)
 		evsel->attr.sample_id_all = 0;
 
-	if (verbose >= 2)
-		perf_event_attr__fprintf(&evsel->attr, stderr);
+	if (verbose >= 2) {
+		fprintf(stderr, "%.60s\n", graph_dotted_line);
+		fprintf(stderr, "perf_event_attr:\n");
+		perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
+		fprintf(stderr, "%.60s\n", graph_dotted_line);
+	}
 
 	for (cpu = 0; cpu < cpus->nr; cpu++) {
 
@@ -1099,6 +1201,17 @@
 				goto try_fallback;
 			}
 			set_rlimit = NO_CHANGE;
+
+			/*
+			 * If we succeeded but had to kill clockid, fail and
+			 * have perf_evsel__open_strerror() print us a nice
+			 * error.
+			 */
+			if (perf_missing_features.clockid ||
+			    perf_missing_features.clockid_wrong) {
+				err = -EINVAL;
+				goto out_close;
+			}
 		}
 	}
 
@@ -1132,7 +1245,17 @@
 	if (err != -EINVAL || cpu > 0 || thread > 0)
 		goto out_close;
 
-	if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
+	/*
+	 * Must probe features in the order they were added to the
+	 * perf_event_attr interface.
+	 */
+	if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
+		perf_missing_features.clockid_wrong = true;
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
+		perf_missing_features.clockid = true;
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
 		perf_missing_features.cloexec = true;
 		goto fallback_missing_features;
 	} else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
@@ -1892,7 +2015,7 @@
 		value = *(u32 *)ptr;
 		break;
 	case 8:
-		value = *(u64 *)ptr;
+		memcpy(&value, ptr, sizeof(u64));
 		break;
 	default:
 		return 0;
@@ -1933,62 +2056,9 @@
 	return ret;
 }
 
-static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value)
+static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
 {
-	if (value == 0)
-		return 0;
-
-	return comma_fprintf(fp, first, " %s: %" PRIu64, field, value);
-}
-
-#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field)
-
-struct bit_names {
-	int bit;
-	const char *name;
-};
-
-static int bits__fprintf(FILE *fp, const char *field, u64 value,
-			 struct bit_names *bits, bool *first)
-{
-	int i = 0, printed = comma_fprintf(fp, first, " %s: ", field);
-	bool first_bit = true;
-
-	do {
-		if (value & bits[i].bit) {
-			printed += fprintf(fp, "%s%s", first_bit ? "" : "|", bits[i].name);
-			first_bit = false;
-		}
-	} while (bits[++i].name != NULL);
-
-	return printed;
-}
-
-static int sample_type__fprintf(FILE *fp, bool *first, u64 value)
-{
-#define bit_name(n) { PERF_SAMPLE_##n, #n }
-	struct bit_names bits[] = {
-		bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
-		bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
-		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
-		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
-		bit_name(IDENTIFIER), bit_name(REGS_INTR),
-		{ .name = NULL, }
-	};
-#undef bit_name
-	return bits__fprintf(fp, "sample_type", value, bits, first);
-}
-
-static int read_format__fprintf(FILE *fp, bool *first, u64 value)
-{
-#define bit_name(n) { PERF_FORMAT_##n, #n }
-	struct bit_names bits[] = {
-		bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
-		bit_name(ID), bit_name(GROUP),
-		{ .name = NULL, }
-	};
-#undef bit_name
-	return bits__fprintf(fp, "read_format", value, bits, first);
+	return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
 }
 
 int perf_evsel__fprintf(struct perf_evsel *evsel,
@@ -2017,47 +2087,13 @@
 
 	printed += fprintf(fp, "%s", perf_evsel__name(evsel));
 
-	if (details->verbose || details->freq) {
+	if (details->verbose) {
+		printed += perf_event_attr__fprintf(fp, &evsel->attr,
+						    __print_attr__fprintf, &first);
+	} else if (details->freq) {
 		printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64,
 					 (u64)evsel->attr.sample_freq);
 	}
-
-	if (details->verbose) {
-		if_print(type);
-		if_print(config);
-		if_print(config1);
-		if_print(config2);
-		if_print(size);
-		printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type);
-		if (evsel->attr.read_format)
-			printed += read_format__fprintf(fp, &first, evsel->attr.read_format);
-		if_print(disabled);
-		if_print(inherit);
-		if_print(pinned);
-		if_print(exclusive);
-		if_print(exclude_user);
-		if_print(exclude_kernel);
-		if_print(exclude_hv);
-		if_print(exclude_idle);
-		if_print(mmap);
-		if_print(mmap2);
-		if_print(comm);
-		if_print(comm_exec);
-		if_print(freq);
-		if_print(inherit_stat);
-		if_print(enable_on_exec);
-		if_print(task);
-		if_print(watermark);
-		if_print(precise_ip);
-		if_print(mmap_data);
-		if_print(sample_id_all);
-		if_print(exclude_host);
-		if_print(exclude_guest);
-		if_print(__reserved_1);
-		if_print(wakeup_events);
-		if_print(bp_type);
-		if_print(branch_sample_type);
-	}
 out:
 	fputc('\n', fp);
 	return ++printed;
@@ -2135,6 +2171,12 @@
 	"The PMU counters are busy/taken by another profiler.\n"
 	"We found oprofile daemon running, please stop it and try again.");
 		break;
+	case EINVAL:
+		if (perf_missing_features.clockid)
+			return scnprintf(msg, size, "clockid feature not supported.");
+		if (perf_missing_features.clockid_wrong)
+			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
+		break;
 	default:
 		break;
 	}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3862274..e486151 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -335,6 +335,7 @@
 	bool freq;
 	bool verbose;
 	bool event_group;
+	bool force;
 };
 
 int perf_evsel__fprintf(struct perf_evsel *evsel,
@@ -355,4 +356,14 @@
      (_evsel) && (_evsel)->leader == (_leader);					\
      (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
 
+static inline bool has_branch_callstack(struct perf_evsel *evsel)
+{
+	return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
+
+typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+			     attr__fprintf_f attr__fprintf, void *priv);
+
 #endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1f407f7..918fd8a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1055,6 +1055,12 @@
 	goto out;
 }
 
+static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val,
+				void *priv __attribute__((unused)))
+{
+	return fprintf(fp, ", %s = %s", name, val);
+}
+
 static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
 {
 	struct perf_evsel *evsel, *events = read_event_desc(ph, fd);
@@ -1069,26 +1075,6 @@
 	for (evsel = events; evsel->attr.size; evsel++) {
 		fprintf(fp, "# event : name = %s, ", evsel->name);
 
-		fprintf(fp, "type = %d, config = 0x%"PRIx64
-			    ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64,
-				evsel->attr.type,
-				(u64)evsel->attr.config,
-				(u64)evsel->attr.config1,
-				(u64)evsel->attr.config2);
-
-		fprintf(fp, ", excl_usr = %d, excl_kern = %d",
-				evsel->attr.exclude_user,
-				evsel->attr.exclude_kernel);
-
-		fprintf(fp, ", excl_host = %d, excl_guest = %d",
-				evsel->attr.exclude_host,
-				evsel->attr.exclude_guest);
-
-		fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip);
-
-		fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2);
-		fprintf(fp, ", attr_mmap  = %d", evsel->attr.mmap);
-		fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data);
 		if (evsel->ids) {
 			fprintf(fp, ", id = {");
 			for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) {
@@ -1099,6 +1085,8 @@
 			fprintf(fp, " }");
 		}
 
+		perf_event_attr__fprintf(fp, &evsel->attr, __desc_attr__fprintf, NULL);
+
 		fputc('\n', fp);
 	}
 
@@ -1266,7 +1254,7 @@
 
 		dso__set_build_id(dso, &bev->build_id);
 
-		if (!is_kernel_module(filename, NULL))
+		if (!is_kernel_module(filename))
 			dso->kernel = dso_type;
 
 		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
@@ -2516,8 +2504,11 @@
 		if (read_attr(fd, header, &f_attr) < 0)
 			goto out_errno;
 
-		if (header->needs_swap)
+		if (header->needs_swap) {
+			f_attr.ids.size   = bswap_64(f_attr.ids.size);
+			f_attr.ids.offset = bswap_64(f_attr.ids.offset);
 			perf_event__attr_swap(&f_attr.attr);
+		}
 
 		tmp = lseek(fd, 0, SEEK_CUR);
 		evsel = perf_evsel__new(&f_attr.attr);
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 70b48a6..cc22b91 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -263,15 +263,9 @@
 	while (next) {
 		n = rb_entry(next, struct hist_entry, rb_node);
 		next = rb_next(&n->rb_node);
-		/*
-		 * We may be annotating this, for instance, so keep it here in
-		 * case some it gets new samples, we'll eventually free it when
-		 * the user stops browsing and it agains gets fully decayed.
-		 */
 		if (((zap_user && n->level == '.') ||
 		     (zap_kernel && n->level != '.') ||
-		     hists__decay_entry(hists, n)) &&
-		    !n->used) {
+		     hists__decay_entry(hists, n))) {
 			hists__delete_entry(hists, n);
 		}
 	}
@@ -355,6 +349,7 @@
 			callchain_init(he->callchain);
 
 		INIT_LIST_HEAD(&he->pairs.node);
+		thread__get(he->thread);
 	}
 
 	return he;
@@ -941,6 +936,7 @@
 
 void hist_entry__delete(struct hist_entry *he)
 {
+	thread__zput(he->thread);
 	zfree(&he->branch_info);
 	zfree(&he->mem_info);
 	zfree(&he->stat_acc);
@@ -1169,6 +1165,7 @@
 	/* force fold unfiltered entry for simplicity */
 	h->ms.unfolded = false;
 	h->row_offset = 0;
+	h->nr_rows = 0;
 
 	hists->stats.nr_non_filtered_samples += h->stat.nr_events;
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2b690d0..9f31b89 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -60,7 +60,7 @@
 	struct rb_root		entries_collapsed;
 	u64			nr_entries;
 	u64			nr_non_filtered_entries;
-	const struct thread	*thread_filter;
+	struct thread		*thread_filter;
 	const struct dso	*dso_filter;
 	const char		*uid_filter_str;
 	const char		*symbol_filter_str;
@@ -303,6 +303,9 @@
 
 #ifdef HAVE_SLANG_SUPPORT
 #include "../ui/keysyms.h"
+int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt);
+
 int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
 			     struct hist_browser_timer *hbt);
 
@@ -321,6 +324,12 @@
 {
 	return 0;
 }
+static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct hist_browser_timer *hbt __maybe_unused)
+{
+	return 0;
+}
 
 static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
 					   struct perf_evsel *evsel __maybe_unused,
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index cf1d7913..ae825d4 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -99,6 +99,7 @@
 	int timerfd;
 	unsigned int display_time;
 	bool live;
+	bool force;
 };
 
 struct kvm_reg_events_ops {
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
new file mode 100644
index 0000000..95a1acb
--- /dev/null
+++ b/tools/perf/util/lzma.c
@@ -0,0 +1,95 @@
+#include <lzma.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+#include "util.h"
+#include "debug.h"
+
+#define BUFSIZE 8192
+
+static const char *lzma_strerror(lzma_ret ret)
+{
+	switch ((int) ret) {
+	case LZMA_MEM_ERROR:
+		return "Memory allocation failed";
+	case LZMA_OPTIONS_ERROR:
+		return "Unsupported decompressor flags";
+	case LZMA_FORMAT_ERROR:
+		return "The input is not in the .xz format";
+	case LZMA_DATA_ERROR:
+		return "Compressed file is corrupt";
+	case LZMA_BUF_ERROR:
+		return "Compressed file is truncated or otherwise corrupt";
+	default:
+		return "Unknown error, possibly a bug";
+	}
+}
+
+int lzma_decompress_to_file(const char *input, int output_fd)
+{
+	lzma_action action = LZMA_RUN;
+	lzma_stream strm   = LZMA_STREAM_INIT;
+	lzma_ret ret;
+
+	u8 buf_in[BUFSIZE];
+	u8 buf_out[BUFSIZE];
+	FILE *infile;
+
+	infile = fopen(input, "rb");
+	if (!infile) {
+		pr_err("lzma: fopen failed on %s: '%s'\n",
+		       input, strerror(errno));
+		return -1;
+	}
+
+	ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
+	if (ret != LZMA_OK) {
+		pr_err("lzma: lzma_stream_decoder failed %s (%d)\n",
+			lzma_strerror(ret), ret);
+		return -1;
+	}
+
+	strm.next_in   = NULL;
+	strm.avail_in  = 0;
+	strm.next_out  = buf_out;
+	strm.avail_out = sizeof(buf_out);
+
+	while (1) {
+		if (strm.avail_in == 0 && !feof(infile)) {
+			strm.next_in  = buf_in;
+			strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile);
+
+			if (ferror(infile)) {
+				pr_err("lzma: read error: %s\n", strerror(errno));
+				return -1;
+			}
+
+			if (feof(infile))
+				action = LZMA_FINISH;
+		}
+
+		ret = lzma_code(&strm, action);
+
+		if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
+			ssize_t write_size = sizeof(buf_out) - strm.avail_out;
+
+			if (writen(output_fd, buf_out, write_size) != write_size) {
+				pr_err("lzma: write error: %s\n", strerror(errno));
+				return -1;
+			}
+
+			strm.next_out  = buf_out;
+			strm.avail_out = sizeof(buf_out);
+		}
+
+		if (ret != LZMA_OK) {
+			if (ret == LZMA_STREAM_END)
+				return 0;
+
+			pr_err("lzma: failed %s\n", lzma_strerror(ret));
+			return -1;
+		}
+	}
+
+	fclose(infile);
+	return 0;
+}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 1bca3a9..527e032 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -89,16 +89,6 @@
 	}
 }
 
-void machine__delete_dead_threads(struct machine *machine)
-{
-	struct thread *n, *t;
-
-	list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
-		list_del(&t->node);
-		thread__delete(t);
-	}
-}
-
 void machine__delete_threads(struct machine *machine)
 {
 	struct rb_node *nd = rb_first(&machine->threads);
@@ -106,9 +96,8 @@
 	while (nd) {
 		struct thread *t = rb_entry(nd, struct thread, rb_node);
 
-		rb_erase(&t->rb_node, &machine->threads);
 		nd = rb_next(nd);
-		thread__delete(t);
+		machine__remove_thread(machine, t);
 	}
 }
 
@@ -361,9 +350,13 @@
 	 * the full rbtree:
 	 */
 	th = machine->last_match;
-	if (th && th->tid == tid) {
-		machine__update_thread_pid(machine, th, pid);
-		return th;
+	if (th != NULL) {
+		if (th->tid == tid) {
+			machine__update_thread_pid(machine, th, pid);
+			return th;
+		}
+
+		thread__zput(machine->last_match);
 	}
 
 	while (*p != NULL) {
@@ -371,7 +364,7 @@
 		th = rb_entry(parent, struct thread, rb_node);
 
 		if (th->tid == tid) {
-			machine->last_match = th;
+			machine->last_match = thread__get(th);
 			machine__update_thread_pid(machine, th, pid);
 			return th;
 		}
@@ -403,8 +396,11 @@
 			thread__delete(th);
 			return NULL;
 		}
-
-		machine->last_match = th;
+		/*
+		 * It is now in the rbtree, get a ref
+		 */
+		thread__get(th);
+		machine->last_match = thread__get(th);
 	}
 
 	return th;
@@ -462,30 +458,61 @@
 	return 0;
 }
 
+static struct dso*
+machine__module_dso(struct machine *machine, struct kmod_path *m,
+		    const char *filename)
+{
+	struct dso *dso;
+
+	dso = dsos__find(&machine->kernel_dsos, m->name, true);
+	if (!dso) {
+		dso = dsos__addnew(&machine->kernel_dsos, m->name);
+		if (dso == NULL)
+			return NULL;
+
+		if (machine__is_host(machine))
+			dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+		else
+			dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+		/* _KMODULE_COMP should be next to _KMODULE */
+		if (m->kmod && m->comp)
+			dso->symtab_type++;
+
+		dso__set_short_name(dso, strdup(m->name), true);
+		dso__set_long_name(dso, strdup(filename), true);
+	}
+
+	return dso;
+}
+
 struct map *machine__new_module(struct machine *machine, u64 start,
 				const char *filename)
 {
-	struct map *map;
-	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename);
-	bool compressed;
+	struct map *map = NULL;
+	struct dso *dso;
+	struct kmod_path m;
 
-	if (dso == NULL)
+	if (kmod_path__parse_name(&m, filename))
 		return NULL;
 
+	map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,
+				       m.name);
+	if (map)
+		goto out;
+
+	dso = machine__module_dso(machine, &m, filename);
+	if (dso == NULL)
+		goto out;
+
 	map = map__new2(start, dso, MAP__FUNCTION);
 	if (map == NULL)
-		return NULL;
-
-	if (machine__is_host(machine))
-		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
-	else
-		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
-
-	/* _KMODULE_COMP should be next to _KMODULE */
-	if (is_kernel_module(filename, &compressed) && compressed)
-		dso->symtab_type++;
+		goto out;
 
 	map_groups__insert(&machine->kmaps, map);
+
+out:
+	free(m.name);
 	return map;
 }
 
@@ -650,6 +677,9 @@
 			machine->vmlinux_maps[type]->unmap_ip =
 				identity__map_ip;
 		kmap = map__kmap(machine->vmlinux_maps[type]);
+		if (!kmap)
+			return -1;
+
 		kmap->kmaps = &machine->kmaps;
 		map_groups__insert(&machine->kmaps,
 				   machine->vmlinux_maps[type]);
@@ -671,7 +701,7 @@
 		kmap = map__kmap(machine->vmlinux_maps[type]);
 		map_groups__remove(&machine->kmaps,
 				   machine->vmlinux_maps[type]);
-		if (kmap->ref_reloc_sym) {
+		if (kmap && kmap->ref_reloc_sym) {
 			/*
 			 * ref_reloc_sym is shared among all maps, so free just
 			 * on one of them.
@@ -827,6 +857,39 @@
 	return strdup(name);
 }
 
+static bool is_kmod_dso(struct dso *dso)
+{
+	return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+	       dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE;
+}
+
+static int map_groups__set_module_path(struct map_groups *mg, const char *path,
+				       struct kmod_path *m)
+{
+	struct map *map;
+	char *long_name;
+
+	map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name);
+	if (map == NULL)
+		return 0;
+
+	long_name = strdup(path);
+	if (long_name == NULL)
+		return -ENOMEM;
+
+	dso__set_long_name(map->dso, long_name, true);
+	dso__kernel_module_get_build_id(map->dso, "");
+
+	/*
+	 * Full name could reveal us kmod compression, so
+	 * we need to update the symtab_type if needed.
+	 */
+	if (m->comp && is_kmod_dso(map->dso))
+		map->dso->symtab_type++;
+
+	return 0;
+}
+
 static int map_groups__set_modules_path_dir(struct map_groups *mg,
 				const char *dir_name, int depth)
 {
@@ -865,35 +928,19 @@
 			if (ret < 0)
 				goto out;
 		} else {
-			char *dot = strrchr(dent->d_name, '.'),
-			     dso_name[PATH_MAX];
-			struct map *map;
-			char *long_name;
+			struct kmod_path m;
 
-			if (dot == NULL)
-				continue;
-
-			/* On some system, modules are compressed like .ko.gz */
-			if (is_supported_compression(dot + 1) &&
-			    is_kmodule_extension(dot - 2))
-				dot -= 3;
-
-			snprintf(dso_name, sizeof(dso_name), "[%.*s]",
-				 (int)(dot - dent->d_name), dent->d_name);
-
-			strxfrchar(dso_name, '-', '_');
-			map = map_groups__find_by_name(mg, MAP__FUNCTION,
-						       dso_name);
-			if (map == NULL)
-				continue;
-
-			long_name = strdup(path);
-			if (long_name == NULL) {
-				ret = -1;
+			ret = kmod_path__parse_name(&m, dent->d_name);
+			if (ret)
 				goto out;
-			}
-			dso__set_long_name(map->dso, long_name, true);
-			dso__kernel_module_get_build_id(map->dso, "");
+
+			if (m.kmod)
+				ret = map_groups__set_module_path(mg, path, &m);
+
+			free(m.name);
+
+			if (ret)
+				goto out;
 		}
 	}
 
@@ -1046,40 +1093,11 @@
 				strlen(kmmap_prefix) - 1) == 0;
 	if (event->mmap.filename[0] == '/' ||
 	    (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
-
-		char short_module_name[1024];
-		char *name, *dot;
-
-		if (event->mmap.filename[0] == '/') {
-			name = strrchr(event->mmap.filename, '/');
-			if (name == NULL)
-				goto out_problem;
-
-			++name; /* skip / */
-			dot = strrchr(name, '.');
-			if (dot == NULL)
-				goto out_problem;
-			/* On some system, modules are compressed like .ko.gz */
-			if (is_supported_compression(dot + 1))
-				dot -= 3;
-			if (!is_kmodule_extension(dot + 1))
-				goto out_problem;
-			snprintf(short_module_name, sizeof(short_module_name),
-					"[%.*s]", (int)(dot - name), name);
-			strxfrchar(short_module_name, '-', '_');
-		} else
-			strcpy(short_module_name, event->mmap.filename);
-
 		map = machine__new_module(machine, event->mmap.start,
 					  event->mmap.filename);
 		if (map == NULL)
 			goto out_problem;
 
-		name = strdup(short_module_name);
-		if (name == NULL)
-			goto out_problem;
-
-		dso__set_short_name(map->dso, name, true);
 		map->end = map->start + event->mmap.len;
 	} else if (is_kernel_mmap) {
 		const char *symbol_name = (event->mmap.filename +
@@ -1092,7 +1110,7 @@
 		struct dso *dso;
 
 		list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
-			if (is_kernel_module(dso->long_name, NULL))
+			if (is_kernel_module(dso->long_name))
 				continue;
 
 			kernel = dso;
@@ -1236,15 +1254,19 @@
 	return 0;
 }
 
-static void machine__remove_thread(struct machine *machine, struct thread *th)
+void machine__remove_thread(struct machine *machine, struct thread *th)
 {
-	machine->last_match = NULL;
+	if (machine->last_match == th)
+		thread__zput(machine->last_match);
+
 	rb_erase(&th->rb_node, &machine->threads);
 	/*
-	 * We may have references to this thread, for instance in some hist_entry
-	 * instances, so just move them to a separate list.
+	 * Move it first to the dead_threads list, then drop the reference,
+	 * if this is the last reference, then the thread__delete destructor
+	 * will be called and we will remove it from the dead_threads list.
 	 */
 	list_add_tail(&th->node, &machine->dead_threads);
+	thread__put(th);
 }
 
 int machine__process_fork_event(struct machine *machine, union perf_event *event,
@@ -1387,29 +1409,27 @@
 static int add_callchain_ip(struct thread *thread,
 			    struct symbol **parent,
 			    struct addr_location *root_al,
-			    bool branch_history,
+			    u8 *cpumode,
 			    u64 ip)
 {
 	struct addr_location al;
 
 	al.filtered = 0;
 	al.sym = NULL;
-	if (branch_history)
+	if (!cpumode) {
 		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
 						   ip, &al);
-	else {
-		u8 cpumode = PERF_RECORD_MISC_USER;
-
+	} else {
 		if (ip >= PERF_CONTEXT_MAX) {
 			switch (ip) {
 			case PERF_CONTEXT_HV:
-				cpumode = PERF_RECORD_MISC_HYPERVISOR;
+				*cpumode = PERF_RECORD_MISC_HYPERVISOR;
 				break;
 			case PERF_CONTEXT_KERNEL:
-				cpumode = PERF_RECORD_MISC_KERNEL;
+				*cpumode = PERF_RECORD_MISC_KERNEL;
 				break;
 			case PERF_CONTEXT_USER:
-				cpumode = PERF_RECORD_MISC_USER;
+				*cpumode = PERF_RECORD_MISC_USER;
 				break;
 			default:
 				pr_debug("invalid callchain context: "
@@ -1423,8 +1443,8 @@
 			}
 			return 0;
 		}
-		thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
-				   ip, &al);
+		thread__find_addr_location(thread, *cpumode, MAP__FUNCTION,
+					   ip, &al);
 	}
 
 	if (al.sym != NULL) {
@@ -1502,18 +1522,102 @@
 	return nr;
 }
 
-static int thread__resolve_callchain_sample(struct thread *thread,
-					     struct ip_callchain *chain,
-					     struct branch_stack *branch,
-					     struct symbol **parent,
-					     struct addr_location *root_al,
-					     int max_stack)
+/*
+ * Recolve LBR callstack chain sample
+ * Return:
+ * 1 on success get LBR callchain information
+ * 0 no available LBR callchain information, should try fp
+ * negative error code on other errors.
+ */
+static int resolve_lbr_callchain_sample(struct thread *thread,
+					struct perf_sample *sample,
+					struct symbol **parent,
+					struct addr_location *root_al,
+					int max_stack)
 {
+	struct ip_callchain *chain = sample->callchain;
 	int chain_nr = min(max_stack, (int)chain->nr);
+	u8 cpumode = PERF_RECORD_MISC_USER;
+	int i, j, err;
+	u64 ip;
+
+	for (i = 0; i < chain_nr; i++) {
+		if (chain->ips[i] == PERF_CONTEXT_USER)
+			break;
+	}
+
+	/* LBR only affects the user callchain */
+	if (i != chain_nr) {
+		struct branch_stack *lbr_stack = sample->branch_stack;
+		int lbr_nr = lbr_stack->nr;
+		/*
+		 * LBR callstack can only get user call chain.
+		 * The mix_chain_nr is kernel call chain
+		 * number plus LBR user call chain number.
+		 * i is kernel call chain number,
+		 * 1 is PERF_CONTEXT_USER,
+		 * lbr_nr + 1 is the user call chain number.
+		 * For details, please refer to the comments
+		 * in callchain__printf
+		 */
+		int mix_chain_nr = i + 1 + lbr_nr + 1;
+
+		if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) {
+			pr_warning("corrupted callchain. skipping...\n");
+			return 0;
+		}
+
+		for (j = 0; j < mix_chain_nr; j++) {
+			if (callchain_param.order == ORDER_CALLEE) {
+				if (j < i + 1)
+					ip = chain->ips[j];
+				else if (j > i + 1)
+					ip = lbr_stack->entries[j - i - 2].from;
+				else
+					ip = lbr_stack->entries[0].to;
+			} else {
+				if (j < lbr_nr)
+					ip = lbr_stack->entries[lbr_nr - j - 1].from;
+				else if (j > lbr_nr)
+					ip = chain->ips[i + 1 - (j - lbr_nr)];
+				else
+					ip = lbr_stack->entries[0].to;
+			}
+
+			err = add_callchain_ip(thread, parent, root_al, &cpumode, ip);
+			if (err)
+				return (err < 0) ? err : 0;
+		}
+		return 1;
+	}
+
+	return 0;
+}
+
+static int thread__resolve_callchain_sample(struct thread *thread,
+					    struct perf_evsel *evsel,
+					    struct perf_sample *sample,
+					    struct symbol **parent,
+					    struct addr_location *root_al,
+					    int max_stack)
+{
+	struct branch_stack *branch = sample->branch_stack;
+	struct ip_callchain *chain = sample->callchain;
+	int chain_nr = min(max_stack, (int)chain->nr);
+	u8 cpumode = PERF_RECORD_MISC_USER;
 	int i, j, err;
 	int skip_idx = -1;
 	int first_call = 0;
 
+	callchain_cursor_reset(&callchain_cursor);
+
+	if (has_branch_callstack(evsel)) {
+		err = resolve_lbr_callchain_sample(thread, sample, parent,
+						   root_al, max_stack);
+		if (err)
+			return (err < 0) ? err : 0;
+	}
+
 	/*
 	 * Based on DWARF debug information, some architectures skip
 	 * a callchain entry saved by the kernel.
@@ -1521,8 +1625,6 @@
 	if (chain->nr < PERF_MAX_STACK_DEPTH)
 		skip_idx = arch_skip_callchain_idx(thread, chain);
 
-	callchain_cursor_reset(&callchain_cursor);
-
 	/*
 	 * Add branches to call stack for easier browsing. This gives
 	 * more context for a sample than just the callers.
@@ -1568,10 +1670,10 @@
 
 		for (i = 0; i < nr; i++) {
 			err = add_callchain_ip(thread, parent, root_al,
-					       true, be[i].to);
+					       NULL, be[i].to);
 			if (!err)
 				err = add_callchain_ip(thread, parent, root_al,
-						       true, be[i].from);
+						       NULL, be[i].from);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -1600,7 +1702,7 @@
 #endif
 		ip = chain->ips[j];
 
-		err = add_callchain_ip(thread, parent, root_al, false, ip);
+		err = add_callchain_ip(thread, parent, root_al, &cpumode, ip);
 
 		if (err)
 			return (err < 0) ? err : 0;
@@ -1623,9 +1725,9 @@
 			      struct addr_location *root_al,
 			      int max_stack)
 {
-	int ret = thread__resolve_callchain_sample(thread, sample->callchain,
-						   sample->branch_stack,
-						   parent, root_al, max_stack);
+	int ret = thread__resolve_callchain_sample(thread, evsel,
+						   sample, parent,
+						   root_al, max_stack);
 	if (ret)
 		return ret;
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e8b7779..6d64ced 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -118,9 +118,9 @@
 struct machine *machine__new_host(void);
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
 void machine__exit(struct machine *machine);
-void machine__delete_dead_threads(struct machine *machine);
 void machine__delete_threads(struct machine *machine);
 void machine__delete(struct machine *machine);
+void machine__remove_thread(struct machine *machine, struct thread *th);
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 					   struct addr_location *al);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 62ca9f2..a14f08f 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -778,3 +778,23 @@
 		return rb_entry(next, struct map, rb_node);
 	return NULL;
 }
+
+struct kmap *map__kmap(struct map *map)
+{
+	if (!map->dso || !map->dso->kernel) {
+		pr_err("Internal error: map__kmap with a non-kernel map\n");
+		return NULL;
+	}
+	return (struct kmap *)(map + 1);
+}
+
+struct map_groups *map__kmaps(struct map *map)
+{
+	struct kmap *kmap = map__kmap(map);
+
+	if (!kmap || !kmap->kmaps) {
+		pr_err("Internal error: map__kmaps with a non-kernel map\n");
+		return NULL;
+	}
+	return kmap->kmaps;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e42438..ec19c59 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -76,10 +76,8 @@
 
 void map_groups__put(struct map_groups *mg);
 
-static inline struct kmap *map__kmap(struct map *map)
-{
-	return (struct kmap *)(map + 1);
-}
+struct kmap *map__kmap(struct map *map);
+struct map_groups *map__kmaps(struct map *map);
 
 static inline u64 map__map_ip(struct map *map, u64 ip)
 {
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index fd4be94..52be201 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -2,7 +2,6 @@
 #include <linux/compiler.h>
 #include <linux/string.h>
 #include "ordered-events.h"
-#include "evlist.h"
 #include "session.h"
 #include "asm/bug.h"
 #include "debug.h"
@@ -131,8 +130,8 @@
 	return new;
 }
 
-struct ordered_event *
-ordered_events__new(struct ordered_events *oe, u64 timestamp,
+static struct ordered_event *
+ordered_events__new_event(struct ordered_events *oe, u64 timestamp,
 		    union perf_event *event)
 {
 	struct ordered_event *new;
@@ -153,20 +152,47 @@
 	free_dup_event(oe, event->event);
 }
 
-static int __ordered_events__flush(struct perf_session *s,
-				   struct perf_tool *tool)
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+			  struct perf_sample *sample, u64 file_offset)
 {
-	struct ordered_events *oe = &s->ordered_events;
+	u64 timestamp = sample->time;
+	struct ordered_event *oevent;
+
+	if (!timestamp || timestamp == ~0ULL)
+		return -ETIME;
+
+	if (timestamp < oe->last_flush) {
+		pr_oe_time(timestamp,      "out of order event\n");
+		pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n",
+			   oe->last_flush_type);
+
+		oe->nr_unordered_events++;
+	}
+
+	oevent = ordered_events__new_event(oe, timestamp, event);
+	if (!oevent) {
+		ordered_events__flush(oe, OE_FLUSH__HALF);
+		oevent = ordered_events__new_event(oe, timestamp, event);
+	}
+
+	if (!oevent)
+		return -ENOMEM;
+
+	oevent->file_offset = file_offset;
+	return 0;
+}
+
+static int __ordered_events__flush(struct ordered_events *oe)
+{
 	struct list_head *head = &oe->events;
 	struct ordered_event *tmp, *iter;
-	struct perf_sample sample;
 	u64 limit = oe->next_flush;
 	u64 last_ts = oe->last ? oe->last->timestamp : 0ULL;
 	bool show_progress = limit == ULLONG_MAX;
 	struct ui_progress prog;
 	int ret;
 
-	if (!tool->ordered_events || !limit)
+	if (!limit)
 		return 0;
 
 	if (show_progress)
@@ -178,16 +204,9 @@
 
 		if (iter->timestamp > limit)
 			break;
-
-		ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample);
+		ret = oe->deliver(oe, iter);
 		if (ret)
-			pr_err("Can't parse sample, err = %d\n", ret);
-		else {
-			ret = perf_session__deliver_event(s, iter->event, &sample, tool,
-							  iter->file_offset);
-			if (ret)
-				return ret;
-		}
+			return ret;
 
 		ordered_events__delete(oe, iter);
 		oe->last_flush = iter->timestamp;
@@ -204,10 +223,8 @@
 	return 0;
 }
 
-int ordered_events__flush(struct perf_session *s, struct perf_tool *tool,
-			  enum oe_flush how)
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
 {
-	struct ordered_events *oe = &s->ordered_events;
 	static const char * const str[] = {
 		"NONE",
 		"FINAL",
@@ -216,6 +233,9 @@
 	};
 	int err;
 
+	if (oe->nr_events == 0)
+		return 0;
+
 	switch (how) {
 	case OE_FLUSH__FINAL:
 		oe->next_flush = ULLONG_MAX;
@@ -248,7 +268,7 @@
 		   str[how], oe->nr_events);
 	pr_oe_time(oe->max_timestamp, "max_timestamp\n");
 
-	err = __ordered_events__flush(s, tool);
+	err = __ordered_events__flush(oe);
 
 	if (!err) {
 		if (how == OE_FLUSH__ROUND)
@@ -264,13 +284,14 @@
 	return err;
 }
 
-void ordered_events__init(struct ordered_events *oe)
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver)
 {
 	INIT_LIST_HEAD(&oe->events);
 	INIT_LIST_HEAD(&oe->cache);
 	INIT_LIST_HEAD(&oe->to_free);
 	oe->max_alloc_size = (u64) -1;
 	oe->cur_alloc_size = 0;
+	oe->deliver	   = deliver;
 }
 
 void ordered_events__free(struct ordered_events *oe)
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 7b8f9b0..f403991 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -2,9 +2,8 @@
 #define __ORDERED_EVENTS_H
 
 #include <linux/types.h>
-#include "tool.h"
 
-struct perf_session;
+struct perf_sample;
 
 struct ordered_event {
 	u64			timestamp;
@@ -20,6 +19,11 @@
 	OE_FLUSH__HALF,
 };
 
+struct ordered_events;
+
+typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
+					 struct ordered_event *event);
+
 struct ordered_events {
 	u64			last_flush;
 	u64			next_flush;
@@ -31,18 +35,19 @@
 	struct list_head	to_free;
 	struct ordered_event	*buffer;
 	struct ordered_event	*last;
+	ordered_events__deliver_t deliver;
 	int			buffer_idx;
 	unsigned int		nr_events;
 	enum oe_flush		last_flush_type;
+	u32			nr_unordered_events;
 	bool                    copy_on_queue;
 };
 
-struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp,
-					  union perf_event *event);
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+			  struct perf_sample *sample, u64 file_offset);
 void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
-int ordered_events__flush(struct perf_session *s, struct perf_tool *tool,
-			  enum oe_flush how);
-void ordered_events__init(struct ordered_events *oe);
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver);
 void ordered_events__free(struct ordered_events *oe);
 
 static inline
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 7f8ec6c..be06553 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -20,11 +20,6 @@
 
 #define MAX_NAME_LEN 100
 
-struct event_symbol {
-	const char	*symbol;
-	const char	*alias;
-};
-
 #ifdef PARSER_DEBUG
 extern int parse_events_debug;
 #endif
@@ -39,7 +34,7 @@
  */
 static int perf_pmu_events_list_num;
 
-static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
+struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_CPU_CYCLES] = {
 		.symbol = "cpu-cycles",
 		.alias  = "cycles",
@@ -82,7 +77,7 @@
 	},
 };
 
-static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
+struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 	[PERF_COUNT_SW_CPU_CLOCK] = {
 		.symbol = "cpu-clock",
 		.alias  = "",
@@ -175,9 +170,6 @@
 	char evt_path[MAXPATHLEN];
 	char dir_path[MAXPATHLEN];
 
-	if (debugfs_valid_mountpoint(tracing_events_path))
-		return NULL;
-
 	sys_dir = opendir(tracing_events_path);
 	if (!sys_dir)
 		return NULL;
@@ -473,12 +465,6 @@
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
 				char *sys, char *event)
 {
-	int ret;
-
-	ret = debugfs_valid_mountpoint(tracing_events_path);
-	if (ret)
-		return ret;
-
 	if (strpbrk(sys, "*?"))
 		return add_tracepoint_multi_sys(list, idx, sys, event);
 	else
@@ -723,6 +709,7 @@
 	int eh;
 	int eH;
 	int eG;
+	int eI;
 	int precise;
 	int exclude_GH;
 	int sample_read;
@@ -737,6 +724,7 @@
 	int eh = evsel ? evsel->attr.exclude_hv : 0;
 	int eH = evsel ? evsel->attr.exclude_host : 0;
 	int eG = evsel ? evsel->attr.exclude_guest : 0;
+	int eI = evsel ? evsel->attr.exclude_idle : 0;
 	int precise = evsel ? evsel->attr.precise_ip : 0;
 	int sample_read = 0;
 	int pinned = evsel ? evsel->attr.pinned : 0;
@@ -767,6 +755,8 @@
 			if (!exclude_GH)
 				exclude_GH = eG = eH = 1;
 			eH = 0;
+		} else if (*str == 'I') {
+			eI = 1;
 		} else if (*str == 'p') {
 			precise++;
 			/* use of precise requires exclude_guest */
@@ -800,6 +790,7 @@
 	mod->eh = eh;
 	mod->eH = eH;
 	mod->eG = eG;
+	mod->eI = eI;
 	mod->precise = precise;
 	mod->exclude_GH = exclude_GH;
 	mod->sample_read = sample_read;
@@ -817,7 +808,7 @@
 	char *p = str;
 
 	/* The sizeof includes 0 byte as well. */
-	if (strlen(str) > (sizeof("ukhGHpppSD") - 1))
+	if (strlen(str) > (sizeof("ukhGHpppSDI") - 1))
 		return -1;
 
 	while (*p) {
@@ -853,6 +844,7 @@
 		evsel->attr.precise_ip     = mod.precise;
 		evsel->attr.exclude_host   = mod.eH;
 		evsel->attr.exclude_guest  = mod.eG;
+		evsel->attr.exclude_idle   = mod.eI;
 		evsel->exclude_GH          = mod.exclude_GH;
 		evsel->sample_read         = mod.sample_read;
 
@@ -1098,6 +1090,14 @@
 	"Hardware breakpoint",
 };
 
+static int cmp_string(const void *a, const void *b)
+{
+	const char * const *as = a;
+	const char * const *bs = b;
+
+	return strcmp(*as, *bs);
+}
+
 /*
  * Print the events from <debugfs_mount_point>/tracing/events
  */
@@ -1109,18 +1109,21 @@
 	struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
 	char evt_path[MAXPATHLEN];
 	char dir_path[MAXPATHLEN];
-	char sbuf[STRERR_BUFSIZE];
+	char **evt_list = NULL;
+	unsigned int evt_i = 0, evt_num = 0;
+	bool evt_num_known = false;
 
-	if (debugfs_valid_mountpoint(tracing_events_path)) {
-		printf("  [ Tracepoints not available: %s ]\n",
-			strerror_r(errno, sbuf, sizeof(sbuf)));
-		return;
-	}
-
+restart:
 	sys_dir = opendir(tracing_events_path);
 	if (!sys_dir)
 		return;
 
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_close_sys_dir;
+	}
+
 	for_each_subsystem(sys_dir, sys_dirent, sys_next) {
 		if (subsys_glob != NULL &&
 		    !strglobmatch(sys_dirent.d_name, subsys_glob))
@@ -1137,19 +1140,56 @@
 			    !strglobmatch(evt_dirent.d_name, event_glob))
 				continue;
 
-			if (name_only) {
-				printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name);
+			if (!evt_num_known) {
+				evt_num++;
 				continue;
 			}
 
 			snprintf(evt_path, MAXPATHLEN, "%s:%s",
 				 sys_dirent.d_name, evt_dirent.d_name);
-			printf("  %-50s [%s]\n", evt_path,
-				event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+
+			evt_list[evt_i] = strdup(evt_path);
+			if (evt_list[evt_i] == NULL)
+				goto out_close_evt_dir;
+			evt_i++;
 		}
 		closedir(evt_dir);
 	}
 	closedir(sys_dir);
+
+	if (!evt_num_known) {
+		evt_num_known = true;
+		goto restart;
+	}
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++],
+				event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+	}
+	if (evt_num)
+		printf("\n");
+
+out_free:
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+	return;
+
+out_close_evt_dir:
+	closedir(evt_dir);
+out_close_sys_dir:
+	closedir(sys_dir);
+
+	printf("FATAL: not enough memory to print %s\n",
+			event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+	if (evt_list)
+		goto out_free;
 }
 
 /*
@@ -1163,9 +1203,6 @@
 	char evt_path[MAXPATHLEN];
 	char dir_path[MAXPATHLEN];
 
-	if (debugfs_valid_mountpoint(tracing_events_path))
-		return 0;
-
 	sys_dir = opendir(tracing_events_path);
 	if (!sys_dir)
 		return 0;
@@ -1233,38 +1270,19 @@
 	return ret;
 }
 
-static void __print_events_type(u8 type, struct event_symbol *syms,
-				unsigned max)
-{
-	char name[64];
-	unsigned i;
-
-	for (i = 0; i < max ; i++, syms++) {
-		if (!is_event_supported(type, i))
-			continue;
-
-		if (strlen(syms->alias))
-			snprintf(name, sizeof(name),  "%s OR %s",
-				 syms->symbol, syms->alias);
-		else
-			snprintf(name, sizeof(name), "%s", syms->symbol);
-
-		printf("  %-50s [%s]\n", name, event_type_descriptors[type]);
-	}
-}
-
-void print_events_type(u8 type)
-{
-	if (type == PERF_TYPE_SOFTWARE)
-		__print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX);
-	else
-		__print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX);
-}
-
 int print_hwcache_events(const char *event_glob, bool name_only)
 {
-	unsigned int type, op, i, printed = 0;
+	unsigned int type, op, i, evt_i = 0, evt_num = 0;
 	char name[64];
+	char **evt_list = NULL;
+	bool evt_num_known = false;
+
+restart:
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_enomem;
+	}
 
 	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
 		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
@@ -1282,27 +1300,66 @@
 							type | (op << 8) | (i << 16)))
 					continue;
 
-				if (name_only)
-					printf("%s ", name);
-				else
-					printf("  %-50s [%s]\n", name,
-					       event_type_descriptors[PERF_TYPE_HW_CACHE]);
-				++printed;
+				if (!evt_num_known) {
+					evt_num++;
+					continue;
+				}
+
+				evt_list[evt_i] = strdup(name);
+				if (evt_list[evt_i] == NULL)
+					goto out_enomem;
+				evt_i++;
 			}
 		}
 	}
 
-	if (printed)
+	if (!evt_num_known) {
+		evt_num_known = true;
+		goto restart;
+	}
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++],
+				event_type_descriptors[PERF_TYPE_HW_CACHE]);
+	}
+	if (evt_num)
 		printf("\n");
-	return printed;
+
+out_free:
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+	return evt_num;
+
+out_enomem:
+	printf("FATAL: not enough memory to print %s\n", event_type_descriptors[PERF_TYPE_HW_CACHE]);
+	if (evt_list)
+		goto out_free;
+	return evt_num;
 }
 
-static void print_symbol_events(const char *event_glob, unsigned type,
+void print_symbol_events(const char *event_glob, unsigned type,
 				struct event_symbol *syms, unsigned max,
 				bool name_only)
 {
-	unsigned i, printed = 0;
+	unsigned int i, evt_i = 0, evt_num = 0;
 	char name[MAX_NAME_LEN];
+	char **evt_list = NULL;
+	bool evt_num_known = false;
+
+restart:
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_enomem;
+		syms -= max;
+	}
 
 	for (i = 0; i < max; i++, syms++) {
 
@@ -1314,23 +1371,49 @@
 		if (!is_event_supported(type, i))
 			continue;
 
-		if (name_only) {
-			printf("%s ", syms->symbol);
+		if (!evt_num_known) {
+			evt_num++;
 			continue;
 		}
 
-		if (strlen(syms->alias))
+		if (!name_only && strlen(syms->alias))
 			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
 		else
 			strncpy(name, syms->symbol, MAX_NAME_LEN);
 
-		printf("  %-50s [%s]\n", name, event_type_descriptors[type]);
-
-		printed++;
+		evt_list[evt_i] = strdup(name);
+		if (evt_list[evt_i] == NULL)
+			goto out_enomem;
+		evt_i++;
 	}
 
-	if (printed)
+	if (!evt_num_known) {
+		evt_num_known = true;
+		goto restart;
+	}
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]);
+	}
+	if (evt_num)
 		printf("\n");
+
+out_free:
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+	return;
+
+out_enomem:
+	printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]);
+	if (evt_list)
+		goto out_free;
 }
 
 /*
@@ -1338,11 +1421,6 @@
  */
 void print_events(const char *event_glob, bool name_only)
 {
-	if (!name_only) {
-		printf("\n");
-		printf("List of pre-defined events (to be used in -e):\n");
-	}
-
 	print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
 			    event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
 
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index ff6e1fa..52a2dda 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -116,12 +116,21 @@
 void parse_events_error(void *data, void *scanner, char const *msg);
 
 void print_events(const char *event_glob, bool name_only);
-void print_events_type(u8 type);
+
+struct event_symbol {
+	const char	*symbol;
+	const char	*alias;
+};
+extern struct event_symbol event_symbols_hw[];
+extern struct event_symbol event_symbols_sw[];
+void print_symbol_events(const char *event_glob, unsigned type,
+				struct event_symbol *syms, unsigned max,
+				bool name_only);
 void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
 			     bool name_only);
 int print_hwcache_events(const char *event_glob, bool name_only);
 extern int is_valid_tracepoint(const char *event_string);
 
-extern int valid_debugfs_mount(const char *debugfs);
+int valid_event_mount(const char *eventfs);
 
 #endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 94eacb6..8895cf3 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -101,7 +101,7 @@
 name		[a-zA-Z_*?][a-zA-Z0-9_*?]*
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?]*
 /* If you add a modifier you need to update check_modifier() */
-modifier_event	[ukhpGHSD]+
+modifier_event	[ukhpGHSDI]+
 modifier_bp	[rwx]{1,3}
 
 %%
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
index 4a015f7..01626be 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/perf/util/parse-options.c
@@ -37,6 +37,7 @@
 {
 	const char *s, *arg = NULL;
 	const int unset = flags & OPT_UNSET;
+	int err;
 
 	if (unset && p->opt)
 		return opterror(opt, "takes no value", flags);
@@ -114,13 +115,29 @@
 		return 0;
 
 	case OPTION_STRING:
+		err = 0;
 		if (unset)
 			*(const char **)opt->value = NULL;
 		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
 			*(const char **)opt->value = (const char *)opt->defval;
 		else
-			return get_arg(p, opt, flags, (const char **)opt->value);
-		return 0;
+			err = get_arg(p, opt, flags, (const char **)opt->value);
+
+		/* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
+		if (opt->flags & PARSE_OPT_NOEMPTY) {
+			const char *val = *(const char **)opt->value;
+
+			if (!val)
+				return err;
+
+			/* Similar to unset if we are given an empty string. */
+			if (val[0] == '\0') {
+				*(const char **)opt->value = NULL;
+				return 0;
+			}
+		}
+
+		return err;
 
 	case OPTION_CALLBACK:
 		if (unset)
@@ -505,13 +522,18 @@
 		break;
 	case PARSE_OPT_LIST_OPTS:
 		while (options->type != OPTION_END) {
-			printf("--%s ", options->long_name);
+			if (options->long_name)
+				printf("--%s ", options->long_name);
 			options++;
 		}
+		putchar('\n');
 		exit(130);
 	case PARSE_OPT_LIST_SUBCMDS:
-		for (int i = 0; subcommands[i]; i++)
-			printf("%s ", subcommands[i]);
+		if (subcommands) {
+			for (int i = 0; subcommands[i]; i++)
+				printf("%s ", subcommands[i]);
+		}
+		putchar('\n');
 		exit(130);
 	default: /* PARSE_OPT_UNKNOWN */
 		if (ctx.argv[0][1] == '-') {
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 97b153f..59561fd 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -40,6 +40,7 @@
 	PARSE_OPT_LASTARG_DEFAULT = 16,
 	PARSE_OPT_DISABLED = 32,
 	PARSE_OPT_EXCLUSIVE = 64,
+	PARSE_OPT_NOEMPTY  = 128,
 };
 
 struct option;
@@ -122,6 +123,7 @@
 #define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
 #define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
 #define OPT_STRING(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) }
+#define OPT_STRING_NOEMPTY(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
 #define OPT_DATE(s, l, v, h) \
 	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
 #define OPT_CALLBACK(s, l, v, a, h, f) \
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 919937e..30545ce 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -41,6 +41,7 @@
 #include "symbol.h"
 #include "thread.h"
 #include <api/fs/debugfs.h>
+#include <api/fs/tracefs.h>
 #include "trace-event.h"	/* For __maybe_unused */
 #include "probe-event.h"
 #include "probe-finder.h"
@@ -79,6 +80,7 @@
 	int ret;
 
 	symbol_conf.sort_by_name = true;
+	symbol_conf.allow_aliases = true;
 	ret = symbol__init(NULL);
 	if (ret < 0) {
 		pr_debug("Failed to init symbol map.\n");
@@ -133,6 +135,8 @@
 		return NULL;
 
 	kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]);
+	if (!kmap)
+		return NULL;
 	return kmap->ref_reloc_sym;
 }
 
@@ -150,7 +154,7 @@
 		sym = __find_kernel_function_by_name(name, &map);
 		if (sym)
 			return map->unmap_ip(map, sym->start) -
-				(reloc) ? 0 : map->reloc;
+				((reloc) ? 0 : map->reloc);
 	}
 	return 0;
 }
@@ -177,6 +181,25 @@
 	return NULL;
 }
 
+static struct map *get_target_map(const char *target, bool user)
+{
+	/* Init maps of given executable or kernel */
+	if (user)
+		return dso__new_map(target);
+	else
+		return kernel_get_module_map(target);
+}
+
+static void put_target_map(struct map *map, bool user)
+{
+	if (map && user) {
+		/* Only the user map needs to be released */
+		dso__delete(map->dso);
+		map__delete(map);
+	}
+}
+
+
 static struct dso *kernel_get_module_dso(const char *module)
 {
 	struct dso *dso;
@@ -248,6 +271,13 @@
 	return ret;
 }
 
+static void clear_perf_probe_point(struct perf_probe_point *pp)
+{
+	free(pp->file);
+	free(pp->function);
+	free(pp->lazy_line);
+}
+
 static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
 {
 	int i;
@@ -257,6 +287,103 @@
 }
 
 #ifdef HAVE_DWARF_SUPPORT
+/*
+ * Some binaries like glibc have special symbols which are on the symbol
+ * table, but not in the debuginfo. If we can find the address of the
+ * symbol from map, we can translate the address back to the probe point.
+ */
+static int find_alternative_probe_point(struct debuginfo *dinfo,
+					struct perf_probe_point *pp,
+					struct perf_probe_point *result,
+					const char *target, bool uprobes)
+{
+	struct map *map = NULL;
+	struct symbol *sym;
+	u64 address = 0;
+	int ret = -ENOENT;
+
+	/* This can work only for function-name based one */
+	if (!pp->function || pp->file)
+		return -ENOTSUP;
+
+	map = get_target_map(target, uprobes);
+	if (!map)
+		return -EINVAL;
+
+	/* Find the address of given function */
+	map__for_each_symbol_by_name(map, pp->function, sym) {
+		if (uprobes)
+			address = sym->start;
+		else
+			address = map->unmap_ip(map, sym->start);
+		break;
+	}
+	if (!address) {
+		ret = -ENOENT;
+		goto out;
+	}
+	pr_debug("Symbol %s address found : %" PRIx64 "\n",
+			pp->function, address);
+
+	ret = debuginfo__find_probe_point(dinfo, (unsigned long)address,
+					  result);
+	if (ret <= 0)
+		ret = (!ret) ? -ENOENT : ret;
+	else {
+		result->offset += pp->offset;
+		result->line += pp->line;
+		ret = 0;
+	}
+
+out:
+	put_target_map(map, uprobes);
+	return ret;
+
+}
+
+static int get_alternative_probe_event(struct debuginfo *dinfo,
+				       struct perf_probe_event *pev,
+				       struct perf_probe_point *tmp,
+				       const char *target)
+{
+	int ret;
+
+	memcpy(tmp, &pev->point, sizeof(*tmp));
+	memset(&pev->point, 0, sizeof(pev->point));
+	ret = find_alternative_probe_point(dinfo, tmp, &pev->point,
+					   target, pev->uprobes);
+	if (ret < 0)
+		memcpy(&pev->point, tmp, sizeof(*tmp));
+
+	return ret;
+}
+
+static int get_alternative_line_range(struct debuginfo *dinfo,
+				      struct line_range *lr,
+				      const char *target, bool user)
+{
+	struct perf_probe_point pp = { .function = lr->function,
+				       .file = lr->file,
+				       .line = lr->start };
+	struct perf_probe_point result;
+	int ret, len = 0;
+
+	memset(&result, 0, sizeof(result));
+
+	if (lr->end != INT_MAX)
+		len = lr->end - lr->start;
+	ret = find_alternative_probe_point(dinfo, &pp, &result,
+					   target, user);
+	if (!ret) {
+		lr->function = result.function;
+		lr->file = result.file;
+		lr->start = result.line;
+		if (lr->end != INT_MAX)
+			lr->end = lr->start + len;
+		clear_perf_probe_point(&pp);
+	}
+	return ret;
+}
 
 /* Open new debuginfo of given module */
 static struct debuginfo *open_debuginfo(const char *module, bool silent)
@@ -465,6 +592,7 @@
 					  int max_tevs, const char *target)
 {
 	bool need_dwarf = perf_probe_event_need_dwarf(pev);
+	struct perf_probe_point tmp;
 	struct debuginfo *dinfo;
 	int ntevs, ret = 0;
 
@@ -481,6 +609,20 @@
 	/* Searching trace events corresponding to a probe event */
 	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs);
 
+	if (ntevs == 0)	{  /* Not found, retry with an alternative */
+		ret = get_alternative_probe_event(dinfo, pev, &tmp, target);
+		if (!ret) {
+			ntevs = debuginfo__find_trace_events(dinfo, pev,
+							     tevs, max_tevs);
+			/*
+			 * Write back to the original probe_event for
+			 * setting appropriate (user given) event name
+			 */
+			clear_perf_probe_point(&pev->point);
+			memcpy(&pev->point, &tmp, sizeof(tmp));
+		}
+	}
+
 	debuginfo__delete(dinfo);
 
 	if (ntevs > 0) {	/* Succeeded to find trace events */
@@ -495,11 +637,9 @@
 	}
 
 	if (ntevs == 0)	{	/* No error but failed to find probe point. */
-		pr_warning("Probe point '%s' not found in debuginfo.\n",
+		pr_warning("Probe point '%s' not found.\n",
 			   synthesize_perf_probe_point(&pev->point));
-		if (need_dwarf)
-			return -ENOENT;
-		return 0;
+		return -ENOENT;
 	}
 	/* Error path : ntevs < 0 */
 	pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
@@ -532,7 +672,7 @@
 		else {
 			if (access(raw_path, R_OK) == 0) {
 				*new_path = strdup(raw_path);
-				return 0;
+				return *new_path ? 0 : -ENOMEM;
 			} else
 				return -errno;
 		}
@@ -548,9 +688,11 @@
 		if (access(*new_path, R_OK) == 0)
 			return 0;
 
-		if (!symbol_conf.source_prefix)
+		if (!symbol_conf.source_prefix) {
 			/* In case of searching comp_dir, don't retry */
+			zfree(new_path);
 			return -errno;
+		}
 
 		switch (errno) {
 		case ENAMETOOLONG:
@@ -622,7 +764,8 @@
  * Show line-range always requires debuginfo to find source file and
  * line number.
  */
-static int __show_line_range(struct line_range *lr, const char *module)
+static int __show_line_range(struct line_range *lr, const char *module,
+			     bool user)
 {
 	int l = 1;
 	struct int_node *ln;
@@ -638,6 +781,11 @@
 		return -ENOENT;
 
 	ret = debuginfo__find_line_range(dinfo, lr);
+	if (!ret) {	/* Not found, retry with an alternative */
+		ret = get_alternative_line_range(dinfo, lr, module, user);
+		if (!ret)
+			ret = debuginfo__find_line_range(dinfo, lr);
+	}
 	debuginfo__delete(dinfo);
 	if (ret == 0 || ret == -ENOENT) {
 		pr_warning("Specified source line is not found.\n");
@@ -650,7 +798,11 @@
 	/* Convert source file path */
 	tmp = lr->path;
 	ret = get_real_path(tmp, lr->comp_dir, &lr->path);
-	free(tmp);	/* Free old path */
+
+	/* Free old path when new path is assigned */
+	if (tmp != lr->path)
+		free(tmp);
+
 	if (ret < 0) {
 		pr_warning("Failed to find source file path.\n");
 		return ret;
@@ -707,7 +859,7 @@
 	ret = init_symbol_maps(user);
 	if (ret < 0)
 		return ret;
-	ret = __show_line_range(lr, module);
+	ret = __show_line_range(lr, module, user);
 	exit_symbol_maps();
 
 	return ret;
@@ -716,12 +868,13 @@
 static int show_available_vars_at(struct debuginfo *dinfo,
 				  struct perf_probe_event *pev,
 				  int max_vls, struct strfilter *_filter,
-				  bool externs)
+				  bool externs, const char *target)
 {
 	char *buf;
 	int ret, i, nvars;
 	struct str_node *node;
 	struct variable_list *vls = NULL, *vl;
+	struct perf_probe_point tmp;
 	const char *var;
 
 	buf = synthesize_perf_probe_point(&pev->point);
@@ -731,6 +884,15 @@
 
 	ret = debuginfo__find_available_vars_at(dinfo, pev, &vls,
 						max_vls, externs);
+	if (!ret) {  /* Not found, retry with an alternative */
+		ret = get_alternative_probe_event(dinfo, pev, &tmp, target);
+		if (!ret) {
+			ret = debuginfo__find_available_vars_at(dinfo, pev,
+						&vls, max_vls, externs);
+			/* Release the old probe_point */
+			clear_perf_probe_point(&tmp);
+		}
+	}
 	if (ret <= 0) {
 		if (ret == 0 || ret == -ENOENT) {
 			pr_err("Failed to find the address of %s\n", buf);
@@ -793,7 +955,7 @@
 
 	for (i = 0; i < npevs && ret >= 0; i++)
 		ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter,
-					     externs);
+					     externs, module);
 
 	debuginfo__delete(dinfo);
 out:
@@ -1739,15 +1901,13 @@
 
 void clear_perf_probe_event(struct perf_probe_event *pev)
 {
-	struct perf_probe_point *pp = &pev->point;
 	struct perf_probe_arg_field *field, *next;
 	int i;
 
 	free(pev->event);
 	free(pev->group);
-	free(pp->file);
-	free(pp->function);
-	free(pp->lazy_line);
+	free(pev->target);
+	clear_perf_probe_point(&pev->point);
 
 	for (i = 0; i < pev->nargs; i++) {
 		free(pev->args[i].name);
@@ -1805,7 +1965,7 @@
 			   " - please rebuild kernel with %s.\n",
 			   is_kprobe ? 'k' : 'u', config);
 	} else if (err == -ENOTSUP)
-		pr_warning("Debugfs is not mounted.\n");
+		pr_warning("Tracefs or debugfs is not mounted.\n");
 	else
 		pr_warning("Failed to open %cprobe_events: %s\n",
 			   is_kprobe ? 'k' : 'u',
@@ -1816,7 +1976,7 @@
 {
 	/* Both kprobes and uprobes are disabled, warn it. */
 	if (kerr == -ENOTSUP && uerr == -ENOTSUP)
-		pr_warning("Debugfs is not mounted.\n");
+		pr_warning("Tracefs or debugfs is not mounted.\n");
 	else if (kerr == -ENOENT && uerr == -ENOENT)
 		pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS "
 			   "or/and CONFIG_UPROBE_EVENTS.\n");
@@ -1833,13 +1993,20 @@
 {
 	char buf[PATH_MAX];
 	const char *__debugfs;
+	const char *tracing_dir = "";
 	int ret;
 
-	__debugfs = debugfs_find_mountpoint();
-	if (__debugfs == NULL)
-		return -ENOTSUP;
+	__debugfs = tracefs_find_mountpoint();
+	if (__debugfs == NULL) {
+		tracing_dir = "tracing/";
 
-	ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file);
+		__debugfs = debugfs_find_mountpoint();
+		if (__debugfs == NULL)
+			return -ENOTSUP;
+	}
+
+	ret = e_snprintf(buf, PATH_MAX, "%s/%s%s",
+			 __debugfs, tracing_dir, trace_file);
 	if (ret >= 0) {
 		pr_debug("Opening %s write=%d\n", buf, readwrite);
 		if (readwrite && !probe_event_dry_run)
@@ -1855,12 +2022,12 @@
 
 static int open_kprobe_events(bool readwrite)
 {
-	return open_probe_events("tracing/kprobe_events", readwrite);
+	return open_probe_events("kprobe_events", readwrite);
 }
 
 static int open_uprobe_events(bool readwrite)
 {
-	return open_probe_events("tracing/uprobe_events", readwrite);
+	return open_probe_events("uprobe_events", readwrite);
 }
 
 /* Get raw string list of current kprobe_events  or uprobe_events */
@@ -1895,6 +2062,95 @@
 	return sl;
 }
 
+struct kprobe_blacklist_node {
+	struct list_head list;
+	unsigned long start;
+	unsigned long end;
+	char *symbol;
+};
+
+static void kprobe_blacklist__delete(struct list_head *blacklist)
+{
+	struct kprobe_blacklist_node *node;
+
+	while (!list_empty(blacklist)) {
+		node = list_first_entry(blacklist,
+					struct kprobe_blacklist_node, list);
+		list_del(&node->list);
+		free(node->symbol);
+		free(node);
+	}
+}
+
+static int kprobe_blacklist__load(struct list_head *blacklist)
+{
+	struct kprobe_blacklist_node *node;
+	const char *__debugfs = debugfs_find_mountpoint();
+	char buf[PATH_MAX], *p;
+	FILE *fp;
+	int ret;
+
+	if (__debugfs == NULL)
+		return -ENOTSUP;
+
+	ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs);
+	if (ret < 0)
+		return ret;
+
+	fp = fopen(buf, "r");
+	if (!fp)
+		return -errno;
+
+	ret = 0;
+	while (fgets(buf, PATH_MAX, fp)) {
+		node = zalloc(sizeof(*node));
+		if (!node) {
+			ret = -ENOMEM;
+			break;
+		}
+		INIT_LIST_HEAD(&node->list);
+		list_add_tail(&node->list, blacklist);
+		if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) {
+			ret = -EINVAL;
+			break;
+		}
+		p = strchr(buf, '\t');
+		if (p) {
+			p++;
+			if (p[strlen(p) - 1] == '\n')
+				p[strlen(p) - 1] = '\0';
+		} else
+			p = (char *)"unknown";
+		node->symbol = strdup(p);
+		if (!node->symbol) {
+			ret = -ENOMEM;
+			break;
+		}
+		pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n",
+			  node->start, node->end, node->symbol);
+		ret++;
+	}
+	if (ret < 0)
+		kprobe_blacklist__delete(blacklist);
+	fclose(fp);
+
+	return ret;
+}
+
+static struct kprobe_blacklist_node *
+kprobe_blacklist__find_by_address(struct list_head *blacklist,
+				  unsigned long address)
+{
+	struct kprobe_blacklist_node *node;
+
+	list_for_each_entry(node, blacklist, list) {
+		if (node->start <= address && address <= node->end)
+			return node;
+	}
+
+	return NULL;
+}
+
 /* Show an event */
 static int show_perf_probe_event(struct perf_probe_event *pev,
 				 const char *module)
@@ -2100,6 +2356,27 @@
 	return ret;
 }
 
+/* Warn if the current kernel's uprobe implementation is old */
+static void warn_uprobe_event_compat(struct probe_trace_event *tev)
+{
+	int i;
+	char *buf = synthesize_probe_trace_command(tev);
+
+	/* Old uprobe event doesn't support memory dereference */
+	if (!tev->uprobes || tev->nargs == 0 || !buf)
+		goto out;
+
+	for (i = 0; i < tev->nargs; i++)
+		if (strglobmatch(tev->args[i].value, "[$@+-]*")) {
+			pr_warning("Please upgrade your kernel to at least "
+				   "3.14 to have access to feature %s\n",
+				   tev->args[i].value);
+			break;
+		}
+out:
+	free(buf);
+}
+
 static int __add_probe_trace_events(struct perf_probe_event *pev,
 				     struct probe_trace_event *tevs,
 				     int ntevs, bool allow_suffix)
@@ -2109,6 +2386,8 @@
 	char buf[64];
 	const char *event, *group;
 	struct strlist *namelist;
+	LIST_HEAD(blacklist);
+	struct kprobe_blacklist_node *node;
 
 	if (pev->uprobes)
 		fd = open_uprobe_events(true);
@@ -2126,11 +2405,25 @@
 		pr_debug("Failed to get current event list.\n");
 		return -EIO;
 	}
+	/* Get kprobe blacklist if exists */
+	if (!pev->uprobes) {
+		ret = kprobe_blacklist__load(&blacklist);
+		if (ret < 0)
+			pr_debug("No kprobe blacklist support, ignored\n");
+	}
 
 	ret = 0;
 	pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
 	for (i = 0; i < ntevs; i++) {
 		tev = &tevs[i];
+		/* Ensure that the address is NOT blacklisted */
+		node = kprobe_blacklist__find_by_address(&blacklist,
+							 tev->point.address);
+		if (node) {
+			pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol);
+			continue;
+		}
+
 		if (pev->event)
 			event = pev->event;
 		else
@@ -2180,14 +2473,18 @@
 		 */
 		allow_suffix = true;
 	}
+	if (ret == -EINVAL && pev->uprobes)
+		warn_uprobe_event_compat(tev);
 
-	if (ret >= 0) {
+	/* Note that it is possible to skip all events because of blacklist */
+	if (ret >= 0 && tev->event) {
 		/* Show how to use the event. */
 		pr_info("\nYou can now use it in all perf tools, such as:\n\n");
 		pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
 			 tev->event);
 	}
 
+	kprobe_blacklist__delete(&blacklist);
 	strlist__delete(namelist);
 	close(fd);
 	return ret;
@@ -2199,8 +2496,7 @@
 	struct symbol *sym;
 
 	map__for_each_symbol_by_name(map, name, sym) {
-		if (sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL)
-			found++;
+		found++;
 	}
 
 	return found;
@@ -2218,7 +2514,6 @@
 					    int max_tevs, const char *target)
 {
 	struct map *map = NULL;
-	struct kmap *kmap = NULL;
 	struct ref_reloc_sym *reloc_sym = NULL;
 	struct symbol *sym;
 	struct probe_trace_event *tev;
@@ -2227,11 +2522,7 @@
 	int num_matched_functions;
 	int ret, i;
 
-	/* Init maps of given executable or kernel */
-	if (pev->uprobes)
-		map = dso__new_map(target);
-	else
-		map = kernel_get_module_map(target);
+	map = get_target_map(target, pev->uprobes);
 	if (!map) {
 		ret = -EINVAL;
 		goto out;
@@ -2255,8 +2546,7 @@
 	}
 
 	if (!pev->uprobes && !pp->retprobe) {
-		kmap = map__kmap(map);
-		reloc_sym = kmap->ref_reloc_sym;
+		reloc_sym = kernel_get_ref_reloc_sym();
 		if (!reloc_sym) {
 			pr_warning("Relocated base symbol is not found!\n");
 			ret = -EINVAL;
@@ -2324,11 +2614,7 @@
 	}
 
 out:
-	if (map && pev->uprobes) {
-		/* Only when using uprobe(exec) map needs to be released */
-		dso__delete(map->dso);
-		map__delete(map);
-	}
+	put_target_map(map, pev->uprobes);
 	return ret;
 
 nomem_out:
@@ -2369,7 +2655,7 @@
 };
 
 int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-			  int max_tevs, const char *target, bool force_add)
+			  int max_tevs, bool force_add)
 {
 	int i, j, ret;
 	struct __event_package *pkgs;
@@ -2393,7 +2679,7 @@
 		ret  = convert_to_probe_trace_events(pkgs[i].pev,
 						     &pkgs[i].tevs,
 						     max_tevs,
-						     target);
+						     pkgs[i].pev->target);
 		if (ret < 0)
 			goto end;
 		pkgs[i].ntevs = ret;
@@ -2568,8 +2854,7 @@
 static int filter_available_functions(struct map *map __maybe_unused,
 				      struct symbol *sym)
 {
-	if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) &&
-	    strfilter__compare(available_func_filter, sym->name))
+	if (strfilter__compare(available_func_filter, sym->name))
 		return 0;
 	return 1;
 }
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e01e994..d6b7834 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -73,7 +73,8 @@
 	char			*group;	/* Group name */
 	struct perf_probe_point	point;	/* Probe point */
 	int			nargs;	/* Number of arguments */
-	bool			uprobes;
+	bool			uprobes;	/* Uprobe event flag */
+	char			*target;	/* Target binary */
 	struct perf_probe_arg	*args;	/* Arguments */
 };
 
@@ -124,8 +125,7 @@
 extern const char *kernel_get_module_path(const char *module);
 
 extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-				 int max_probe_points, const char *module,
-				 bool force_add);
+				 int max_probe_points, bool force_add);
 extern int del_perf_probe_events(struct strlist *dellist);
 extern int show_perf_probe_events(void);
 extern int show_line_range(struct line_range *lr, const char *module,
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index b5247d7..e307423 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -456,11 +456,12 @@
 			return -EINVAL;
 		}
 		if (field->name[0] == '[') {
-			pr_err("Semantic error: %s is not a pointor"
+			pr_err("Semantic error: %s is not a pointer"
 			       " nor array.\n", varname);
 			return -EINVAL;
 		}
-		if (field->ref) {
+		/* While prcessing unnamed field, we don't care about this */
+		if (field->ref && dwarf_diename(vr_die)) {
 			pr_err("Semantic error: %s must be referred by '.'\n",
 			       field->name);
 			return -EINVAL;
@@ -491,6 +492,11 @@
 	}
 	ref->offset += (long)offs;
 
+	/* If this member is unnamed, we need to reuse this field */
+	if (!dwarf_diename(die_mem))
+		return convert_variable_fields(die_mem, varname, field,
+						&ref, die_mem);
+
 next:
 	/* Converting next field */
 	if (field->next)
@@ -915,17 +921,13 @@
 		dwarf_decl_line(sp_die, &pf->lno);
 		pf->lno += pp->line;
 		param->retval = find_probe_point_by_line(pf);
-	} else if (!dwarf_func_inline(sp_die)) {
+	} else if (die_is_func_instance(sp_die)) {
+		/* Instances always have the entry address */
+		dwarf_entrypc(sp_die, &pf->addr);
 		/* Real function */
 		if (pp->lazy_line)
 			param->retval = find_probe_point_lazy(sp_die, pf);
 		else {
-			if (dwarf_entrypc(sp_die, &pf->addr) != 0) {
-				pr_warning("Failed to get entry address of "
-					   "%s.\n", dwarf_diename(sp_die));
-				param->retval = -ENOENT;
-				return DWARF_CB_ABORT;
-			}
 			pf->addr += pp->offset;
 			/* TODO: Check the address in this function */
 			param->retval = call_probe_finder(sp_die, pf);
@@ -1349,11 +1351,8 @@
 	const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp;
 	int baseline = 0, lineno = 0, ret = 0;
 
-	/* Adjust address with bias */
-	addr += dbg->bias;
-
 	/* Find cu die */
-	if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr - dbg->bias, &cudie)) {
+	if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) {
 		pr_warning("Failed to find debug information for address %lx\n",
 			   addr);
 		ret = -EINVAL;
@@ -1536,7 +1535,7 @@
 		pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e);
 		lr->start = lf->lno_s;
 		lr->end = lf->lno_e;
-		if (dwarf_func_inline(sp_die))
+		if (!die_is_func_instance(sp_die))
 			param->retval = die_walk_instances(sp_die,
 						line_range_inline_cb, lf);
 		else
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 6c6a695..4d28624 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -17,6 +17,5 @@
 util/cgroup.c
 util/rblist.c
 util/strlist.c
-../lib/api/fs/fs.c
 util/trace-event.c
 ../../lib/rbtree.c
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
new file mode 100644
index 0000000..6516e22
--- /dev/null
+++ b/tools/perf/util/scripting-engines/Build
@@ -0,0 +1,6 @@
+libperf-$(CONFIG_LIBPERL)   += trace-event-perl.o
+libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o
+
+CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default
+
+CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 22ebc46..430b5d2 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -214,6 +214,11 @@
 		define_event_symbols(event, ev_name, args->hex.field);
 		define_event_symbols(event, ev_name, args->hex.size);
 		break;
+	case PRINT_INT_ARRAY:
+		define_event_symbols(event, ev_name, args->int_array.field);
+		define_event_symbols(event, ev_name, args->int_array.count);
+		define_event_symbols(event, ev_name, args->int_array.el_size);
+		break;
 	case PRINT_BSTRING:
 	case PRINT_DYNAMIC_ARRAY:
 	case PRINT_STRING:
@@ -355,10 +360,9 @@
 static void perl_process_event(union perf_event *event,
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
-			       struct thread *thread,
-			       struct addr_location *al __maybe_unused)
+			       struct addr_location *al)
 {
-	perl_process_tracepoint(sample, evsel, thread);
+	perl_process_tracepoint(sample, evsel, al->thread);
 	perl_process_event_generic(event, sample, evsel);
 }
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 0c815a4..5544b8c 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -231,6 +231,11 @@
 		define_event_symbols(event, ev_name, args->hex.field);
 		define_event_symbols(event, ev_name, args->hex.size);
 		break;
+	case PRINT_INT_ARRAY:
+		define_event_symbols(event, ev_name, args->int_array.field);
+		define_event_symbols(event, ev_name, args->int_array.count);
+		define_event_symbols(event, ev_name, args->int_array.el_size);
+		break;
 	case PRINT_STRING:
 		break;
 	case PRINT_TYPE:
@@ -376,7 +381,6 @@
 
 static void python_process_tracepoint(struct perf_sample *sample,
 				      struct perf_evsel *evsel,
-				      struct thread *thread,
 				      struct addr_location *al)
 {
 	struct event_format *event = evsel->tp_format;
@@ -390,7 +394,7 @@
 	int cpu = sample->cpu;
 	void *data = sample->raw_data;
 	unsigned long long nsecs = sample->time;
-	const char *comm = thread__comm_str(thread);
+	const char *comm = thread__comm_str(al->thread);
 
 	t = PyTuple_New(MAX_FIELDS);
 	if (!t)
@@ -675,7 +679,7 @@
 	tuple_set_u64(t, 0, es->db_id);
 	tuple_set_u64(t, 1, es->evsel->db_id);
 	tuple_set_u64(t, 2, es->al->machine->db_id);
-	tuple_set_u64(t, 3, es->thread->db_id);
+	tuple_set_u64(t, 3, es->al->thread->db_id);
 	tuple_set_u64(t, 4, es->comm_db_id);
 	tuple_set_u64(t, 5, es->dso_db_id);
 	tuple_set_u64(t, 6, es->sym_db_id);
@@ -761,7 +765,6 @@
 
 static void python_process_general_event(struct perf_sample *sample,
 					 struct perf_evsel *evsel,
-					 struct thread *thread,
 					 struct addr_location *al)
 {
 	PyObject *handler, *t, *dict, *callchain, *dict_sample;
@@ -811,7 +814,7 @@
 	pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize(
 			(const char *)sample->raw_data, sample->raw_size));
 	pydict_set_item_string_decref(dict, "comm",
-			PyString_FromString(thread__comm_str(thread)));
+			PyString_FromString(thread__comm_str(al->thread)));
 	if (al->map) {
 		pydict_set_item_string_decref(dict, "dso",
 			PyString_FromString(al->map->dso->name));
@@ -838,22 +841,20 @@
 static void python_process_event(union perf_event *event,
 				 struct perf_sample *sample,
 				 struct perf_evsel *evsel,
-				 struct thread *thread,
 				 struct addr_location *al)
 {
 	struct tables *tables = &tables_global;
 
 	switch (evsel->attr.type) {
 	case PERF_TYPE_TRACEPOINT:
-		python_process_tracepoint(sample, evsel, thread, al);
+		python_process_tracepoint(sample, evsel, al);
 		break;
 	/* Reserve for future process_hw/sw/raw APIs */
 	default:
 		if (tables->db_export_mode)
-			db_export__sample(&tables->dbe, event, sample, evsel,
-					  thread, al);
+			db_export__sample(&tables->dbe, event, sample, evsel, al);
 		else
-			python_process_general_event(sample, evsel, thread, al);
+			python_process_general_event(sample, evsel, al);
 	}
 }
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0baf75f..0c74012 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -16,6 +16,12 @@
 #include "perf_regs.h"
 #include "asm/bug.h"
 
+static int machines__deliver_event(struct machines *machines,
+				   struct perf_evlist *evlist,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct perf_tool *tool, u64 file_offset);
+
 static int perf_session__open(struct perf_session *session)
 {
 	struct perf_data_file *file = session->file;
@@ -86,6 +92,23 @@
 	machines__set_comm_exec(&session->machines, comm_exec);
 }
 
+static int ordered_events__deliver_event(struct ordered_events *oe,
+					 struct ordered_event *event)
+{
+	struct perf_sample sample;
+	struct perf_session *session = container_of(oe, struct perf_session,
+						    ordered_events);
+	int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample);
+
+	if (ret) {
+		pr_err("Can't parse sample, err = %d\n", ret);
+		return ret;
+	}
+
+	return machines__deliver_event(&session->machines, session->evlist, event->event,
+				       &sample, session->tool, event->file_offset);
+}
+
 struct perf_session *perf_session__new(struct perf_data_file *file,
 				       bool repipe, struct perf_tool *tool)
 {
@@ -95,8 +118,9 @@
 		goto out;
 
 	session->repipe = repipe;
-	ordered_events__init(&session->ordered_events);
+	session->tool   = tool;
 	machines__init(&session->machines);
+	ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
 
 	if (file) {
 		if (perf_data_file__open(file))
@@ -138,11 +162,6 @@
 	return NULL;
 }
 
-static void perf_session__delete_dead_threads(struct perf_session *session)
-{
-	machine__delete_dead_threads(&session->machines.host);
-}
-
 static void perf_session__delete_threads(struct perf_session *session)
 {
 	machine__delete_threads(&session->machines.host);
@@ -167,7 +186,6 @@
 void perf_session__delete(struct perf_session *session)
 {
 	perf_session__destroy_kernel_maps(session);
-	perf_session__delete_dead_threads(session);
 	perf_session__delete_threads(session);
 	perf_session_env__delete(&session->header.env);
 	machines__exit(&session->machines);
@@ -215,10 +233,17 @@
 	return 0;
 }
 
+static int process_build_id_stub(struct perf_tool *tool __maybe_unused,
+				 union perf_event *event __maybe_unused,
+				 struct perf_session *session __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
 static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
 				       union perf_event *event __maybe_unused,
-				       struct perf_session *perf_session
-				       __maybe_unused)
+				       struct ordered_events *oe __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
@@ -226,7 +251,7 @@
 
 static int process_finished_round(struct perf_tool *tool,
 				  union perf_event *event,
-				  struct perf_session *session);
+				  struct ordered_events *oe);
 
 static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
 				 union perf_event *event __maybe_unused,
@@ -264,7 +289,7 @@
 	if (tool->tracing_data == NULL)
 		tool->tracing_data = process_event_synth_tracing_data_stub;
 	if (tool->build_id == NULL)
-		tool->build_id = process_finished_round_stub;
+		tool->build_id = process_build_id_stub;
 	if (tool->finished_round == NULL) {
 		if (tool->ordered_events)
 			tool->finished_round = process_finished_round;
@@ -514,54 +539,80 @@
  *      Flush every events below timestamp 7
  *      etc...
  */
-static int process_finished_round(struct perf_tool *tool,
+static int process_finished_round(struct perf_tool *tool __maybe_unused,
 				  union perf_event *event __maybe_unused,
-				  struct perf_session *session)
+				  struct ordered_events *oe)
 {
-	return ordered_events__flush(session, tool, OE_FLUSH__ROUND);
+	return ordered_events__flush(oe, OE_FLUSH__ROUND);
 }
 
-int perf_session_queue_event(struct perf_session *s, union perf_event *event,
-			     struct perf_tool *tool, struct perf_sample *sample,
-			     u64 file_offset)
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+			      struct perf_sample *sample, u64 file_offset)
 {
-	struct ordered_events *oe = &s->ordered_events;
-	u64 timestamp = sample->time;
-	struct ordered_event *new;
-
-	if (!timestamp || timestamp == ~0ULL)
-		return -ETIME;
-
-	if (timestamp < oe->last_flush) {
-		pr_oe_time(timestamp,      "out of order event\n");
-		pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n",
-			   oe->last_flush_type);
-
-		s->stats.nr_unordered_events++;
-	}
-
-	new = ordered_events__new(oe, timestamp, event);
-	if (!new) {
-		ordered_events__flush(s, tool, OE_FLUSH__HALF);
-		new = ordered_events__new(oe, timestamp, event);
-	}
-
-	if (!new)
-		return -ENOMEM;
-
-	new->file_offset = file_offset;
-	return 0;
+	return ordered_events__queue(&s->ordered_events, event, sample, file_offset);
 }
 
-static void callchain__printf(struct perf_sample *sample)
+static void callchain__lbr_callstack_printf(struct perf_sample *sample)
 {
+	struct ip_callchain *callchain = sample->callchain;
+	struct branch_stack *lbr_stack = sample->branch_stack;
+	u64 kernel_callchain_nr = callchain->nr;
 	unsigned int i;
 
-	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);
+	for (i = 0; i < kernel_callchain_nr; i++) {
+		if (callchain->ips[i] == PERF_CONTEXT_USER)
+			break;
+	}
 
-	for (i = 0; i < sample->callchain->nr; i++)
+	if ((i != kernel_callchain_nr) && lbr_stack->nr) {
+		u64 total_nr;
+		/*
+		 * LBR callstack can only get user call chain,
+		 * i is kernel call chain number,
+		 * 1 is PERF_CONTEXT_USER.
+		 *
+		 * The user call chain is stored in LBR registers.
+		 * LBR are pair registers. The caller is stored
+		 * in "from" register, while the callee is stored
+		 * in "to" register.
+		 * For example, there is a call stack
+		 * "A"->"B"->"C"->"D".
+		 * The LBR registers will recorde like
+		 * "C"->"D", "B"->"C", "A"->"B".
+		 * So only the first "to" register and all "from"
+		 * registers are needed to construct the whole stack.
+		 */
+		total_nr = i + 1 + lbr_stack->nr + 1;
+		kernel_callchain_nr = i + 1;
+
+		printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);
+
+		for (i = 0; i < kernel_callchain_nr; i++)
+			printf("..... %2d: %016" PRIx64 "\n",
+			       i, callchain->ips[i]);
+
 		printf("..... %2d: %016" PRIx64 "\n",
-		       i, sample->callchain->ips[i]);
+		       (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
+		for (i = 0; i < lbr_stack->nr; i++)
+			printf("..... %2d: %016" PRIx64 "\n",
+			       (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
+	}
+}
+
+static void callchain__printf(struct perf_evsel *evsel,
+			      struct perf_sample *sample)
+{
+	unsigned int i;
+	struct ip_callchain *callchain = sample->callchain;
+
+	if (has_branch_callstack(evsel))
+		callchain__lbr_callstack_printf(sample);
+
+	printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
+
+	for (i = 0; i < callchain->nr; i++)
+		printf("..... %2d: %016" PRIx64 "\n",
+		       i, callchain->ips[i]);
 }
 
 static void branch_stack__printf(struct perf_sample *sample)
@@ -636,14 +687,14 @@
 	       dump->size, dump->offset);
 }
 
-static void perf_session__print_tstamp(struct perf_session *session,
+static void perf_evlist__print_tstamp(struct perf_evlist *evlist,
 				       union perf_event *event,
 				       struct perf_sample *sample)
 {
-	u64 sample_type = __perf_evlist__combined_sample_type(session->evlist);
+	u64 sample_type = __perf_evlist__combined_sample_type(evlist);
 
 	if (event->header.type != PERF_RECORD_SAMPLE &&
-	    !perf_evlist__sample_id_all(session->evlist)) {
+	    !perf_evlist__sample_id_all(evlist)) {
 		fputs("-1 -1 ", stdout);
 		return;
 	}
@@ -685,7 +736,7 @@
 			sample->read.one.id, sample->read.one.value);
 }
 
-static void dump_event(struct perf_session *session, union perf_event *event,
+static void dump_event(struct perf_evlist *evlist, union perf_event *event,
 		       u64 file_offset, struct perf_sample *sample)
 {
 	if (!dump_trace)
@@ -697,7 +748,7 @@
 	trace_event(event);
 
 	if (sample)
-		perf_session__print_tstamp(session, event, sample);
+		perf_evlist__print_tstamp(evlist, event, sample);
 
 	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
 	       event->header.size, perf_event__name(event->header.type));
@@ -718,9 +769,9 @@
 	sample_type = evsel->attr.sample_type;
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN)
-		callchain__printf(sample);
+		callchain__printf(evsel, sample);
 
-	if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+	if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel))
 		branch_stack__printf(sample);
 
 	if (sample_type & PERF_SAMPLE_REGS_USER)
@@ -745,8 +796,7 @@
 		sample_read__printf(sample, evsel->attr.read_format);
 }
 
-static struct machine *
-	perf_session__find_machine_for_cpumode(struct perf_session *session,
+static struct machine *machines__find_for_cpumode(struct machines *machines,
 					       union perf_event *event,
 					       struct perf_sample *sample)
 {
@@ -764,26 +814,24 @@
 		else
 			pid = sample->pid;
 
-		machine = perf_session__find_machine(session, pid);
+		machine = machines__find(machines, pid);
 		if (!machine)
-			machine = perf_session__findnew_machine(session,
-						DEFAULT_GUEST_KERNEL_ID);
+			machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
 		return machine;
 	}
 
-	return &session->machines.host;
+	return &machines->host;
 }
 
-static int deliver_sample_value(struct perf_session *session,
+static int deliver_sample_value(struct perf_evlist *evlist,
 				struct perf_tool *tool,
 				union perf_event *event,
 				struct perf_sample *sample,
 				struct sample_read_value *v,
 				struct machine *machine)
 {
-	struct perf_sample_id *sid;
+	struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);
 
-	sid = perf_evlist__id2sid(session->evlist, v->id);
 	if (sid) {
 		sample->id     = v->id;
 		sample->period = v->value - sid->period;
@@ -791,14 +839,14 @@
 	}
 
 	if (!sid || sid->evsel == NULL) {
-		++session->stats.nr_unknown_id;
+		++evlist->stats.nr_unknown_id;
 		return 0;
 	}
 
 	return tool->sample(tool, event, sample, sid->evsel, machine);
 }
 
-static int deliver_sample_group(struct perf_session *session,
+static int deliver_sample_group(struct perf_evlist *evlist,
 				struct perf_tool *tool,
 				union  perf_event *event,
 				struct perf_sample *sample,
@@ -808,7 +856,7 @@
 	u64 i;
 
 	for (i = 0; i < sample->read.group.nr; i++) {
-		ret = deliver_sample_value(session, tool, event, sample,
+		ret = deliver_sample_value(evlist, tool, event, sample,
 					   &sample->read.group.values[i],
 					   machine);
 		if (ret)
@@ -819,7 +867,7 @@
 }
 
 static int
-perf_session__deliver_sample(struct perf_session *session,
+ perf_evlist__deliver_sample(struct perf_evlist *evlist,
 			     struct perf_tool *tool,
 			     union  perf_event *event,
 			     struct perf_sample *sample,
@@ -836,41 +884,40 @@
 
 	/* For PERF_SAMPLE_READ we have either single or group mode. */
 	if (read_format & PERF_FORMAT_GROUP)
-		return deliver_sample_group(session, tool, event, sample,
+		return deliver_sample_group(evlist, tool, event, sample,
 					    machine);
 	else
-		return deliver_sample_value(session, tool, event, sample,
+		return deliver_sample_value(evlist, tool, event, sample,
 					    &sample->read.one, machine);
 }
 
-int perf_session__deliver_event(struct perf_session *session,
-				union perf_event *event,
-				struct perf_sample *sample,
-				struct perf_tool *tool, u64 file_offset)
+static int machines__deliver_event(struct machines *machines,
+				   struct perf_evlist *evlist,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct perf_tool *tool, u64 file_offset)
 {
 	struct perf_evsel *evsel;
 	struct machine *machine;
 
-	dump_event(session, event, file_offset, sample);
+	dump_event(evlist, event, file_offset, sample);
 
-	evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+	evsel = perf_evlist__id2evsel(evlist, sample->id);
 
-	machine = perf_session__find_machine_for_cpumode(session, event,
-							 sample);
+	machine = machines__find_for_cpumode(machines, event, sample);
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
 		dump_sample(evsel, event, sample);
 		if (evsel == NULL) {
-			++session->stats.nr_unknown_id;
+			++evlist->stats.nr_unknown_id;
 			return 0;
 		}
 		if (machine == NULL) {
-			++session->stats.nr_unprocessable_samples;
+			++evlist->stats.nr_unprocessable_samples;
 			return 0;
 		}
-		return perf_session__deliver_sample(session, tool, event,
-						    sample, evsel, machine);
+		return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
 	case PERF_RECORD_MMAP:
 		return tool->mmap(tool, event, sample, machine);
 	case PERF_RECORD_MMAP2:
@@ -883,7 +930,7 @@
 		return tool->exit(tool, event, sample, machine);
 	case PERF_RECORD_LOST:
 		if (tool->lost == perf_event__process_lost)
-			session->stats.total_lost += event->lost.lost;
+			evlist->stats.total_lost += event->lost.lost;
 		return tool->lost(tool, event, sample, machine);
 	case PERF_RECORD_READ:
 		return tool->read(tool, event, sample, evsel, machine);
@@ -892,20 +939,21 @@
 	case PERF_RECORD_UNTHROTTLE:
 		return tool->unthrottle(tool, event, sample, machine);
 	default:
-		++session->stats.nr_unknown_events;
+		++evlist->stats.nr_unknown_events;
 		return -1;
 	}
 }
 
 static s64 perf_session__process_user_event(struct perf_session *session,
 					    union perf_event *event,
-					    struct perf_tool *tool,
 					    u64 file_offset)
 {
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
 	int fd = perf_data_file__fd(session->file);
 	int err;
 
-	dump_event(session, event, file_offset, NULL);
+	dump_event(session->evlist, event, file_offset, NULL);
 
 	/* These events are processed right away */
 	switch (event->header.type) {
@@ -929,7 +977,7 @@
 	case PERF_RECORD_HEADER_BUILD_ID:
 		return tool->build_id(tool, event, session);
 	case PERF_RECORD_FINISHED_ROUND:
-		return tool->finished_round(tool, event, session);
+		return tool->finished_round(tool, event, oe);
 	case PERF_RECORD_ID_INDEX:
 		return tool->id_index(tool, event, session);
 	default:
@@ -939,15 +987,17 @@
 
 int perf_session__deliver_synth_event(struct perf_session *session,
 				      union perf_event *event,
-				      struct perf_sample *sample,
-				      struct perf_tool *tool)
+				      struct perf_sample *sample)
 {
-	events_stats__inc(&session->stats, event->header.type);
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_tool *tool = session->tool;
+
+	events_stats__inc(&evlist->stats, event->header.type);
 
 	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
-		return perf_session__process_user_event(session, event, tool, 0);
+		return perf_session__process_user_event(session, event, 0);
 
-	return perf_session__deliver_event(session, event, sample, tool, 0);
+	return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0);
 }
 
 static void event_swap(union perf_event *event, bool sample_id_all)
@@ -1015,40 +1065,39 @@
 }
 
 static s64 perf_session__process_event(struct perf_session *session,
-				       union perf_event *event,
-				       struct perf_tool *tool,
-				       u64 file_offset)
+				       union perf_event *event, u64 file_offset)
 {
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_tool *tool = session->tool;
 	struct perf_sample sample;
 	int ret;
 
 	if (session->header.needs_swap)
-		event_swap(event, perf_evlist__sample_id_all(session->evlist));
+		event_swap(event, perf_evlist__sample_id_all(evlist));
 
 	if (event->header.type >= PERF_RECORD_HEADER_MAX)
 		return -EINVAL;
 
-	events_stats__inc(&session->stats, event->header.type);
+	events_stats__inc(&evlist->stats, event->header.type);
 
 	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
-		return perf_session__process_user_event(session, event, tool, file_offset);
+		return perf_session__process_user_event(session, event, file_offset);
 
 	/*
 	 * For all kernel events we get the sample data
 	 */
-	ret = perf_evlist__parse_sample(session->evlist, event, &sample);
+	ret = perf_evlist__parse_sample(evlist, event, &sample);
 	if (ret)
 		return ret;
 
 	if (tool->ordered_events) {
-		ret = perf_session_queue_event(session, event, tool, &sample,
-					       file_offset);
+		ret = perf_session__queue_event(session, event, &sample, file_offset);
 		if (ret != -ETIME)
 			return ret;
 	}
 
-	return perf_session__deliver_event(session, event, &sample, tool,
-					   file_offset);
+	return machines__deliver_event(&session->machines, evlist, event,
+				       &sample, tool, file_offset);
 }
 
 void perf_event_header__bswap(struct perf_event_header *hdr)
@@ -1076,54 +1125,57 @@
 	return thread;
 }
 
-static void perf_session__warn_about_errors(const struct perf_session *session,
-					    const struct perf_tool *tool)
+static void perf_session__warn_about_errors(const struct perf_session *session)
 {
-	if (tool->lost == perf_event__process_lost &&
-	    session->stats.nr_events[PERF_RECORD_LOST] != 0) {
+	const struct events_stats *stats = &session->evlist->stats;
+	const struct ordered_events *oe = &session->ordered_events;
+
+	if (session->tool->lost == perf_event__process_lost &&
+	    stats->nr_events[PERF_RECORD_LOST] != 0) {
 		ui__warning("Processed %d events and lost %d chunks!\n\n"
 			    "Check IO/CPU overload!\n\n",
-			    session->stats.nr_events[0],
-			    session->stats.nr_events[PERF_RECORD_LOST]);
+			    stats->nr_events[0],
+			    stats->nr_events[PERF_RECORD_LOST]);
 	}
 
-	if (session->stats.nr_unknown_events != 0) {
+	if (stats->nr_unknown_events != 0) {
 		ui__warning("Found %u unknown events!\n\n"
 			    "Is this an older tool processing a perf.data "
 			    "file generated by a more recent tool?\n\n"
 			    "If that is not the case, consider "
 			    "reporting to linux-kernel@vger.kernel.org.\n\n",
-			    session->stats.nr_unknown_events);
+			    stats->nr_unknown_events);
 	}
 
-	if (session->stats.nr_unknown_id != 0) {
+	if (stats->nr_unknown_id != 0) {
 		ui__warning("%u samples with id not present in the header\n",
-			    session->stats.nr_unknown_id);
+			    stats->nr_unknown_id);
 	}
 
- 	if (session->stats.nr_invalid_chains != 0) {
- 		ui__warning("Found invalid callchains!\n\n"
- 			    "%u out of %u events were discarded for this reason.\n\n"
- 			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
- 			    session->stats.nr_invalid_chains,
- 			    session->stats.nr_events[PERF_RECORD_SAMPLE]);
- 	}
+	if (stats->nr_invalid_chains != 0) {
+		ui__warning("Found invalid callchains!\n\n"
+			    "%u out of %u events were discarded for this reason.\n\n"
+			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+			    stats->nr_invalid_chains,
+			    stats->nr_events[PERF_RECORD_SAMPLE]);
+	}
 
-	if (session->stats.nr_unprocessable_samples != 0) {
+	if (stats->nr_unprocessable_samples != 0) {
 		ui__warning("%u unprocessable samples recorded.\n"
 			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
-			    session->stats.nr_unprocessable_samples);
+			    stats->nr_unprocessable_samples);
 	}
 
-	if (session->stats.nr_unordered_events != 0)
-		ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events);
+	if (oe->nr_unordered_events != 0)
+		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
 }
 
 volatile int session_done;
 
-static int __perf_session__process_pipe_events(struct perf_session *session,
-					       struct perf_tool *tool)
+static int __perf_session__process_pipe_events(struct perf_session *session)
 {
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
 	int fd = perf_data_file__fd(session->file);
 	union perf_event *event;
 	uint32_t size, cur_size = 0;
@@ -1187,7 +1239,7 @@
 		}
 	}
 
-	if ((skip = perf_session__process_event(session, event, tool, head)) < 0) {
+	if ((skip = perf_session__process_event(session, event, head)) < 0) {
 		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
 		       head, event->header.size, event->header.type);
 		err = -EINVAL;
@@ -1203,10 +1255,10 @@
 		goto more;
 done:
 	/* do the final flush for ordered samples */
-	err = ordered_events__flush(session, tool, OE_FLUSH__FINAL);
+	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
 out_err:
 	free(buf);
-	perf_session__warn_about_errors(session, tool);
+	perf_session__warn_about_errors(session);
 	ordered_events__free(&session->ordered_events);
 	return err;
 }
@@ -1253,8 +1305,10 @@
 
 static int __perf_session__process_events(struct perf_session *session,
 					  u64 data_offset, u64 data_size,
-					  u64 file_size, struct perf_tool *tool)
+					  u64 file_size)
 {
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
 	int fd = perf_data_file__fd(session->file);
 	u64 head, page_offset, file_offset, file_pos, size;
 	int err, mmap_prot, mmap_flags, map_idx = 0;
@@ -1323,8 +1377,7 @@
 	size = event->header.size;
 
 	if (size < sizeof(struct perf_event_header) ||
-	    (skip = perf_session__process_event(session, event, tool, file_pos))
-									< 0) {
+	    (skip = perf_session__process_event(session, event, file_pos)) < 0) {
 		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
 		       file_offset + head, event->header.size,
 		       event->header.type);
@@ -1348,17 +1401,16 @@
 
 out:
 	/* do the final flush for ordered samples */
-	err = ordered_events__flush(session, tool, OE_FLUSH__FINAL);
+	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
 out_err:
 	ui_progress__finish();
-	perf_session__warn_about_errors(session, tool);
+	perf_session__warn_about_errors(session);
 	ordered_events__free(&session->ordered_events);
 	session->one_mmap = false;
 	return err;
 }
 
-int perf_session__process_events(struct perf_session *session,
-				 struct perf_tool *tool)
+int perf_session__process_events(struct perf_session *session)
 {
 	u64 size = perf_data_file__size(session->file);
 	int err;
@@ -1369,10 +1421,9 @@
 	if (!perf_data_file__is_pipe(session->file))
 		err = __perf_session__process_events(session,
 						     session->header.data_offset,
-						     session->header.data_size,
-						     size, tool);
+						     session->header.data_size, size);
 	else
-		err = __perf_session__process_pipe_events(session, tool);
+		err = __perf_session__process_pipe_events(session);
 
 	return err;
 }
@@ -1415,6 +1466,9 @@
 
 	for (i = 0; i < MAP__NR_TYPES; ++i) {
 		struct kmap *kmap = map__kmap(maps[i]);
+
+		if (!kmap)
+			continue;
 		kmap->ref_reloc_sym = ref;
 	}
 
@@ -1436,7 +1490,7 @@
 {
 	size_t ret = fprintf(fp, "Aggregated stats:\n");
 
-	ret += events_stats__fprintf(&session->stats, fp);
+	ret += events_stats__fprintf(&session->evlist->stats, fp);
 	return ret;
 }
 
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 6d663dc..d5fa7b791 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -20,13 +20,13 @@
 	struct machines		machines;
 	struct perf_evlist	*evlist;
 	struct trace_event	tevent;
-	struct events_stats	stats;
 	bool			repipe;
 	bool			one_mmap;
 	void			*one_mmap_addr;
 	u64			one_mmap_offset;
 	struct ordered_events	ordered_events;
 	struct perf_data_file	*file;
+	struct perf_tool	*tool;
 };
 
 #define PRINT_IP_OPT_IP		(1<<0)
@@ -49,20 +49,13 @@
 			     union perf_event **event_ptr,
 			     struct perf_sample *sample);
 
-int perf_session__process_events(struct perf_session *session,
-				 struct perf_tool *tool);
+int perf_session__process_events(struct perf_session *session);
 
-int perf_session_queue_event(struct perf_session *s, union perf_event *event,
-			     struct perf_tool *tool, struct perf_sample *sample,
-			     u64 file_offset);
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+			      struct perf_sample *sample, u64 file_offset);
 
 void perf_tool__fill_defaults(struct perf_tool *tool);
 
-int perf_session__deliver_event(struct perf_session *session,
-				union perf_event *event,
-				struct perf_sample *sample,
-				struct perf_tool *tool, u64 file_offset);
-
 int perf_session__resolve_callchain(struct perf_session *session,
 				    struct perf_evsel *evsel,
 				    struct thread *thread,
@@ -126,8 +119,7 @@
 
 int perf_session__deliver_synth_event(struct perf_session *session,
 				      union perf_event *event,
-				      struct perf_sample *sample,
-				      struct perf_tool *tool);
+				      struct perf_sample *sample);
 
 int perf_event__process_id_index(struct perf_tool *tool,
 				 union perf_event *event,
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index d0aee4b..1833103 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -25,7 +25,7 @@
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
-libapikfs = getenv('LIBAPIKFS')
+libapikfs = getenv('LIBAPI')
 
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
 				if len(f.strip()) > 0 and f[0] != '#']
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 7a39c1e..4593f36 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1463,6 +1463,15 @@
 			sort__has_parent = 1;
 		} else if (sd->entry == &sort_sym) {
 			sort__has_sym = 1;
+			/*
+			 * perf diff displays the performance difference amongst
+			 * two or more perf.data files. Those files could come
+			 * from different binaries. So we should not compare
+			 * their ips, but the name of symbol.
+			 */
+			if (sort__mode == SORT_MODE__DIFF)
+				sd->entry->se_collapse = sort__sym_sort;
+
 		} else if (sd->entry == &sort_dso) {
 			sort__has_dso = 1;
 		}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index c03e4ff..846036a 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -44,6 +44,7 @@
 extern struct sort_entry sort_sym_from;
 extern struct sort_entry sort_sym_to;
 extern enum sort_type sort__first_dimension;
+extern const char default_mem_sort_order[];
 
 struct he_stat {
 	u64			period;
@@ -102,7 +103,6 @@
 
 	bool			init_have_children;
 	char			level;
-	bool			used;
 	u8			filtered;
 	char			*srcline;
 	struct symbol		*parent;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 33b7a2a..a7ab606 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -74,6 +74,10 @@
 	return GELF_ST_TYPE(sym->st_info);
 }
 
+#ifndef STT_GNU_IFUNC
+#define STT_GNU_IFUNC 10
+#endif
+
 static inline int elf_sym__is_function(const GElf_Sym *sym)
 {
 	return (elf_sym__type(sym) == STT_FUNC ||
@@ -575,32 +579,37 @@
 static int decompress_kmodule(struct dso *dso, const char *name,
 			      enum dso_binary_type type)
 {
-	int fd;
-	const char *ext = strrchr(name, '.');
+	int fd = -1;
 	char tmpbuf[] = "/tmp/perf-kmod-XXXXXX";
+	struct kmod_path m;
 
 	if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP &&
 	    type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP &&
 	    type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
 		return -1;
 
-	if (!ext || !is_supported_compression(ext + 1)) {
-		ext = strrchr(dso->name, '.');
-		if (!ext || !is_supported_compression(ext + 1))
-			return -1;
-	}
+	if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
+		name = dso->long_name;
 
-	fd = mkstemp(tmpbuf);
-	if (fd < 0)
+	if (kmod_path__parse_ext(&m, name) || !m.comp)
 		return -1;
 
-	if (!decompress_to_file(ext + 1, name, fd)) {
+	fd = mkstemp(tmpbuf);
+	if (fd < 0) {
+		dso->load_errno = errno;
+		goto out;
+	}
+
+	if (!decompress_to_file(m.ext, name, fd)) {
+		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
 		close(fd);
 		fd = -1;
 	}
 
 	unlink(tmpbuf);
 
+out:
+	free(m.ext);
 	return fd;
 }
 
@@ -629,37 +638,49 @@
 	Elf *elf;
 	int fd;
 
-	if (dso__needs_decompress(dso))
+	if (dso__needs_decompress(dso)) {
 		fd = decompress_kmodule(dso, name, type);
-	else
+		if (fd < 0)
+			return -1;
+	} else {
 		fd = open(name, O_RDONLY);
-
-	if (fd < 0)
-		return -1;
+		if (fd < 0) {
+			dso->load_errno = errno;
+			return -1;
+		}
+	}
 
 	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
 	if (elf == NULL) {
 		pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
+		dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
 		goto out_close;
 	}
 
 	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
 		pr_debug("%s: cannot get elf header.\n", __func__);
 		goto out_elf_end;
 	}
 
-	if (dso__swap_init(dso, ehdr.e_ident[EI_DATA]))
+	if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) {
+		dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR;
 		goto out_elf_end;
+	}
 
 	/* Always reject images with a mismatched build-id: */
 	if (dso->has_build_id) {
 		u8 build_id[BUILD_ID_SIZE];
 
-		if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0)
+		if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) {
+			dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID;
 			goto out_elf_end;
+		}
 
-		if (!dso__build_id_equal(dso, build_id))
+		if (!dso__build_id_equal(dso, build_id)) {
+			dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
 			goto out_elf_end;
+		}
 	}
 
 	ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
@@ -695,8 +716,10 @@
 	}
 
 	ss->name   = strdup(name);
-	if (!ss->name)
+	if (!ss->name) {
+		dso->load_errno = errno;
 		goto out_elf_end;
+	}
 
 	ss->elf    = elf;
 	ss->fd     = fd;
@@ -753,6 +776,7 @@
 		  symbol_filter_t filter, int kmodule)
 {
 	struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL;
+	struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL;
 	struct map *curr_map = map;
 	struct dso *curr_dso = dso;
 	Elf_Data *symstrs, *secstrs;
@@ -768,6 +792,9 @@
 	int nr = 0;
 	bool remap_kernel = false, adjust_kernel_syms = false;
 
+	if (kmap && !kmaps)
+		return -1;
+
 	dso->symtab_type = syms_ss->type;
 	dso->is_64_bit = syms_ss->is_64_bit;
 	dso->rel = syms_ss->ehdr.e_type == ET_REL;
@@ -864,10 +891,9 @@
 		/* Reject ARM ELF "mapping symbols": these aren't unique and
 		 * don't identify functions, so will confuse the profile
 		 * output: */
-		if (ehdr.e_machine == EM_ARM) {
-			if (!strcmp(elf_name, "$a") ||
-			    !strcmp(elf_name, "$d") ||
-			    !strcmp(elf_name, "$t"))
+		if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) {
+			if (elf_name[0] == '$' && strchr("adtx", elf_name[1])
+			    && (elf_name[2] == '\0' || elf_name[2] == '.'))
 				continue;
 		}
 
@@ -936,8 +962,10 @@
 					map->map_ip = map__map_ip;
 					map->unmap_ip = map__unmap_ip;
 					/* Ensure maps are correctly ordered */
-					map_groups__remove(kmap->kmaps, map);
-					map_groups__insert(kmap->kmaps, map);
+					if (kmaps) {
+						map_groups__remove(kmaps, map);
+						map_groups__insert(kmaps, map);
+					}
 				}
 
 				/*
@@ -961,7 +989,7 @@
 			snprintf(dso_name, sizeof(dso_name),
 				 "%s%s", dso->short_name, section_name);
 
-			curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name);
+			curr_map = map_groups__find_by_name(kmaps, map->type, dso_name);
 			if (curr_map == NULL) {
 				u64 start = sym.st_value;
 
@@ -991,7 +1019,7 @@
 					curr_map->unmap_ip = identity__map_ip;
 				}
 				curr_dso->symtab_type = dso->symtab_type;
-				map_groups__insert(kmap->kmaps, curr_map);
+				map_groups__insert(kmaps, curr_map);
 				/*
 				 * The new DSO should go to the kernel DSOS
 				 */
@@ -1045,14 +1073,15 @@
 	 * For misannotated, zeroed, ASM function sizes.
 	 */
 	if (nr > 0) {
-		symbols__fixup_duplicate(&dso->symbols[map->type]);
+		if (!symbol_conf.allow_aliases)
+			symbols__fixup_duplicate(&dso->symbols[map->type]);
 		symbols__fixup_end(&dso->symbols[map->type]);
 		if (kmap) {
 			/*
 			 * We need to fixup this here too because we create new
 			 * maps here, for things like vsyscall sections.
 			 */
-			__map_groups__fixup_end(kmap->kmaps, map->type);
+			__map_groups__fixup_end(kmaps, map->type);
 		}
 	}
 	err = nr;
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index d7efb03..fd8477c 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -246,13 +246,12 @@
 	return ret;
 }
 
-int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused,
-		 const char *name,
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 	         enum dso_binary_type type)
 {
 	int fd = open(name, O_RDONLY);
 	if (fd < 0)
-		return -1;
+		goto out_errno;
 
 	ss->name = strdup(name);
 	if (!ss->name)
@@ -264,6 +263,8 @@
 	return 0;
 out_close:
 	close(fd);
+out_errno:
+	dso->load_errno = errno;
 	return -1;
 }
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a690668..201f6c4c 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -15,6 +15,7 @@
 #include "machine.h"
 #include "symbol.h"
 #include "strlist.h"
+#include "intlist.h"
 #include "header.h"
 
 #include <elf.h>
@@ -629,13 +630,16 @@
 static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
 					 symbol_filter_t filter)
 {
-	struct map_groups *kmaps = map__kmap(map)->kmaps;
+	struct map_groups *kmaps = map__kmaps(map);
 	struct map *curr_map;
 	struct symbol *pos;
 	int count = 0, moved = 0;
 	struct rb_root *root = &dso->symbols[map->type];
 	struct rb_node *next = rb_first(root);
 
+	if (!kmaps)
+		return -1;
+
 	while (next) {
 		char *module;
 
@@ -681,8 +685,8 @@
 static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta,
 			       symbol_filter_t filter)
 {
-	struct map_groups *kmaps = map__kmap(map)->kmaps;
-	struct machine *machine = kmaps->machine;
+	struct map_groups *kmaps = map__kmaps(map);
+	struct machine *machine;
 	struct map *curr_map = map;
 	struct symbol *pos;
 	int count = 0, moved = 0;
@@ -690,6 +694,11 @@
 	struct rb_node *next = rb_first(root);
 	int kernel_range = 0;
 
+	if (!kmaps)
+		return -1;
+
+	machine = kmaps->machine;
+
 	while (next) {
 		char *module;
 
@@ -1024,9 +1033,12 @@
 static int validate_kcore_modules(const char *kallsyms_filename,
 				  struct map *map)
 {
-	struct map_groups *kmaps = map__kmap(map)->kmaps;
+	struct map_groups *kmaps = map__kmaps(map);
 	char modules_filename[PATH_MAX];
 
+	if (!kmaps)
+		return -EINVAL;
+
 	if (!filename_from_kallsyms_filename(modules_filename, "modules",
 					     kallsyms_filename))
 		return -EINVAL;
@@ -1042,6 +1054,9 @@
 {
 	struct kmap *kmap = map__kmap(map);
 
+	if (!kmap)
+		return -EINVAL;
+
 	if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) {
 		u64 start;
 
@@ -1080,8 +1095,8 @@
 static int dso__load_kcore(struct dso *dso, struct map *map,
 			   const char *kallsyms_filename)
 {
-	struct map_groups *kmaps = map__kmap(map)->kmaps;
-	struct machine *machine = kmaps->machine;
+	struct map_groups *kmaps = map__kmaps(map);
+	struct machine *machine;
 	struct kcore_mapfn_data md;
 	struct map *old_map, *new_map, *replacement_map = NULL;
 	bool is_64_bit;
@@ -1089,6 +1104,11 @@
 	char kcore_filename[PATH_MAX];
 	struct symbol *sym;
 
+	if (!kmaps)
+		return -EINVAL;
+
+	machine = kmaps->machine;
+
 	/* This function requires that the map is the kernel map */
 	if (map != machine->vmlinux_maps[map->type])
 		return -EINVAL;
@@ -1201,6 +1221,9 @@
 	struct kmap *kmap = map__kmap(map);
 	u64 addr;
 
+	if (!kmap)
+		return -1;
+
 	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
 		return 0;
 
@@ -1859,6 +1882,20 @@
 	return 0;
 }
 
+int setup_intlist(struct intlist **list, const char *list_str,
+		  const char *list_name)
+{
+	if (list_str == NULL)
+		return 0;
+
+	*list = intlist__new(list_str);
+	if (!*list) {
+		pr_err("problems parsing %s list\n", list_name);
+		return -1;
+	}
+	return 0;
+}
+
 static bool symbol__read_kptr_restrict(void)
 {
 	bool value = false;
@@ -1909,9 +1946,17 @@
 		       symbol_conf.comm_list_str, "comm") < 0)
 		goto out_free_dso_list;
 
+	if (setup_intlist(&symbol_conf.pid_list,
+		       symbol_conf.pid_list_str, "pid") < 0)
+		goto out_free_comm_list;
+
+	if (setup_intlist(&symbol_conf.tid_list,
+		       symbol_conf.tid_list_str, "tid") < 0)
+		goto out_free_pid_list;
+
 	if (setup_list(&symbol_conf.sym_list,
 		       symbol_conf.sym_list_str, "symbol") < 0)
-		goto out_free_comm_list;
+		goto out_free_tid_list;
 
 	/*
 	 * A path to symbols of "/" is identical to ""
@@ -1930,6 +1975,10 @@
 	symbol_conf.initialized = true;
 	return 0;
 
+out_free_tid_list:
+	intlist__delete(symbol_conf.tid_list);
+out_free_pid_list:
+	intlist__delete(symbol_conf.pid_list);
 out_free_comm_list:
 	strlist__delete(symbol_conf.comm_list);
 out_free_dso_list:
@@ -1944,6 +1993,8 @@
 	strlist__delete(symbol_conf.sym_list);
 	strlist__delete(symbol_conf.dso_list);
 	strlist__delete(symbol_conf.comm_list);
+	intlist__delete(symbol_conf.tid_list);
+	intlist__delete(symbol_conf.pid_list);
 	vmlinux_path__exit();
 	symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
 	symbol_conf.initialized = false;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 1650dcb..0956150 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -78,6 +78,7 @@
 }
 
 struct strlist;
+struct intlist;
 
 struct symbol_conf {
 	unsigned short	priv_size;
@@ -87,6 +88,7 @@
 			ignore_vmlinux_buildid,
 			show_kernel_path,
 			use_modules,
+			allow_aliases,
 			sort_by_name,
 			show_nr_samples,
 			show_total_period,
@@ -114,6 +116,8 @@
 	const char	*guestmount;
 	const char	*dso_list_str,
 			*comm_list_str,
+			*pid_list_str,
+			*tid_list_str,
 			*sym_list_str,
 			*col_width_list_str;
        struct strlist	*dso_list,
@@ -123,6 +127,8 @@
 			*dso_to_list,
 			*sym_from_list,
 			*sym_to_list;
+	struct intlist	*pid_list,
+			*tid_list;
 	const char	*symfs;
 };
 
@@ -294,5 +300,7 @@
 
 int setup_list(struct strlist **list, const char *list_str,
 	       const char *list_name);
+int setup_intlist(struct intlist **list, const char *list_str,
+		  const char *list_name);
 
 #endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
index e74c596..a53603b 100644
--- a/tools/perf/util/target.c
+++ b/tools/perf/util/target.c
@@ -123,11 +123,8 @@
 	if (errnum >= 0) {
 		const char *err = strerror_r(errnum, buf, buflen);
 
-		if (err != buf) {
-			size_t len = strlen(err);
-			memcpy(buf, err, min(buflen - 1, len));
-			*(buf + min(buflen - 1, len)) = '\0';
-		}
+		if (err != buf)
+			scnprintf(buf, buflen, "%s", err);
 
 		return 0;
 	}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 9ebc8b1..1c8fbc9 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -82,6 +82,20 @@
 	free(thread);
 }
 
+struct thread *thread__get(struct thread *thread)
+{
+	++thread->refcnt;
+	return thread;
+}
+
+void thread__put(struct thread *thread)
+{
+	if (thread && --thread->refcnt == 0) {
+		list_del_init(&thread->node);
+		thread__delete(thread);
+	}
+}
+
 struct comm *thread__comm(const struct thread *thread)
 {
 	if (list_empty(&thread->comm_list))
@@ -192,7 +206,6 @@
 		err = thread__set_comm(thread, comm, timestamp);
 		if (err)
 			return err;
-		thread->comm_set = true;
 	}
 
 	thread->ppid = parent->tid;
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 160fd06..9b8a54d 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -7,6 +7,7 @@
 #include <sys/types.h>
 #include "symbol.h"
 #include <strlist.h>
+#include <intlist.h>
 
 struct thread_stack;
 
@@ -20,6 +21,7 @@
 	pid_t			tid;
 	pid_t			ppid;
 	int			cpu;
+	int			refcnt;
 	char			shortname[3];
 	bool			comm_set;
 	bool			dead; /* if set thread has exited */
@@ -37,6 +39,18 @@
 struct thread *thread__new(pid_t pid, pid_t tid);
 int thread__init_map_groups(struct thread *thread, struct machine *machine);
 void thread__delete(struct thread *thread);
+
+struct thread *thread__get(struct thread *thread);
+void thread__put(struct thread *thread);
+
+static inline void __thread__zput(struct thread **thread)
+{
+	thread__put(*thread);
+	*thread = NULL;
+}
+
+#define thread__zput(thread) __thread__zput(&thread)
+
 static inline void thread__exited(struct thread *thread)
 {
 	thread->dead = true;
@@ -87,6 +101,16 @@
 		return true;
 	}
 
+	if (symbol_conf.pid_list &&
+	    !intlist__has_entry(symbol_conf.pid_list, thread->pid_)) {
+		return true;
+	}
+
+	if (symbol_conf.tid_list &&
+	    !intlist__has_entry(symbol_conf.tid_list, thread->tid)) {
+		return true;
+	}
+
 	return false;
 }
 
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index bb2708b..51d9e56 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -10,6 +10,7 @@
 struct perf_sample;
 struct perf_tool;
 struct machine;
+struct ordered_events;
 
 typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
 			    struct perf_sample *sample,
@@ -25,6 +26,9 @@
 typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
 			 struct perf_session *session);
 
+typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
+			struct ordered_events *oe);
+
 struct perf_tool {
 	event_sample	sample,
 			read;
@@ -38,8 +42,8 @@
 			unthrottle;
 	event_attr_op	attr;
 	event_op2	tracing_data;
-	event_op2	finished_round,
-			build_id,
+	event_oe	finished_round;
+	event_op2	build_id,
 			id_index;
 	bool		ordered_events;
 	bool		ordering_requires_timestamps;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index c36636f..25d6c73 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -112,8 +112,8 @@
 	return pevent_read_number(event->pevent, ptr, size);
 }
 
-void event_format__print(struct event_format *event,
-			 int cpu, void *data, int size)
+void event_format__fprintf(struct event_format *event,
+			   int cpu, void *data, int size, FILE *fp)
 {
 	struct pevent_record record;
 	struct trace_seq s;
@@ -125,10 +125,16 @@
 
 	trace_seq_init(&s);
 	pevent_event_info(&s, event, &record);
-	trace_seq_do_printf(&s);
+	trace_seq_do_fprintf(&s, fp);
 	trace_seq_destroy(&s);
 }
 
+void event_format__print(struct event_format *event,
+			 int cpu, void *data, int size)
+{
+	return event_format__fprintf(event, cpu, data, size, stdout);
+}
+
 void parse_proc_kallsyms(struct pevent *pevent,
 			 char *file, unsigned int size __maybe_unused)
 {
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 5c9bdd1..9df6105 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -43,7 +43,6 @@
 static void process_event_unsupported(union perf_event *event __maybe_unused,
 				      struct perf_sample *sample __maybe_unused,
 				      struct perf_evsel *evsel __maybe_unused,
-				      struct thread *thread __maybe_unused,
 				      struct addr_location *al __maybe_unused)
 {
 }
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 52aaa19..d5168f0 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -23,6 +23,9 @@
 
 int bigendian(void);
 
+void event_format__fprintf(struct event_format *event,
+			   int cpu, void *data, int size, FILE *fp);
+
 void event_format__print(struct event_format *event,
 			 int cpu, void *data, int size);
 
@@ -69,8 +72,7 @@
 	void (*process_event) (union perf_event *event,
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
-			       struct thread *thread,
-				   struct addr_location *al);
+			       struct addr_location *al);
 	int (*generate_script) (struct pevent *pevent, const char *outfile);
 };
 
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index e3c40a5..7b09a44 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -266,7 +266,7 @@
 				     u64 *fde_count)
 {
 	int ret = -EINVAL, fd;
-	u64 offset = dso->data.frame_offset;
+	u64 offset = dso->data.eh_frame_hdr_offset;
 
 	if (offset == 0) {
 		fd = dso__data_fd(dso, machine);
@@ -275,7 +275,7 @@
 
 		/* Check the .eh_frame section for unwinding info */
 		offset = elf_section_offset(fd, ".eh_frame_hdr");
-		dso->data.frame_offset = offset;
+		dso->data.eh_frame_hdr_offset = offset;
 	}
 
 	if (offset)
@@ -291,7 +291,7 @@
 					struct machine *machine, u64 *offset)
 {
 	int fd;
-	u64 ofs = dso->data.frame_offset;
+	u64 ofs = dso->data.debug_frame_offset;
 
 	if (ofs == 0) {
 		fd = dso__data_fd(dso, machine);
@@ -300,7 +300,7 @@
 
 		/* Check the .debug_frame section for unwinding info */
 		ofs = elf_section_offset(fd, ".debug_frame");
-		dso->data.frame_offset = ofs;
+		dso->data.debug_frame_offset = ofs;
 	}
 
 	*offset = ofs;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index b86744f..4ee6d0d 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -269,6 +269,13 @@
 void dump_stack(void) {}
 #endif
 
+void sighandler_dump_stack(int sig)
+{
+	psignal(sig, "perf");
+	dump_stack();
+	exit(sig);
+}
+
 void get_term_dimensions(struct winsize *ws)
 {
 	char *s = getenv("LINES");
@@ -303,13 +310,26 @@
 	tcsetattr(0, TCSANOW, &tc);
 }
 
-static void set_tracing_events_path(const char *mountpoint)
+static void set_tracing_events_path(const char *tracing, const char *mountpoint)
 {
-	snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s",
-		 mountpoint, "tracing/events");
+	snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
+		 mountpoint, tracing, "events");
 }
 
-const char *perf_debugfs_mount(const char *mountpoint)
+static const char *__perf_tracefs_mount(const char *mountpoint)
+{
+	const char *mnt;
+
+	mnt = tracefs_mount(mountpoint);
+	if (!mnt)
+		return NULL;
+
+	set_tracing_events_path("", mnt);
+
+	return mnt;
+}
+
+static const char *__perf_debugfs_mount(const char *mountpoint)
 {
 	const char *mnt;
 
@@ -317,7 +337,20 @@
 	if (!mnt)
 		return NULL;
 
-	set_tracing_events_path(mnt);
+	set_tracing_events_path("tracing/", mnt);
+
+	return mnt;
+}
+
+const char *perf_debugfs_mount(const char *mountpoint)
+{
+	const char *mnt;
+
+	mnt = __perf_tracefs_mount(mountpoint);
+	if (mnt)
+		return mnt;
+
+	mnt = __perf_debugfs_mount(mountpoint);
 
 	return mnt;
 }
@@ -325,12 +358,19 @@
 void perf_debugfs_set_path(const char *mntpt)
 {
 	snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt);
-	set_tracing_events_path(mntpt);
+	set_tracing_events_path("tracing/", mntpt);
+}
+
+static const char *find_tracefs(void)
+{
+	const char *path = __perf_tracefs_mount(NULL);
+
+	return path;
 }
 
 static const char *find_debugfs(void)
 {
-	const char *path = perf_debugfs_mount(NULL);
+	const char *path = __perf_debugfs_mount(NULL);
 
 	if (!path)
 		fprintf(stderr, "Your kernel does not support the debugfs filesystem");
@@ -344,6 +384,7 @@
  */
 const char *find_tracing_dir(void)
 {
+	const char *tracing_dir = "";
 	static char *tracing;
 	static int tracing_found;
 	const char *debugfs;
@@ -351,11 +392,15 @@
 	if (tracing_found)
 		return tracing;
 
-	debugfs = find_debugfs();
-	if (!debugfs)
-		return NULL;
+	debugfs = find_tracefs();
+	if (!debugfs) {
+		tracing_dir = "/tracing";
+		debugfs = find_debugfs();
+		if (!debugfs)
+			return NULL;
+	}
 
-	if (asprintf(&tracing, "%s/tracing", debugfs) < 0)
+	if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0)
 		return NULL;
 
 	tracing_found = 1;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 027a515..1ff23e0 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -75,6 +75,7 @@
 #include <linux/types.h>
 #include <sys/ttydefaults.h>
 #include <api/fs/debugfs.h>
+#include <api/fs/tracefs.h>
 #include <termios.h>
 #include <linux/bitops.h>
 #include <termios.h>
@@ -276,6 +277,7 @@
 char *rtrim(char *s);
 
 void dump_stack(void);
+void sighandler_dump_stack(int sig);
 
 extern unsigned int page_size;
 extern int cacheline_size;
@@ -327,4 +329,8 @@
 int gzip_decompress_to_file(const char *input, int output_fd);
 #endif
 
+#ifdef HAVE_LZMA_SUPPORT
+int lzma_decompress_to_file(const char *input, int output_fd);
+#endif
+
 #endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 368d64a..dd2812c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -310,7 +310,7 @@
 			cfr[jn] = cf[j] "." cfrep[cf[j]];
 		}
 		if (cpusr[jn] > ncpus && ncpus != 0)
-			ovf = "(!)";
+			ovf = "-ovf";
 		else
 			ovf = "";
 		print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`";
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index d2d2a86..4970121 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -1,2 +1,3 @@
 CONFIG_RCU_TORTURE_TEST=y
 CONFIG_PRINTK_TIME=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y