Merge branch 'linus' into perf/core

Conflicts:
	tools/perf/Makefile
	tools/perf/builtin-test.c
	tools/perf/perf.h
	tools/perf/tests/parse-events.c
	tools/perf/util/evsel.h

Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9776f06..2b48c52 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2859,6 +2859,22 @@
 			to facilitate early boot debugging.
 			See also Documentation/trace/events.txt
 
+	trace_options=[option-list]
+			[FTRACE] Enable or disable tracer options at boot.
+			The option-list is a comma delimited list of options
+			that can be enabled or disabled just as if you were
+			to echo the option name into
+
+			    /sys/kernel/debug/tracing/trace_options
+
+			For example, to enable stacktrace option (to dump the
+			stack trace of each event), add to the command line:
+
+			      trace_options=stacktrace
+
+			See also Documentation/trace/ftrace.txt "trace options"
+			section.
+
 	transparent_hugepage=
 			[KNL]
 			Format: [always|madvise|never]
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 91ce48f..9fd5eed 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -9,7 +9,6 @@
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc.o = -pg
-CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_pvclock.o = -pg
 CFLAGS_REMOVE_kvmclock.o = -pg
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4a3374e..4428fd1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1316,6 +1316,121 @@
 	.attrs = NULL,
 };
 
+struct perf_pmu_events_attr {
+	struct device_attribute attr;
+	u64 id;
+};
+
+/*
+ * Remove all undefined events (x86_pmu.event_map(id) == 0)
+ * out of events_attr attributes.
+ */
+static void __init filter_events(struct attribute **attrs)
+{
+	int i, j;
+
+	for (i = 0; attrs[i]; i++) {
+		if (x86_pmu.event_map(i))
+			continue;
+
+		for (j = i; attrs[j]; j++)
+			attrs[j] = attrs[j + 1];
+
+		/* Check the shifted attr. */
+		i--;
+	}
+}
+
+static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr = \
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	u64 config = x86_pmu.event_map(pmu_attr->id);
+	return x86_pmu.events_sysfs_show(page, config);
+}
+
+#define EVENT_VAR(_id)  event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id)					\
+static struct perf_pmu_events_attr EVENT_VAR(_id) = {		\
+	.attr = __ATTR(_name, 0444, events_sysfs_show, NULL),	\
+	.id   =  PERF_COUNT_HW_##_id,				\
+};
+
+EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
+EVENT_ATTR(instructions,		INSTRUCTIONS		);
+EVENT_ATTR(cache-references,		CACHE_REFERENCES	);
+EVENT_ATTR(cache-misses, 		CACHE_MISSES		);
+EVENT_ATTR(branch-instructions,		BRANCH_INSTRUCTIONS	);
+EVENT_ATTR(branch-misses,		BRANCH_MISSES		);
+EVENT_ATTR(bus-cycles,			BUS_CYCLES		);
+EVENT_ATTR(stalled-cycles-frontend,	STALLED_CYCLES_FRONTEND	);
+EVENT_ATTR(stalled-cycles-backend,	STALLED_CYCLES_BACKEND	);
+EVENT_ATTR(ref-cycles,			REF_CPU_CYCLES		);
+
+static struct attribute *empty_attrs;
+
+static struct attribute *events_attr[] = {
+	EVENT_PTR(CPU_CYCLES),
+	EVENT_PTR(INSTRUCTIONS),
+	EVENT_PTR(CACHE_REFERENCES),
+	EVENT_PTR(CACHE_MISSES),
+	EVENT_PTR(BRANCH_INSTRUCTIONS),
+	EVENT_PTR(BRANCH_MISSES),
+	EVENT_PTR(BUS_CYCLES),
+	EVENT_PTR(STALLED_CYCLES_FRONTEND),
+	EVENT_PTR(STALLED_CYCLES_BACKEND),
+	EVENT_PTR(REF_CPU_CYCLES),
+	NULL,
+};
+
+static struct attribute_group x86_pmu_events_group = {
+	.name = "events",
+	.attrs = events_attr,
+};
+
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
+{
+	u64 umask  = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+	u64 cmask  = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
+	bool edge  = (config & ARCH_PERFMON_EVENTSEL_EDGE);
+	bool pc    = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
+	bool any   = (config & ARCH_PERFMON_EVENTSEL_ANY);
+	bool inv   = (config & ARCH_PERFMON_EVENTSEL_INV);
+	ssize_t ret;
+
+	/*
+	* We have whole page size to spend and just little data
+	* to write, so we can safely use sprintf.
+	*/
+	ret = sprintf(page, "event=0x%02llx", event);
+
+	if (umask)
+		ret += sprintf(page + ret, ",umask=0x%02llx", umask);
+
+	if (edge)
+		ret += sprintf(page + ret, ",edge");
+
+	if (pc)
+		ret += sprintf(page + ret, ",pc");
+
+	if (any)
+		ret += sprintf(page + ret, ",any");
+
+	if (inv)
+		ret += sprintf(page + ret, ",inv");
+
+	if (cmask)
+		ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
+
+	ret += sprintf(page + ret, "\n");
+
+	return ret;
+}
+
 static int __init init_hw_perf_events(void)
 {
 	struct x86_pmu_quirk *quirk;
@@ -1362,6 +1477,11 @@
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
+	if (!x86_pmu.events_sysfs_show)
+		x86_pmu_events_group.attrs = &empty_attrs;
+	else
+		filter_events(x86_pmu_events_group.attrs);
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
@@ -1651,6 +1771,7 @@
 static const struct attribute_group *x86_pmu_attr_groups[] = {
 	&x86_pmu_attr_group,
 	&x86_pmu_format_group,
+	&x86_pmu_events_group,
 	NULL,
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 271d257..115c1ea 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -354,6 +354,8 @@
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
 
+	ssize_t		(*events_sysfs_show)(char *page, u64 config);
+
 	/*
 	 * CPU Hotplug hooks
 	 */
@@ -536,6 +538,9 @@
 	regs->ip = ip;
 }
 
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
+ssize_t intel_event_sysfs_show(char *page, u64 config);
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 4528ae7..c93bc4e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -568,6 +568,14 @@
 	}
 }
 
+static ssize_t amd_event_sysfs_show(char *page, u64 config)
+{
+	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
+		    (config & AMD64_EVENTSEL_EVENT) >> 24;
+
+	return x86_event_sysfs_show(page, config, event);
+}
+
 static __initconst const struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= x86_pmu_handle_irq,
@@ -591,6 +599,7 @@
 	.put_event_constraints	= amd_put_event_constraints,
 
 	.format_attrs		= amd_format_attr,
+	.events_sysfs_show	= amd_event_sysfs_show,
 
 	.cpu_prepare		= amd_pmu_cpu_prepare,
 	.cpu_starting		= amd_pmu_cpu_starting,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 324bb52..93b9e11 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1603,6 +1603,13 @@
 	NULL,
 };
 
+ssize_t intel_event_sysfs_show(char *page, u64 config)
+{
+	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
+
+	return x86_event_sysfs_show(page, config, event);
+}
+
 static __initconst const struct x86_pmu core_pmu = {
 	.name			= "core",
 	.handle_irq		= x86_pmu_handle_irq,
@@ -1628,6 +1635,7 @@
 	.event_constraints	= intel_core_event_constraints,
 	.guest_get_msrs		= core_guest_get_msrs,
 	.format_attrs		= intel_arch_formats_attr,
+	.events_sysfs_show	= intel_event_sysfs_show,
 };
 
 struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1766,6 +1774,7 @@
 	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
+	.events_sysfs_show	= intel_event_sysfs_show,
 
 	.cpu_prepare		= intel_pmu_cpu_prepare,
 	.cpu_starting		= intel_pmu_cpu_starting,
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 7d0270b..f2af39f 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -227,6 +227,8 @@
 	.event_constraints	= p6_event_constraints,
 
 	.format_attrs		= intel_p6_formats_attr,
+	.events_sysfs_show	= intel_event_sysfs_show,
+
 };
 
 __init int p6_pmu_init(void)
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 4929c1b..801602b 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -195,12 +195,6 @@
 	ts->tv_nsec = 0;
 }
 
-unsigned long long native_read_tsc(void)
-{
-	return __native_read_tsc();
-}
-EXPORT_SYMBOL(native_read_tsc);
-
 
 static struct resource rtc_resources[] = {
 	[0] = {
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cfa5d4f..06ccb50 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -77,6 +77,12 @@
 sched_clock(void) __attribute__((alias("native_sched_clock")));
 #endif
 
+unsigned long long native_read_tsc(void)
+{
+	return __native_read_tsc();
+}
+EXPORT_SYMBOL(native_read_tsc);
+
 int check_tsc_unstable(void)
 {
 	return tsc_unstable;
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 642928c..b80c8dd 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -127,13 +127,13 @@
 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 					struct ring_buffer_event *event,
 					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
-				       struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
-					    struct ring_buffer_event *event,
-					    unsigned long flags, int pc,
-					    struct pt_regs *regs);
+void trace_buffer_unlock_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event,
+				unsigned long flags, int pc);
+void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+				     struct ring_buffer_event *event,
+				     unsigned long flags, int pc,
+				     struct pt_regs *regs);
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 					 struct ring_buffer_event *event);
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7d8dfc7..3993095 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -527,9 +527,6 @@
 
 extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 #else
-static inline __printf(1, 2)
-int trace_printk(const char *fmt, ...);
-
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
 static inline void ftrace_off_permanent(void) { }
@@ -539,8 +536,8 @@
 static inline void tracing_off(void) { }
 static inline int tracing_is_on(void) { return 0; }
 
-static inline int
-trace_printk(const char *fmt, ...)
+static inline __printf(1, 2)
+int trace_printk(const char *fmt, ...)
 {
 	return 0;
 }
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 6c8835f..519777e 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -159,13 +159,14 @@
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
-unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu);
+u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a763888..698f2a8 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -545,8 +545,7 @@
 	{ assign; }							\
 									\
 	if (!filter_current_check_discard(buffer, event_call, entry, event)) \
-		trace_nowake_buffer_unlock_commit(buffer,		\
-						  event, irq_flags, pc); \
+		trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \
 }
 /*
  * The ftrace_test_probe is compiled out, it is only here as a build time check
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 31966a4..84bc419 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -31,27 +31,4 @@
 	struct ftrace_event_call *exit_event;
 };
 
-#ifdef CONFIG_FTRACE_SYSCALLS
-extern unsigned long arch_syscall_addr(int nr);
-extern int init_syscall_trace(struct ftrace_event_call *call);
-
-extern int reg_event_syscall_enter(struct ftrace_event_call *call);
-extern void unreg_event_syscall_enter(struct ftrace_event_call *call);
-extern int reg_event_syscall_exit(struct ftrace_event_call *call);
-extern void unreg_event_syscall_exit(struct ftrace_event_call *call);
-extern int
-ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
-enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags,
-				      struct trace_event *event);
-enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags,
-				     struct trace_event *event);
-#endif
-
-#ifdef CONFIG_PERF_EVENTS
-int perf_sysenter_enable(struct ftrace_event_call *call);
-void perf_sysenter_disable(struct ftrace_event_call *call);
-int perf_sysexit_enable(struct ftrace_event_call *call);
-void perf_sysexit_disable(struct ftrace_event_call *call);
-#endif
-
 #endif /* _TRACE_SYSCALL_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4cea4f4..5d89335 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -119,6 +119,7 @@
 	select BINARY_PRINTF
 	select EVENT_TRACING
 	select TRACE_CLOCK
+	select IRQ_WORK
 
 config GENERIC_TRACER
 	bool
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9dcf15d..4451aa3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2868,7 +2868,7 @@
 {
 	return register_ftrace_command(&ftrace_mod_cmd);
 }
-device_initcall(ftrace_mod_cmd_init);
+core_initcall(ftrace_mod_cmd_init);
 
 static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 				      struct ftrace_ops *op, struct pt_regs *pt_regs)
@@ -4055,7 +4055,7 @@
 	ftrace_enabled = 1;
 	return 0;
 }
-device_initcall(ftrace_nodyn_init);
+core_initcall(ftrace_nodyn_init);
 
 static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
 static inline void ftrace_startup_enable(int command) { }
@@ -4381,7 +4381,7 @@
 	if (strlen(tmp) == 0)
 		return 1;
 
-	ret = strict_strtol(tmp, 10, &val);
+	ret = kstrtol(tmp, 10, &val);
 	if (ret < 0)
 		return ret;
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b979426..3c7834c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -460,9 +460,10 @@
 	unsigned long			lost_events;
 	unsigned long			last_overrun;
 	local_t				entries_bytes;
-	local_t				commit_overrun;
-	local_t				overrun;
 	local_t				entries;
+	local_t				overrun;
+	local_t				commit_overrun;
+	local_t				dropped_events;
 	local_t				committing;
 	local_t				commits;
 	unsigned long			read;
@@ -1820,7 +1821,7 @@
 }
 
 /**
- * ring_buffer_update_event - update event type and data
+ * rb_update_event - update event type and data
  * @event: the even to update
  * @type: the type of event
  * @length: the size of the event field in the ring buffer
@@ -2155,8 +2156,10 @@
 			 * If we are not in overwrite mode,
 			 * this is easy, just stop here.
 			 */
-			if (!(buffer->flags & RB_FL_OVERWRITE))
+			if (!(buffer->flags & RB_FL_OVERWRITE)) {
+				local_inc(&cpu_buffer->dropped_events);
 				goto out_reset;
+			}
 
 			ret = rb_handle_head_page(cpu_buffer,
 						  tail_page,
@@ -2720,8 +2723,8 @@
  * and not the length of the event which would hold the header.
  */
 int ring_buffer_write(struct ring_buffer *buffer,
-			unsigned long length,
-			void *data)
+		      unsigned long length,
+		      void *data)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
@@ -2929,12 +2932,12 @@
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to read from.
  */
-unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
+u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
 {
 	unsigned long flags;
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct buffer_page *bpage;
-	unsigned long ret;
+	u64 ret;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
@@ -2995,7 +2998,8 @@
 EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
 
 /**
- * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
+ * ring_buffer_overrun_cpu - get the number of overruns caused by the ring
+ * buffer wrapping around (only if RB_FL_OVERWRITE is on).
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to get the number of overruns from
  */
@@ -3015,7 +3019,9 @@
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
- * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
+ * commits failing due to the buffer wrapping around while there are uncommitted
+ * events, such as during an interrupt storm.
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to get the number of overruns from
  */
@@ -3036,6 +3042,28 @@
 EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
 
 /**
+ * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
+ * the ring buffer filling up (only if RB_FL_OVERWRITE is off).
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = local_read(&cpu_buffer->dropped_events);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
@@ -3864,9 +3892,10 @@
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
-	local_set(&cpu_buffer->commit_overrun, 0);
 	local_set(&cpu_buffer->entries_bytes, 0);
 	local_set(&cpu_buffer->overrun, 0);
+	local_set(&cpu_buffer->commit_overrun, 0);
+	local_set(&cpu_buffer->dropped_events, 0);
 	local_set(&cpu_buffer->entries, 0);
 	local_set(&cpu_buffer->committing, 0);
 	local_set(&cpu_buffer->commits, 0);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 31e4f55..c1434b5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -19,6 +19,7 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
+#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -78,6 +79,21 @@
 }
 
 /*
+ * To prevent the comm cache from being overwritten when no
+ * tracing is active, only save the comm when a trace event
+ * occurred.
+ */
+static DEFINE_PER_CPU(bool, trace_cmdline_save);
+
+/*
+ * When a reader is waiting for data, then this variable is
+ * set to true.
+ */
+static bool trace_wakeup_needed;
+
+static struct irq_work trace_work_wakeup;
+
+/*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
  * of the tracer is successful. But that is the only place that sets
@@ -139,6 +155,18 @@
 }
 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
+
+static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
+static char *trace_boot_options __initdata;
+
+static int __init set_trace_boot_options(char *str)
+{
+	strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
+	trace_boot_options = trace_boot_options_buf;
+	return 0;
+}
+__setup("trace_options=", set_trace_boot_options);
+
 unsigned long long ns2usecs(cycle_t nsec)
 {
 	nsec += 500;
@@ -198,20 +226,9 @@
 
 static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
 
-/* tracer_enabled is used to toggle activation of a tracer */
-static int			tracer_enabled = 1;
-
-/**
- * tracing_is_enabled - return tracer_enabled status
- *
- * This function is used by other tracers to know the status
- * of the tracer_enabled flag.  Tracers may use this function
- * to know if it should enable their features when starting
- * up. See irqsoff tracer for an example (start_irqsoff_tracer).
- */
 int tracing_is_enabled(void)
 {
-	return tracer_enabled;
+	return tracing_is_on();
 }
 
 /*
@@ -333,12 +350,18 @@
 static int trace_stop_count;
 static DEFINE_RAW_SPINLOCK(tracing_start_lock);
 
-static void wakeup_work_handler(struct work_struct *work)
+/**
+ * trace_wake_up - wake up tasks waiting for trace input
+ *
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * trace_wait queue. These is used with trace_poll for tasks polling the
+ * trace.
+ */
+static void trace_wake_up(struct irq_work *work)
 {
-	wake_up(&trace_wait);
-}
+	wake_up_all(&trace_wait);
 
-static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
+}
 
 /**
  * tracing_on - enable tracing buffers
@@ -393,22 +416,6 @@
 }
 EXPORT_SYMBOL_GPL(tracing_is_on);
 
-/**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-void trace_wake_up(void)
-{
-	const unsigned long delay = msecs_to_jiffies(2);
-
-	if (trace_flags & TRACE_ITER_BLOCK)
-		return;
-	schedule_delayed_work(&wakeup_work, delay);
-}
-
 static int __init set_buf_size(char *str)
 {
 	unsigned long buf_size;
@@ -431,7 +438,7 @@
 
 	if (!str)
 		return 0;
-	ret = strict_strtoul(str, 0, &threshold);
+	ret = kstrtoul(str, 0, &threshold);
 	if (ret < 0)
 		return 0;
 	tracing_thresh = threshold * 1000;
@@ -757,6 +764,40 @@
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
+static void default_wait_pipe(struct trace_iterator *iter)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * The events can happen in critical sections where
+	 * checking a work queue can cause deadlocks.
+	 * After adding a task to the queue, this flag is set
+	 * only to notify events to try to wake up the queue
+	 * using irq_work.
+	 *
+	 * We don't clear it even if the buffer is no longer
+	 * empty. The flag only causes the next event to run
+	 * irq_work to do the work queue wake up. The worse
+	 * that can happen if we race with !trace_empty() is that
+	 * an event will cause an irq_work to try to wake up
+	 * an empty queue.
+	 *
+	 * There's no reason to protect this flag either, as
+	 * the work queue and irq_work logic will do the necessary
+	 * synchronization for the wake ups. The only thing
+	 * that is necessary is that the wake up happens after
+	 * a task has been queued. It's OK for spurious wake ups.
+	 */
+	trace_wakeup_needed = true;
+
+	if (trace_empty(iter))
+		schedule();
+
+	finish_wait(&trace_wait, &wait);
+}
+
 /**
  * register_tracer - register a tracer with the ftrace system.
  * @type - the plugin for the tracer
@@ -875,32 +916,6 @@
 	return ret;
 }
 
-void unregister_tracer(struct tracer *type)
-{
-	struct tracer **t;
-
-	mutex_lock(&trace_types_lock);
-	for (t = &trace_types; *t; t = &(*t)->next) {
-		if (*t == type)
-			goto found;
-	}
-	pr_info("Tracer %s not registered\n", type->name);
-	goto out;
-
- found:
-	*t = (*t)->next;
-
-	if (type == current_trace && tracer_enabled) {
-		tracer_enabled = 0;
-		tracing_stop();
-		if (current_trace->stop)
-			current_trace->stop(&global_trace);
-		current_trace = &nop_trace;
-	}
-out:
-	mutex_unlock(&trace_types_lock);
-}
-
 void tracing_reset(struct trace_array *tr, int cpu)
 {
 	struct ring_buffer *buffer = tr->buffer;
@@ -1131,10 +1146,14 @@
 
 void tracing_record_cmdline(struct task_struct *tsk)
 {
-	if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
-	    !tracing_is_on())
+	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
 		return;
 
+	if (!__this_cpu_read(trace_cmdline_save))
+		return;
+
+	__this_cpu_write(trace_cmdline_save, false);
+
 	trace_save_cmdline(tsk);
 }
 
@@ -1178,27 +1197,36 @@
 	return event;
 }
 
+void
+__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
+{
+	__this_cpu_write(trace_cmdline_save, true);
+	if (trace_wakeup_needed) {
+		trace_wakeup_needed = false;
+		/* irq_work_queue() supplies it's own memory barriers */
+		irq_work_queue(&trace_work_wakeup);
+	}
+	ring_buffer_unlock_commit(buffer, event);
+}
+
 static inline void
 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
 			     struct ring_buffer_event *event,
-			     unsigned long flags, int pc,
-			     int wake)
+			     unsigned long flags, int pc)
 {
-	ring_buffer_unlock_commit(buffer, event);
+	__buffer_unlock_commit(buffer, event);
 
 	ftrace_trace_stack(buffer, flags, 6, pc);
 	ftrace_trace_userstack(buffer, flags, pc);
-
-	if (wake)
-		trace_wake_up();
 }
 
 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
+EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
 
 struct ring_buffer_event *
 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
@@ -1215,29 +1243,21 @@
 					struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
 
-void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
-				       struct ring_buffer_event *event,
-				       unsigned long flags, int pc)
+void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+				     struct ring_buffer_event *event,
+				     unsigned long flags, int pc,
+				     struct pt_regs *regs)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
-}
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
-
-void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
-					    struct ring_buffer_event *event,
-					    unsigned long flags, int pc,
-					    struct pt_regs *regs)
-{
-	ring_buffer_unlock_commit(buffer, event);
+	__buffer_unlock_commit(buffer, event);
 
 	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
+EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
 
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 					 struct ring_buffer_event *event)
@@ -1269,7 +1289,7 @@
 	entry->parent_ip		= parent_ip;
 
 	if (!filter_check_discard(call, entry, buffer, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 }
 
 void
@@ -1362,7 +1382,7 @@
 	entry->size = trace.nr_entries;
 
 	if (!filter_check_discard(call, entry, buffer, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 
  out:
 	/* Again, don't let gcc optimize things here */
@@ -1458,7 +1478,7 @@
 
 	save_stack_trace_user(&trace);
 	if (!filter_check_discard(call, entry, buffer, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 
  out_drop_count:
 	__this_cpu_dec(user_stack_count);
@@ -1559,10 +1579,10 @@
 	return -ENOMEM;
 }
 
+static int buffers_allocated;
+
 void trace_printk_init_buffers(void)
 {
-	static int buffers_allocated;
-
 	if (buffers_allocated)
 		return;
 
@@ -1571,7 +1591,38 @@
 
 	pr_info("ftrace: Allocated trace_printk buffers\n");
 
+	/* Expand the buffers to set size */
+	tracing_update_buffers();
+
 	buffers_allocated = 1;
+
+	/*
+	 * trace_printk_init_buffers() can be called by modules.
+	 * If that happens, then we need to start cmdline recording
+	 * directly here. If the global_trace.buffer is already
+	 * allocated here, then this was called by module code.
+	 */
+	if (global_trace.buffer)
+		tracing_start_cmdline_record();
+}
+
+void trace_printk_start_comm(void)
+{
+	/* Start tracing comms if trace printk is set */
+	if (!buffers_allocated)
+		return;
+	tracing_start_cmdline_record();
+}
+
+static void trace_printk_start_stop_comm(int enabled)
+{
+	if (!buffers_allocated)
+		return;
+
+	if (enabled)
+		tracing_start_cmdline_record();
+	else
+		tracing_stop_cmdline_record();
 }
 
 /**
@@ -1622,7 +1673,7 @@
 
 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
 	if (!filter_check_discard(call, entry, buffer, event)) {
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
 
@@ -1693,7 +1744,7 @@
 	memcpy(&entry->buf, tbuffer, len);
 	entry->buf[len] = '\0';
 	if (!filter_check_discard(call, entry, buffer, event)) {
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
  out:
@@ -2794,26 +2845,19 @@
 
 	if (mask == TRACE_ITER_OVERWRITE)
 		ring_buffer_change_overwrite(global_trace.buffer, enabled);
+
+	if (mask == TRACE_ITER_PRINTK)
+		trace_printk_start_stop_comm(enabled);
 }
 
-static ssize_t
-tracing_trace_options_write(struct file *filp, const char __user *ubuf,
-			size_t cnt, loff_t *ppos)
+static int trace_set_options(char *option)
 {
-	char buf[64];
 	char *cmp;
 	int neg = 0;
-	int ret;
+	int ret = 0;
 	int i;
 
-	if (cnt >= sizeof(buf))
-		return -EINVAL;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
+	cmp = strstrip(option);
 
 	if (strncmp(cmp, "no", 2) == 0) {
 		neg = 1;
@@ -2832,10 +2876,25 @@
 		mutex_lock(&trace_types_lock);
 		ret = set_tracer_option(current_trace, cmp, neg);
 		mutex_unlock(&trace_types_lock);
-		if (ret)
-			return ret;
 	}
 
+	return ret;
+}
+
+static ssize_t
+tracing_trace_options_write(struct file *filp, const char __user *ubuf,
+			size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	trace_set_options(buf);
+
 	*ppos += cnt;
 
 	return cnt;
@@ -2940,56 +2999,6 @@
 };
 
 static ssize_t
-tracing_ctrl_read(struct file *filp, char __user *ubuf,
-		  size_t cnt, loff_t *ppos)
-{
-	char buf[64];
-	int r;
-
-	r = sprintf(buf, "%u\n", tracer_enabled);
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t
-tracing_ctrl_write(struct file *filp, const char __user *ubuf,
-		   size_t cnt, loff_t *ppos)
-{
-	struct trace_array *tr = filp->private_data;
-	unsigned long val;
-	int ret;
-
-	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
-	if (ret)
-		return ret;
-
-	val = !!val;
-
-	mutex_lock(&trace_types_lock);
-	if (tracer_enabled ^ val) {
-
-		/* Only need to warn if this is used to change the state */
-		WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
-
-		if (val) {
-			tracer_enabled = 1;
-			if (current_trace->start)
-				current_trace->start(tr);
-			tracing_start();
-		} else {
-			tracer_enabled = 0;
-			tracing_stop();
-			if (current_trace->stop)
-				current_trace->stop(tr);
-		}
-	}
-	mutex_unlock(&trace_types_lock);
-
-	*ppos += cnt;
-
-	return cnt;
-}
-
-static ssize_t
 tracing_set_trace_read(struct file *filp, char __user *ubuf,
 		       size_t cnt, loff_t *ppos)
 {
@@ -3030,6 +3039,10 @@
 	 */
 	ring_buffer_expanded = 1;
 
+	/* May be called before buffers are initialized */
+	if (!global_trace.buffer)
+		return 0;
+
 	ret = ring_buffer_resize(global_trace.buffer, size, cpu);
 	if (ret < 0)
 		return ret;
@@ -3385,19 +3398,6 @@
 	}
 }
 
-
-void default_wait_pipe(struct trace_iterator *iter)
-{
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-	if (trace_empty(iter))
-		schedule();
-
-	finish_wait(&trace_wait, &wait);
-}
-
 /*
  * This is a make-shift waitqueue.
  * A tracer might use this callback on some rare cases:
@@ -3438,7 +3438,7 @@
 			return -EINTR;
 
 		/*
-		 * We block until we read something and tracing is disabled.
+		 * We block until we read something and tracing is enabled.
 		 * We still block if tracing is disabled, but we have never
 		 * read anything. This allows a user to cat this file, and
 		 * then enable tracing. But after we have read something,
@@ -3446,7 +3446,7 @@
 		 *
 		 * iter->pos will be 0 if we haven't read anything.
 		 */
-		if (!tracer_enabled && iter->pos)
+		if (tracing_is_enabled() && iter->pos)
 			break;
 	}
 
@@ -3955,7 +3955,7 @@
 	} else
 		entry->buf[cnt] = '\0';
 
-	ring_buffer_unlock_commit(buffer, event);
+	__buffer_unlock_commit(buffer, event);
 
 	written = cnt;
 
@@ -4016,6 +4016,14 @@
 	if (max_tr.buffer)
 		ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
 
+	/*
+	 * New clock may not be consistent with the previous clock.
+	 * Reset the buffer so that it doesn't have incomparable timestamps.
+	 */
+	tracing_reset_online_cpus(&global_trace);
+	if (max_tr.buffer)
+		tracing_reset_online_cpus(&max_tr);
+
 	mutex_unlock(&trace_types_lock);
 
 	*fpos += cnt;
@@ -4037,13 +4045,6 @@
 	.llseek		= generic_file_llseek,
 };
 
-static const struct file_operations tracing_ctrl_fops = {
-	.open		= tracing_open_generic,
-	.read		= tracing_ctrl_read,
-	.write		= tracing_ctrl_write,
-	.llseek		= generic_file_llseek,
-};
-
 static const struct file_operations set_tracer_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_set_trace_read,
@@ -4385,6 +4386,9 @@
 	usec_rem = do_div(t, USEC_PER_SEC);
 	trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
 
+	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "dropped events: %ld\n", cnt);
+
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -4815,9 +4819,6 @@
 
 	d_tracer = tracing_init_dentry();
 
-	trace_create_file("tracing_enabled", 0644, d_tracer,
-			&global_trace, &tracing_ctrl_fops);
-
 	trace_create_file("trace_options", 0644, d_tracer,
 			NULL, &tracing_iter_fops);
 
@@ -5089,6 +5090,7 @@
 
 	/* Only allocate trace_printk buffers if a trace_printk exists */
 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
+		/* Must be called before global_trace.buffer is allocated */
 		trace_printk_init_buffers();
 
 	/* To save memory, keep the ring buffer size to its minimum */
@@ -5136,6 +5138,7 @@
 #endif
 
 	trace_init_cmdlines();
+	init_irq_work(&trace_work_wakeup, trace_wake_up);
 
 	register_tracer(&nop_trace);
 	current_trace = &nop_trace;
@@ -5147,6 +5150,13 @@
 
 	register_die_notifier(&trace_die_notifier);
 
+	while (trace_boot_options) {
+		char *option;
+
+		option = strsep(&trace_boot_options, ",");
+		trace_set_options(option);
+	}
+
 	return 0;
 
 out_free_cpumask:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c15f528..55010ed 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -285,8 +285,8 @@
 	int			(*set_flag)(u32 old_flags, u32 bit, int set);
 	struct tracer		*next;
 	struct tracer_flags	*flags;
-	int			print_max;
-	int			use_max_tr;
+	bool			print_max;
+	bool			use_max_tr;
 };
 
 
@@ -327,7 +327,6 @@
 
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
-void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
 void tracing_reset_current(int cpu);
@@ -349,9 +348,6 @@
 			  unsigned long len,
 			  unsigned long flags,
 			  int pc);
-void trace_buffer_unlock_commit(struct ring_buffer *buffer,
-				struct ring_buffer_event *event,
-				unsigned long flags, int pc);
 
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
@@ -359,6 +355,9 @@
 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
 					  int *ent_cpu, u64 *ent_ts);
 
+void __buffer_unlock_commit(struct ring_buffer *buffer,
+			    struct ring_buffer_event *event);
+
 int trace_empty(struct trace_iterator *iter);
 
 void *trace_find_next_entry_inc(struct trace_iterator *iter);
@@ -367,7 +366,6 @@
 
 void tracing_iter_reset(struct trace_iterator *iter, int cpu);
 
-void default_wait_pipe(struct trace_iterator *iter);
 void poll_wait_pipe(struct trace_iterator *iter);
 
 void ftrace(struct trace_array *tr,
@@ -407,7 +405,6 @@
 void tracing_stop_sched_switch_record(void);
 void tracing_start_sched_switch_record(void);
 int register_tracer(struct tracer *type);
-void unregister_tracer(struct tracer *type);
 int is_tracing_stopped(void);
 enum trace_file_type {
 	TRACE_FILE_LAT_FMT	= 1,
@@ -841,6 +838,7 @@
 extern const char *__stop___trace_bprintk_fmt[];
 
 void trace_printk_init_buffers(void);
+void trace_printk_start_comm(void);
 
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)	\
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 8d3538b..95e9684 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -77,7 +77,7 @@
 	entry->correct = val == expect;
 
 	if (!filter_check_discard(call, entry, buffer, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
@@ -199,7 +199,7 @@
 	}
 	return register_tracer(&branch_trace);
 }
-device_initcall(init_branch_tracer);
+core_initcall(init_branch_tracer);
 
 #else
 static inline
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d608d09..880073d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -491,19 +491,6 @@
 	mutex_unlock(&event_mutex);
 }
 
-static int
-ftrace_event_seq_open(struct inode *inode, struct file *file)
-{
-	const struct seq_operations *seq_ops;
-
-	if ((file->f_mode & FMODE_WRITE) &&
-	    (file->f_flags & O_TRUNC))
-		ftrace_clear_events();
-
-	seq_ops = inode->i_private;
-	return seq_open(file, seq_ops);
-}
-
 static ssize_t
 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 		  loff_t *ppos)
@@ -980,6 +967,9 @@
 	return r;
 }
 
+static int ftrace_event_avail_open(struct inode *inode, struct file *file);
+static int ftrace_event_set_open(struct inode *inode, struct file *file);
+
 static const struct seq_operations show_event_seq_ops = {
 	.start = t_start,
 	.next = t_next,
@@ -995,14 +985,14 @@
 };
 
 static const struct file_operations ftrace_avail_fops = {
-	.open = ftrace_event_seq_open,
+	.open = ftrace_event_avail_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
 	.release = seq_release,
 };
 
 static const struct file_operations ftrace_set_event_fops = {
-	.open = ftrace_event_seq_open,
+	.open = ftrace_event_set_open,
 	.read = seq_read,
 	.write = ftrace_event_write,
 	.llseek = seq_lseek,
@@ -1078,6 +1068,26 @@
 	return d_events;
 }
 
+static int
+ftrace_event_avail_open(struct inode *inode, struct file *file)
+{
+	const struct seq_operations *seq_ops = &show_event_seq_ops;
+
+	return seq_open(file, seq_ops);
+}
+
+static int
+ftrace_event_set_open(struct inode *inode, struct file *file)
+{
+	const struct seq_operations *seq_ops = &show_set_event_seq_ops;
+
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC))
+		ftrace_clear_events();
+
+	return seq_open(file, seq_ops);
+}
+
 static struct dentry *
 event_subsystem_dir(const char *name, struct dentry *d_events)
 {
@@ -1489,6 +1499,9 @@
 		if (ret)
 			pr_warn("Failed to enable trace event: %s\n", token);
 	}
+
+	trace_printk_start_comm();
+
 	return 0;
 }
 
@@ -1505,15 +1518,13 @@
 		return 0;
 
 	entry = debugfs_create_file("available_events", 0444, d_tracer,
-				    (void *)&show_event_seq_ops,
-				    &ftrace_avail_fops);
+				    NULL, &ftrace_avail_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
 			   "'available_events' entry\n");
 
 	entry = debugfs_create_file("set_event", 0644, d_tracer,
-				    (void *)&show_set_event_seq_ops,
-				    &ftrace_set_event_fops);
+				    NULL, &ftrace_set_event_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs "
 			   "'set_event' entry\n");
@@ -1749,7 +1760,7 @@
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
 
-	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
+	trace_buffer_unlock_commit(buffer, event, flags, pc);
 
  out:
 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index c154797..e5b0ca8 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1000,9 +1000,9 @@
 		}
 	} else {
 		if (field->is_signed)
-			ret = strict_strtoll(pred->regex.pattern, 0, &val);
+			ret = kstrtoll(pred->regex.pattern, 0, &val);
 		else
-			ret = strict_strtoull(pred->regex.pattern, 0, &val);
+			ret = kstrtoull(pred->regex.pattern, 0, &val);
 		if (ret) {
 			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
 			return -EINVAL;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 507a7a9..bb227e3 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -366,7 +366,7 @@
 	 * We use the callback data field (which is a pointer)
 	 * as our counter.
 	 */
-	ret = strict_strtoul(number, 0, (unsigned long *)&count);
+	ret = kstrtoul(number, 0, (unsigned long *)&count);
 	if (ret)
 		return ret;
 
@@ -411,5 +411,4 @@
 	init_func_cmd_traceon();
 	return register_tracer(&function_trace);
 }
-device_initcall(init_function_trace);
-
+core_initcall(init_function_trace);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 99b4378..4edb4b7 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -223,7 +223,7 @@
 	entry	= ring_buffer_event_data(event);
 	entry->graph_ent			= *trace;
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 
 	return 1;
 }
@@ -327,7 +327,7 @@
 	entry	= ring_buffer_event_data(event);
 	entry->ret				= *trace;
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 }
 
 void trace_graph_return(struct ftrace_graph_ret *trace)
@@ -1474,4 +1474,4 @@
 	return register_tracer(&graph_trace);
 }
 
-device_initcall(init_graph_trace);
+core_initcall(init_graph_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index d98ee82..5ffce7b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -604,7 +604,7 @@
 	.reset		= irqsoff_tracer_reset,
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header   = irqsoff_print_header,
 	.print_line     = irqsoff_print_line,
 	.flags		= &tracer_flags,
@@ -614,7 +614,7 @@
 #endif
 	.open           = irqsoff_trace_open,
 	.close          = irqsoff_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 # define register_irqsoff(trace) register_tracer(&trace)
 #else
@@ -637,7 +637,7 @@
 	.reset		= irqsoff_tracer_reset,
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header   = irqsoff_print_header,
 	.print_line     = irqsoff_print_line,
 	.flags		= &tracer_flags,
@@ -647,7 +647,7 @@
 #endif
 	.open		= irqsoff_trace_open,
 	.close		= irqsoff_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 # define register_preemptoff(trace) register_tracer(&trace)
 #else
@@ -672,7 +672,7 @@
 	.reset		= irqsoff_tracer_reset,
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header   = irqsoff_print_header,
 	.print_line     = irqsoff_print_line,
 	.flags		= &tracer_flags,
@@ -682,7 +682,7 @@
 #endif
 	.open		= irqsoff_trace_open,
 	.close		= irqsoff_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 
 # define register_preemptirqsoff(trace) register_tracer(&trace)
@@ -698,4 +698,4 @@
 
 	return 0;
 }
-device_initcall(init_irqsoff_tracer);
+core_initcall(init_irqsoff_tracer);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 1a21170..1865d5f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -444,7 +444,7 @@
 			return -EINVAL;
 		}
 		/* an address specified */
-		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
+		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
 		if (ret) {
 			pr_info("Failed to parse address.\n");
 			return ret;
@@ -751,8 +751,8 @@
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_nowake_buffer_unlock_commit_regs(buffer, event,
-						       irq_flags, pc, regs);
+		trace_buffer_unlock_commit_regs(buffer, event,
+						irq_flags, pc, regs);
 }
 
 /* Kretprobe handler */
@@ -784,8 +784,8 @@
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_nowake_buffer_unlock_commit_regs(buffer, event,
-						       irq_flags, pc, regs);
+		trace_buffer_unlock_commit_regs(buffer, event,
+						irq_flags, pc, regs);
 }
 
 /* Event entry printers */
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index daa9980..412e959 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -441,7 +441,7 @@
 			goto fail;
 
 		type++;
-		if (strict_strtoul(type, 0, &bs))
+		if (kstrtoul(type, 0, &bs))
 			goto fail;
 
 		switch (bs) {
@@ -501,8 +501,8 @@
 
 	tmp = strchr(symbol, '+');
 	if (tmp) {
-		/* skip sign because strict_strtol doesn't accept '+' */
-		ret = strict_strtoul(tmp + 1, 0, offset);
+		/* skip sign because kstrtoul doesn't accept '+' */
+		ret = kstrtoul(tmp + 1, 0, offset);
 		if (ret)
 			return ret;
 
@@ -533,7 +533,7 @@
 			else
 				ret = -EINVAL;
 		} else if (isdigit(arg[5])) {
-			ret = strict_strtoul(arg + 5, 10, &param);
+			ret = kstrtoul(arg + 5, 10, &param);
 			if (ret || param > PARAM_MAX_STACK)
 				ret = -EINVAL;
 			else {
@@ -579,7 +579,7 @@
 
 	case '@':	/* memory or symbol */
 		if (isdigit(arg[1])) {
-			ret = strict_strtoul(arg + 1, 0, &param);
+			ret = kstrtoul(arg + 1, 0, &param);
 			if (ret)
 				break;
 
@@ -597,14 +597,14 @@
 		break;
 
 	case '+':	/* deref memory */
-		arg++;	/* Skip '+', because strict_strtol() rejects it. */
+		arg++;	/* Skip '+', because kstrtol() rejects it. */
 	case '-':
 		tmp = strchr(arg, '(');
 		if (!tmp)
 			break;
 
 		*tmp = '\0';
-		ret = strict_strtol(arg, 0, &offset);
+		ret = kstrtol(arg, 0, &offset);
 
 		if (ret)
 			break;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 7e62c0a..3374c79 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -102,9 +102,7 @@
 	entry->next_cpu			= task_cpu(wakee);
 
 	if (!filter_check_discard(call, entry, buffer, event))
-		ring_buffer_unlock_commit(buffer, event);
-	ftrace_trace_stack(tr->buffer, flags, 6, pc);
-	ftrace_trace_userstack(tr->buffer, flags, pc);
+		trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 
 static void
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 02170c0..bc64fc1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -589,7 +589,7 @@
 	.reset		= wakeup_tracer_reset,
 	.start		= wakeup_tracer_start,
 	.stop		= wakeup_tracer_stop,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header	= wakeup_print_header,
 	.print_line	= wakeup_print_line,
 	.flags		= &tracer_flags,
@@ -599,7 +599,7 @@
 #endif
 	.open		= wakeup_trace_open,
 	.close		= wakeup_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 
 static struct tracer wakeup_rt_tracer __read_mostly =
@@ -610,7 +610,7 @@
 	.start		= wakeup_tracer_start,
 	.stop		= wakeup_tracer_stop,
 	.wait_pipe	= poll_wait_pipe,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header	= wakeup_print_header,
 	.print_line	= wakeup_print_line,
 	.flags		= &tracer_flags,
@@ -620,7 +620,7 @@
 #endif
 	.open		= wakeup_trace_open,
 	.close		= wakeup_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 
 __init static int init_wakeup_tracer(void)
@@ -637,4 +637,4 @@
 
 	return 0;
 }
-device_initcall(init_wakeup_tracer);
+core_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 2c00a69..4762316 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -320,7 +320,6 @@
 					   int (*func)(void))
 {
 	int save_ftrace_enabled = ftrace_enabled;
-	int save_tracer_enabled = tracer_enabled;
 	unsigned long count;
 	char *func_name;
 	int ret;
@@ -331,7 +330,6 @@
 
 	/* enable tracing, and record the filter function */
 	ftrace_enabled = 1;
-	tracer_enabled = 1;
 
 	/* passed in by parameter to fool gcc from optimizing */
 	func();
@@ -395,7 +393,6 @@
 
  out:
 	ftrace_enabled = save_ftrace_enabled;
-	tracer_enabled = save_tracer_enabled;
 
 	/* Enable tracing on all functions again */
 	ftrace_set_global_filter(NULL, 0, 1);
@@ -452,7 +449,6 @@
 trace_selftest_function_recursion(void)
 {
 	int save_ftrace_enabled = ftrace_enabled;
-	int save_tracer_enabled = tracer_enabled;
 	char *func_name;
 	int len;
 	int ret;
@@ -465,7 +461,6 @@
 
 	/* enable tracing, and record the filter function */
 	ftrace_enabled = 1;
-	tracer_enabled = 1;
 
 	/* Handle PPC64 '.' name */
 	func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
@@ -534,7 +529,6 @@
 	ret = 0;
 out:
 	ftrace_enabled = save_ftrace_enabled;
-	tracer_enabled = save_tracer_enabled;
 
 	return ret;
 }
@@ -569,7 +563,6 @@
 trace_selftest_function_regs(void)
 {
 	int save_ftrace_enabled = ftrace_enabled;
-	int save_tracer_enabled = tracer_enabled;
 	char *func_name;
 	int len;
 	int ret;
@@ -586,7 +579,6 @@
 
 	/* enable tracing, and record the filter function */
 	ftrace_enabled = 1;
-	tracer_enabled = 1;
 
 	/* Handle PPC64 '.' name */
 	func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
@@ -648,7 +640,6 @@
 	ret = 0;
 out:
 	ftrace_enabled = save_ftrace_enabled;
-	tracer_enabled = save_tracer_enabled;
 
 	return ret;
 }
@@ -662,7 +653,6 @@
 trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 {
 	int save_ftrace_enabled = ftrace_enabled;
-	int save_tracer_enabled = tracer_enabled;
 	unsigned long count;
 	int ret;
 
@@ -671,7 +661,6 @@
 
 	/* start the tracing */
 	ftrace_enabled = 1;
-	tracer_enabled = 1;
 
 	ret = tracer_init(trace, tr);
 	if (ret) {
@@ -708,7 +697,6 @@
 	ret = trace_selftest_function_regs();
  out:
 	ftrace_enabled = save_ftrace_enabled;
-	tracer_enabled = save_tracer_enabled;
 
 	/* kill ftrace totally if we failed */
 	if (ret)
@@ -1106,6 +1094,7 @@
 	tracing_stop();
 	/* check both trace buffers */
 	ret = trace_test_buffer(tr, NULL);
+	printk("ret = %d\n", ret);
 	if (!ret)
 		ret = trace_test_buffer(&max_tr, &count);
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 2485a7d..7609dd6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -21,9 +21,6 @@
 static int syscall_exit_register(struct ftrace_event_call *event,
 				 enum trace_reg type, void *data);
 
-static int syscall_enter_define_fields(struct ftrace_event_call *call);
-static int syscall_exit_define_fields(struct ftrace_event_call *call);
-
 static struct list_head *
 syscall_get_enter_fields(struct ftrace_event_call *call)
 {
@@ -32,30 +29,6 @@
 	return &entry->enter_fields;
 }
 
-struct trace_event_functions enter_syscall_print_funcs = {
-	.trace		= print_syscall_enter,
-};
-
-struct trace_event_functions exit_syscall_print_funcs = {
-	.trace		= print_syscall_exit,
-};
-
-struct ftrace_event_class event_class_syscall_enter = {
-	.system		= "syscalls",
-	.reg		= syscall_enter_register,
-	.define_fields	= syscall_enter_define_fields,
-	.get_fields	= syscall_get_enter_fields,
-	.raw_init	= init_syscall_trace,
-};
-
-struct ftrace_event_class event_class_syscall_exit = {
-	.system		= "syscalls",
-	.reg		= syscall_exit_register,
-	.define_fields	= syscall_exit_define_fields,
-	.fields		= LIST_HEAD_INIT(event_class_syscall_exit.fields),
-	.raw_init	= init_syscall_trace,
-};
-
 extern struct syscall_metadata *__start_syscalls_metadata[];
 extern struct syscall_metadata *__stop_syscalls_metadata[];
 
@@ -432,7 +405,7 @@
 	mutex_unlock(&syscall_trace_lock);
 }
 
-int init_syscall_trace(struct ftrace_event_call *call)
+static int init_syscall_trace(struct ftrace_event_call *call)
 {
 	int id;
 	int num;
@@ -457,6 +430,30 @@
 	return id;
 }
 
+struct trace_event_functions enter_syscall_print_funcs = {
+	.trace		= print_syscall_enter,
+};
+
+struct trace_event_functions exit_syscall_print_funcs = {
+	.trace		= print_syscall_exit,
+};
+
+struct ftrace_event_class event_class_syscall_enter = {
+	.system		= "syscalls",
+	.reg		= syscall_enter_register,
+	.define_fields	= syscall_enter_define_fields,
+	.get_fields	= syscall_get_enter_fields,
+	.raw_init	= init_syscall_trace,
+};
+
+struct ftrace_event_class event_class_syscall_exit = {
+	.system		= "syscalls",
+	.reg		= syscall_exit_register,
+	.define_fields	= syscall_exit_define_fields,
+	.fields		= LIST_HEAD_INIT(event_class_syscall_exit.fields),
+	.raw_init	= init_syscall_trace,
+};
+
 unsigned long __init __weak arch_syscall_addr(int nr)
 {
 	return (unsigned long)sys_call_table[nr];
@@ -537,7 +534,7 @@
 	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
-int perf_sysenter_enable(struct ftrace_event_call *call)
+static int perf_sysenter_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
@@ -558,7 +555,7 @@
 	return ret;
 }
 
-void perf_sysenter_disable(struct ftrace_event_call *call)
+static void perf_sysenter_disable(struct ftrace_event_call *call)
 {
 	int num;
 
@@ -615,7 +612,7 @@
 	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
-int perf_sysexit_enable(struct ftrace_event_call *call)
+static int perf_sysexit_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
@@ -636,7 +633,7 @@
 	return ret;
 }
 
-void perf_sysexit_disable(struct ftrace_event_call *call)
+static void perf_sysexit_disable(struct ftrace_event_call *call)
 {
 	int num;
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 03003cd..4ff9ca4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -252,7 +252,7 @@
 	if (ret)
 		goto fail_address_parse;
 
-	ret = strict_strtoul(arg, 0, &offset);
+	ret = kstrtoul(arg, 0, &offset);
 	if (ret)
 		goto fail_address_parse;
 
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 04d959f..a20e320 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -253,7 +253,7 @@
 # let .d file also depends on the source and header files
 define check_deps
 		@set -e; $(RM) $@; \
-		$(CC) -M $(CFLAGS) $< > $@.$$$$; \
+		$(CC) -MM $(CFLAGS) $< > $@.$$$$; \
 		sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
 		$(RM) $@.$$$$
 endef
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index f2989c5..5a824e3 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -174,7 +174,7 @@
 	return 0;
 }
 
-static char *find_cmdline(struct pevent *pevent, int pid)
+static const char *find_cmdline(struct pevent *pevent, int pid)
 {
 	const struct cmdline *comm;
 	struct cmdline key;
@@ -2637,7 +2637,7 @@
 	struct print_arg *farg;
 	enum event_type type;
 	char *token;
-	char *test;
+	const char *test;
 	int i;
 
 	arg->type = PRINT_FUNC;
@@ -3889,7 +3889,7 @@
 			  struct event_format *event, struct print_arg *arg)
 {
 	unsigned char *buf;
-	char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
+	const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
 
 	if (arg->type == PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
@@ -3931,7 +3931,8 @@
 	return 1;
 }
 
-static void print_event_fields(struct trace_seq *s, void *data, int size,
+static void print_event_fields(struct trace_seq *s, void *data,
+			       int size __maybe_unused,
 			       struct event_format *event)
 {
 	struct format_field *field;
@@ -4408,7 +4409,7 @@
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 			struct pevent_record *record)
 {
-	static char *spaces = "                    "; /* 20 spaces */
+	static const char *spaces = "                    "; /* 20 spaces */
 	struct event_format *event;
 	unsigned long secs;
 	unsigned long usecs;
@@ -5070,8 +5071,8 @@
 };
 #undef _PE
 
-int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum,
-		    char *buf, size_t buflen)
+int pevent_strerror(struct pevent *pevent __maybe_unused,
+		    enum pevent_errno errnum, char *buf, size_t buflen)
 {
 	int idx;
 	const char *msg;
@@ -5100,6 +5101,7 @@
 	case PEVENT_ERRNO__READ_FORMAT_FAILED:
 	case PEVENT_ERRNO__READ_PRINT_FAILED:
 	case PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED:
+	case PEVENT_ERRNO__INVALID_ARG_TYPE:
 		snprintf(buf, buflen, "%s", msg);
 		break;
 
@@ -5362,7 +5364,7 @@
 		if (type == PEVENT_FUNC_ARG_VOID)
 			break;
 
-		if (type < 0 || type >= PEVENT_FUNC_ARG_MAX_TYPES) {
+		if (type >= PEVENT_FUNC_ARG_MAX_TYPES) {
 			do_warning("Invalid argument type %d", type);
 			ret = PEVENT_ERRNO__INVALID_ARG_TYPE;
 			goto out_free;
@@ -5560,7 +5562,7 @@
 	}
 
 	if (pevent->func_map) {
-		for (i = 0; i < pevent->func_count; i++) {
+		for (i = 0; i < (int)pevent->func_count; i++) {
 			free(pevent->func_map[i].func);
 			free(pevent->func_map[i].mod);
 		}
@@ -5582,7 +5584,7 @@
 	}
 
 	if (pevent->printk_map) {
-		for (i = 0; i < pevent->printk_count; i++)
+		for (i = 0; i < (int)pevent->printk_count; i++)
 			free(pevent->printk_map[i].printk);
 		free(pevent->printk_map);
 	}
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 9f2e44f..ef6d22e 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -1,3 +1,5 @@
+include ../config/utilities.mak
+
 OUTPUT := ./
 ifeq ("$(origin O)", "command line")
   ifneq ($(O),)
@@ -64,6 +66,7 @@
 INSTALL_INFO=install-info
 DOCBOOK2X_TEXI=docbook2x-texi
 DBLATEX=dblatex
+XMLTO=xmlto
 ifndef PERL_PATH
 	PERL_PATH = /usr/bin/perl
 endif
@@ -71,6 +74,16 @@
 -include ../config.mak.autogen
 -include ../config.mak
 
+_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
+ifeq ($(_tmp_tool_path),)
+	missing_tools = $(ASCIIDOC)
+endif
+
+_tmp_tool_path := $(call get-executable,$(XMLTO))
+ifeq ($(_tmp_tool_path),)
+	missing_tools += $(XMLTO)
+endif
+
 #
 # For asciidoc ...
 #	-7.1.2,	no extra settings are needed.
@@ -170,7 +183,12 @@
 
 install: install-man
 
-install-man: man
+check-man-tools:
+ifdef missing_tools
+	$(error "You need to install $(missing_tools) for man pages")
+endif
+
+do-install-man: man
 	$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
 #	$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
 #	$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
@@ -178,6 +196,15 @@
 #	$(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
 #	$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
 
+install-man: check-man-tools man
+
+try-install-man:
+ifdef missing_tools
+	$(warning Please install $(missing_tools) to have the man pages installed)
+else
+	$(MAKE) do-install-man
+endif
+
 install-info: info
 	$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
 	$(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
@@ -246,7 +273,7 @@
 
 $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
 	$(QUIET_XMLTO)$(RM) $@ && \
-	xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+	$(XMLTO) -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
 
 $(OUTPUT)%.xml : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
diff --git a/tools/perf/Documentation/android.txt b/tools/perf/Documentation/android.txt
new file mode 100644
index 0000000..8484c3a
--- /dev/null
+++ b/tools/perf/Documentation/android.txt
@@ -0,0 +1,78 @@
+How to compile perf for Android
+=========================================
+
+I. Set the Android NDK environment
+------------------------------------------------
+
+(a). Use the Android NDK
+------------------------------------------------
+1. You need to download and install the Android Native Development Kit (NDK).
+Set the NDK variable to point to the path where you installed the NDK:
+  export NDK=/path/to/android-ndk
+
+2. Set cross-compiling environment variables for NDK toolchain and sysroot.
+For arm:
+  export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.6/prebuilt/linux-x86/bin/arm-linux-androideabi-
+  export NDK_SYSROOT=${NDK}/platforms/android-9/arch-arm
+For x86:
+  export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.6/prebuilt/linux-x86/bin/i686-linux-android-
+  export NDK_SYSROOT=${NDK}/platforms/android-9/arch-x86
+
+This method is not working for Android NDK versions up to Revision 8b.
+perf uses some bionic enhancements that are not included in these NDK versions.
+You can use method (b) described below instead.
+
+(b). Use the Android source tree
+-----------------------------------------------
+1. Download the master branch of the Android source tree.
+Set the environment for the target you want using:
+  source build/envsetup.sh
+  lunch
+
+2. Build your own NDK sysroot to contain latest bionic changes and set the
+NDK sysroot environment variable.
+  cd ${ANDROID_BUILD_TOP}/ndk
+For arm:
+  ./build/tools/build-ndk-sysroot.sh --abi=arm
+  export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm
+For x86:
+  ./build/tools/build-ndk-sysroot.sh --abi=x86
+  export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86
+
+3. Set the NDK toolchain environment variable.
+For arm:
+  export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi-
+For x86:
+  export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android-
+
+II. Compile perf for Android
+------------------------------------------------
+You need to run make with the NDK toolchain and sysroot defined above:
+For arm:
+  make ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}"
+For x86:
+  make ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}"
+
+III. Install perf
+-----------------------------------------------
+You need to connect to your Android device/emulator using adb.
+Install perf using:
+  adb push perf /data/perf
+
+If you also want to use perf-archive you need busybox tools for Android.
+For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh:
+  sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive >> /tmp/perf-archive
+  chmod +x /tmp/perf-archive
+  adb push /tmp/perf-archive /data/perf-archive
+
+IV. Environment settings for running perf
+------------------------------------------------
+Some perf features need environment variables to run properly.
+You need to set these before running perf on the target:
+  adb shell
+  # PERF_PAGER=cat
+
+IV. Run perf
+------------------------------------------------
+Run perf on your device/emulator to which you previously connected using adb:
+  # ./data/perf
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index ab7f667..194f37d 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -72,6 +72,66 @@
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
 
+-b::
+--baseline-only::
+        Show only items with match in baseline.
+
+-c::
+--compute::
+        Differential computation selection - delta,ratio,wdiff (default is delta).
+        If '+' is specified as a first character, the output is sorted based
+        on the computation results.
+        See COMPARISON METHODS section for more info.
+
+-p::
+--period::
+        Show period values for both compared hist entries.
+
+-F::
+--formula::
+        Show formula for given computation.
+
+COMPARISON METHODS
+------------------
+delta
+~~~~~
+If specified the 'Delta' column is displayed with value 'd' computed as:
+
+  d = A->period_percent - B->period_percent
+
+with:
+  - A/B being matching hist entry from first/second file specified
+    (or perf.data/perf.data.old) respectively.
+
+  - period_percent being the % of the hist entry period value within
+    single data file
+
+ratio
+~~~~~
+If specified the 'Ratio' column is displayed with value 'r' computed as:
+
+  r = A->period / B->period
+
+with:
+  - A/B being matching hist entry from first/second file specified
+    (or perf.data/perf.data.old) respectively.
+
+  - period being the hist entry period value
+
+wdiff
+~~~~~
+If specified the 'Weighted diff' column is displayed with value 'd' computed as:
+
+   d = B->period * WEIGHT-A - A->period * WEIGHT-B
+
+  - A/B being matching hist entry from first/second file specified
+    (or perf.data/perf.data.old) respectively.
+
+  - period being the hist entry period value
+
+  - WEIGHT-A/WEIGHT-B being user suplied weights in the the '-c' option
+    behind ':' separator like '-c wdiff:1,2'.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 025630d..a00a342 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -29,6 +29,17 @@
 -v::
 --verbose::
 	Be more verbose.
+-i::
+--input=::
+	Input file name. (default: stdin)
+-o::
+--output=::
+	Output file name. (default: stdout)
+-s::
+--sched-stat::
+	Merge sched_stat and sched_switch for getting events where and how long
+	tasks slept. sched_switch contains a callchain where a task slept and
+	sched_stat contains a timeslice how long a task slept.
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2fa173b..cf0c310 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -108,6 +108,11 @@
      3>results  perf stat --log-fd 3          -- $cmd
      3>>results perf stat --log-fd 3 --append -- $cmd
 
+--pre::
+--post::
+	Pre and post measurement hooks, e.g.:
+
+perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
 
 
 EXAMPLES
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 3a2ae37..68718cc 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -48,6 +48,12 @@
 In per-thread mode with inheritance mode on (default), Events are captured only when
 the thread executes on the designated CPUs. Default is to monitor all CPUs.
 
+--duration:
+	Show only events that had a duration greater than N.M ms.
+
+--sched:
+	Accrue thread runtime and provide a summary at the end of the session.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0a619af..891bc77 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -155,15 +155,15 @@
 
 -include config/feature-tests.mak
 
-ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
+ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
 	CFLAGS := $(CFLAGS) -fstack-protector-all
 endif
 
-ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -Wstack-protector),y)
+ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wstack-protector,-Wstack-protector),y)
        CFLAGS := $(CFLAGS) -Wstack-protector
 endif
 
-ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -Wvolatile-register-var),y)
+ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-Wvolatile-register-var),y)
        CFLAGS := $(CFLAGS) -Wvolatile-register-var
 endif
 
@@ -197,8 +197,16 @@
 	-I. \
 	-I$(TRACE_EVENT_DIR) \
 	-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
 BASIC_LDFLAGS =
 
+ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
+	BIONIC := 1
+	EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
+	EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
+	BASIC_CFLAGS += -I.
+endif
+
 # Guard against environment variables
 BUILTIN_OBJS =
 LIB_H =
@@ -330,6 +338,7 @@
 LIB_H += util/exec_cmd.h
 LIB_H += util/types.h
 LIB_H += util/levenshtein.h
+LIB_H += util/machine.h
 LIB_H += util/map.h
 LIB_H += util/parse-options.h
 LIB_H += util/parse-events.h
@@ -346,6 +355,7 @@
 LIB_H += util/tool.h
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
+LIB_H += util/dso.h
 LIB_H += util/symbol.h
 LIB_H += util/color.h
 LIB_H += util/values.h
@@ -389,7 +399,6 @@
 LIB_OBJS += $(OUTPUT)util/levenshtein.o
 LIB_OBJS += $(OUTPUT)util/parse-options.o
 LIB_OBJS += $(OUTPUT)util/parse-events.o
-LIB_OBJS += $(OUTPUT)util/parse-events-test.o
 LIB_OBJS += $(OUTPUT)util/path.o
 LIB_OBJS += $(OUTPUT)util/rbtree.o
 LIB_OBJS += $(OUTPUT)util/bitmap.o
@@ -404,15 +413,16 @@
 LIB_OBJS += $(OUTPUT)util/usage.o
 LIB_OBJS += $(OUTPUT)util/wrapper.o
 LIB_OBJS += $(OUTPUT)util/sigchain.o
+LIB_OBJS += $(OUTPUT)util/dso.o
 LIB_OBJS += $(OUTPUT)util/symbol.o
 LIB_OBJS += $(OUTPUT)util/symbol-elf.o
-LIB_OBJS += $(OUTPUT)util/dso-test-data.o
 LIB_OBJS += $(OUTPUT)util/color.o
 LIB_OBJS += $(OUTPUT)util/pager.o
 LIB_OBJS += $(OUTPUT)util/header.o
 LIB_OBJS += $(OUTPUT)util/callchain.o
 LIB_OBJS += $(OUTPUT)util/values.o
 LIB_OBJS += $(OUTPUT)util/debug.o
+LIB_OBJS += $(OUTPUT)util/machine.o
 LIB_OBJS += $(OUTPUT)util/map.o
 LIB_OBJS += $(OUTPUT)util/pstack.o
 LIB_OBJS += $(OUTPUT)util/session.o
@@ -440,10 +450,29 @@
 LIB_OBJS += $(OUTPUT)util/vdso.o
 LIB_OBJS += $(OUTPUT)util/stat.o
 
+LIB_OBJS += $(OUTPUT)ui/setup.o
 LIB_OBJS += $(OUTPUT)ui/helpline.o
+LIB_OBJS += $(OUTPUT)ui/progress.o
 LIB_OBJS += $(OUTPUT)ui/hist.o
 LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
 
+LIB_OBJS += $(OUTPUT)arch/common.o
+
+LIB_OBJS += $(OUTPUT)tests/parse-events.o
+LIB_OBJS += $(OUTPUT)tests/dso-data.o
+LIB_OBJS += $(OUTPUT)tests/attr.o
+LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o
+LIB_OBJS += $(OUTPUT)tests/mmap-basic.o
+LIB_OBJS += $(OUTPUT)tests/perf-record.o
+LIB_OBJS += $(OUTPUT)tests/rdpmc.o
+LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
+LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
+LIB_OBJS += $(OUTPUT)tests/pmu.o
+LIB_OBJS += $(OUTPUT)tests/util.o
+
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
 # Benchmark modules
@@ -473,8 +502,8 @@
 BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
-BUILTIN_OBJS += $(OUTPUT)builtin-test.o
 BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
+BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
 
 PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT)
 
@@ -495,18 +524,33 @@
 	NO_LIBUNWIND := 1
 else
 FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
-ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y)
+ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF),libelf),y)
 	FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS)
-	ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y)
-		msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
-	else
+	ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y)
+		LIBC_SUPPORT := 1
+	endif
+	ifeq ($(BIONIC),1)
+		LIBC_SUPPORT := 1
+	endif
+	ifeq ($(LIBC_SUPPORT),1)
+		msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev);
+
 		NO_LIBELF := 1
 		NO_DWARF := 1
 		NO_DEMANGLE := 1
+	else
+		msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
 	endif
 else
-	FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
-	ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
+	# for linking with debug library, run like:
+	# make DEBUG=1 LIBDW_DIR=/opt/libdw/
+	ifdef LIBDW_DIR
+		LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
+		LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
+	endif
+
+	FLAGS_DWARF=$(ALL_CFLAGS) $(LIBDW_CFLAGS) -ldw -lelf $(LIBDW_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
+	ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y)
 		msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
 		NO_DWARF := 1
 	endif # Dwarf support
@@ -522,7 +566,7 @@
 endif
 
 FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(ALL_CFLAGS) $(LIBUNWIND_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS)
-ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND)),y)
+ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND),libunwind),y)
 	msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99);
 	NO_LIBUNWIND := 1
 endif # Libunwind support
@@ -551,7 +595,8 @@
 else # NO_LIBELF
 BASIC_CFLAGS += -DLIBELF_SUPPORT
 
-ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_COMMON)),y)
+FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
+ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
 	BASIC_CFLAGS += -DLIBELF_MMAP
 endif
 
@@ -559,7 +604,8 @@
 ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
 	msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
 else
-	BASIC_CFLAGS += -DDWARF_SUPPORT
+	BASIC_CFLAGS := -DDWARF_SUPPORT $(LIBDW_CFLAGS) $(BASIC_CFLAGS)
+	BASIC_LDFLAGS := $(LIBDW_LDFLAGS) $(BASIC_LDFLAGS)
 	EXTLIBS += -lelf -ldw
 	LIB_OBJS += $(OUTPUT)util/probe-finder.o
 	LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
@@ -577,7 +623,7 @@
 
 ifndef NO_LIBAUDIT
 	FLAGS_LIBAUDIT = $(ALL_CFLAGS) $(ALL_LDFLAGS) -laudit
-	ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT)),y)
+	ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT),libaudit),y)
 		msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
 	else
 		BASIC_CFLAGS += -DLIBAUDIT_SUPPORT
@@ -588,23 +634,23 @@
 
 ifndef NO_NEWT
 	FLAGS_NEWT=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lnewt
-	ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT)),y)
+	ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT),libnewt),y)
 		msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev);
 	else
 		# Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
 		BASIC_CFLAGS += -I/usr/include/slang
 		BASIC_CFLAGS += -DNEWT_SUPPORT
 		EXTLIBS += -lnewt -lslang
-		LIB_OBJS += $(OUTPUT)ui/setup.o
 		LIB_OBJS += $(OUTPUT)ui/browser.o
 		LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
 		LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
 		LIB_OBJS += $(OUTPUT)ui/browsers/map.o
-		LIB_OBJS += $(OUTPUT)ui/progress.o
+		LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
 		LIB_OBJS += $(OUTPUT)ui/util.o
 		LIB_OBJS += $(OUTPUT)ui/tui/setup.o
 		LIB_OBJS += $(OUTPUT)ui/tui/util.o
 		LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
+		LIB_OBJS += $(OUTPUT)ui/tui/progress.o
 		LIB_H += ui/browser.h
 		LIB_H += ui/browsers/map.h
 		LIB_H += ui/keysyms.h
@@ -617,10 +663,10 @@
 
 ifndef NO_GTK2
 	FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
-	ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2)),y)
+	ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2),gtk2),y)
 		msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
 	else
-		ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y)
+		ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2),-DHAVE_GTK_INFO_BAR),y)
 			BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR
 		endif
 		BASIC_CFLAGS += -DGTK2_SUPPORT
@@ -630,9 +676,9 @@
 		LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
 		LIB_OBJS += $(OUTPUT)ui/gtk/util.o
 		LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
+		LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
 		# Make sure that it'd be included only once.
 		ifeq ($(findstring -DNEWT_SUPPORT,$(BASIC_CFLAGS)),)
-			LIB_OBJS += $(OUTPUT)ui/setup.o
 			LIB_OBJS += $(OUTPUT)ui/util.o
 		endif
 	endif
@@ -647,7 +693,7 @@
 	PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
 	FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
 
-	ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
+	ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED),perl),y)
 		BASIC_CFLAGS += -DNO_LIBPERL
 	else
                ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS)
@@ -701,11 +747,11 @@
       PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
       FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 
-      ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
+      ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED),python),y)
         $(call disable-python,Python.h (for Python 2.x))
       else
 
-        ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED)),y)
+        ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED),python version),y)
           $(warning Python 3 is not yet supported; please set)
           $(warning PYTHON and/or PYTHON_CONFIG appropriately.)
           $(warning If you also have Python 2 installed, then)
@@ -739,22 +785,22 @@
 		BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
         else
 		FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd
-		has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD))
+		has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD),libbfd)
 		ifeq ($(has_bfd),y)
 			EXTLIBS += -lbfd
 		else
 			FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
-			has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY))
+			has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY),liberty)
 			ifeq ($(has_bfd_iberty),y)
 				EXTLIBS += -lbfd -liberty
 			else
 				FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz
-				has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z))
+				has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z),libz)
 				ifeq ($(has_bfd_iberty_z),y)
 					EXTLIBS += -lbfd -liberty -lz
 				else
 					FLAGS_CPLUS_DEMANGLE=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -liberty
-					has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE))
+					has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle)
 					ifeq ($(has_cplus_demangle),y)
 						EXTLIBS += -liberty
 						BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
@@ -776,13 +822,19 @@
 endif
 
 ifndef NO_STRLCPY
-	ifeq ($(call try-cc,$(SOURCE_STRLCPY),),y)
+	ifeq ($(call try-cc,$(SOURCE_STRLCPY),,-DHAVE_STRLCPY),y)
 		BASIC_CFLAGS += -DHAVE_STRLCPY
 	endif
 endif
 
+ifndef NO_ON_EXIT
+	ifeq ($(call try-cc,$(SOURCE_ON_EXIT),,-DHAVE_ON_EXIT),y)
+		BASIC_CFLAGS += -DHAVE_ON_EXIT
+	endif
+endif
+
 ifndef NO_BACKTRACE
-       ifeq ($(call try-cc,$(SOURCE_BACKTRACE),),y)
+       ifeq ($(call try-cc,$(SOURCE_BACKTRACE),,-DBACKTRACE_SUPPORT),y)
                BASIC_CFLAGS += -DBACKTRACE_SUPPORT
        endif
 endif
@@ -891,10 +943,14 @@
 $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
 		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
-		'-DBINDIR="$(bindir_relative_SQ)"' \
 		'-DPREFIX="$(prefix_SQ)"' \
 		$<
 
+$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
+		'-DBINDIR="$(bindir_SQ)"' \
+		$<
+
 $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
@@ -910,6 +966,9 @@
 $(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
 
+$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
 $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
@@ -981,20 +1040,15 @@
 	@echo 'Perf maintainer targets:'
 	@echo '  clean			- clean all binary objects and build output'
 
-doc:
-	$(MAKE) -C Documentation all
 
-man:
-	$(MAKE) -C Documentation man
+DOC_TARGETS := doc man html info pdf
 
-html:
-	$(MAKE) -C Documentation html
+INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
+INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
 
-info:
-	$(MAKE) -C Documentation info
-
-pdf:
-	$(MAKE) -C Documentation pdf
+# 'make doc' should call 'make -C Documentation all'
+$(DOC_TARGETS):
+	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
 
 TAGS:
 	$(RM) TAGS
@@ -1045,7 +1099,7 @@
 endif
 perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
 
-install: all
+install: all try-install-man
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
@@ -1061,33 +1115,17 @@
 	$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'
 	$(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'
+	$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
+	$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 
 install-python_ext:
 	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
 
-install-doc:
-	$(MAKE) -C Documentation install
-
-install-man:
-	$(MAKE) -C Documentation install-man
-
-install-html:
-	$(MAKE) -C Documentation install-html
-
-install-info:
-	$(MAKE) -C Documentation install-info
-
-install-pdf:
-	$(MAKE) -C Documentation install-pdf
-
-quick-install-doc:
-	$(MAKE) -C Documentation quick-install
-
-quick-install-man:
-	$(MAKE) -C Documentation quick-install-man
-
-quick-install-html:
-	$(MAKE) -C Documentation quick-install-html
+# 'make install-doc' should call 'make -C Documentation install'
+$(INSTALL_DOC_TARGETS):
+	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
 
 ### Cleaning rules
 
@@ -1095,7 +1133,7 @@
 	$(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS)
 	$(RM) $(ALL_PROGRAMS) perf
 	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
-	$(MAKE) -C Documentation/ clean
+	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
 	$(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS
 	$(RM) $(OUTPUT)util/*-bison*
 	$(RM) $(OUTPUT)util/*-flex*
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
new file mode 100644
index 0000000..3e975cb
--- /dev/null
+++ b/tools/perf/arch/common.c
@@ -0,0 +1,211 @@
+#include <stdio.h>
+#include <sys/utsname.h>
+#include "common.h"
+#include "../util/debug.h"
+
+const char *const arm_triplets[] = {
+	"arm-eabi-",
+	"arm-linux-androideabi-",
+	"arm-unknown-linux-",
+	"arm-unknown-linux-gnu-",
+	"arm-unknown-linux-gnueabi-",
+	NULL
+};
+
+const char *const powerpc_triplets[] = {
+	"powerpc-unknown-linux-gnu-",
+	"powerpc64-unknown-linux-gnu-",
+	NULL
+};
+
+const char *const s390_triplets[] = {
+	"s390-ibm-linux-",
+	NULL
+};
+
+const char *const sh_triplets[] = {
+	"sh-unknown-linux-gnu-",
+	"sh64-unknown-linux-gnu-",
+	NULL
+};
+
+const char *const sparc_triplets[] = {
+	"sparc-unknown-linux-gnu-",
+	"sparc64-unknown-linux-gnu-",
+	NULL
+};
+
+const char *const x86_triplets[] = {
+	"x86_64-pc-linux-gnu-",
+	"x86_64-unknown-linux-gnu-",
+	"i686-pc-linux-gnu-",
+	"i586-pc-linux-gnu-",
+	"i486-pc-linux-gnu-",
+	"i386-pc-linux-gnu-",
+	"i686-linux-android-",
+	"i686-android-linux-",
+	NULL
+};
+
+const char *const mips_triplets[] = {
+	"mips-unknown-linux-gnu-",
+	"mipsel-linux-android-",
+	NULL
+};
+
+static bool lookup_path(char *name)
+{
+	bool found = false;
+	char *path, *tmp;
+	char buf[PATH_MAX];
+	char *env = getenv("PATH");
+
+	if (!env)
+		return false;
+
+	env = strdup(env);
+	if (!env)
+		return false;
+
+	path = strtok_r(env, ":", &tmp);
+	while (path) {
+		scnprintf(buf, sizeof(buf), "%s/%s", path, name);
+		if (access(buf, F_OK) == 0) {
+			found = true;
+			break;
+		}
+		path = strtok_r(NULL, ":", &tmp);
+	}
+	free(env);
+	return found;
+}
+
+static int lookup_triplets(const char *const *triplets, const char *name)
+{
+	int i;
+	char buf[PATH_MAX];
+
+	for (i = 0; triplets[i] != NULL; i++) {
+		scnprintf(buf, sizeof(buf), "%s%s", triplets[i], name);
+		if (lookup_path(buf))
+			return i;
+	}
+	return -1;
+}
+
+/*
+ * Return architecture name in a normalized form.
+ * The conversion logic comes from the Makefile.
+ */
+static const char *normalize_arch(char *arch)
+{
+	if (!strcmp(arch, "x86_64"))
+		return "x86";
+	if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
+		return "x86";
+	if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
+		return "sparc";
+	if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
+		return "arm";
+	if (!strncmp(arch, "s390", 4))
+		return "s390";
+	if (!strncmp(arch, "parisc", 6))
+		return "parisc";
+	if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
+		return "powerpc";
+	if (!strncmp(arch, "mips", 4))
+		return "mips";
+	if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
+		return "sh";
+
+	return arch;
+}
+
+static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
+						  const char *name,
+						  const char **path)
+{
+	int idx;
+	const char *arch, *cross_env;
+	struct utsname uts;
+	const char *const *path_list;
+	char *buf = NULL;
+
+	arch = normalize_arch(env->arch);
+
+	if (uname(&uts) < 0)
+		goto out;
+
+	/*
+	 * We don't need to try to find objdump path for native system.
+	 * Just use default binutils path (e.g.: "objdump").
+	 */
+	if (!strcmp(normalize_arch(uts.machine), arch))
+		goto out;
+
+	cross_env = getenv("CROSS_COMPILE");
+	if (cross_env) {
+		if (asprintf(&buf, "%s%s", cross_env, name) < 0)
+			goto out_error;
+		if (buf[0] == '/') {
+			if (access(buf, F_OK) == 0)
+				goto out;
+			goto out_error;
+		}
+		if (lookup_path(buf))
+			goto out;
+		free(buf);
+	}
+
+	if (!strcmp(arch, "arm"))
+		path_list = arm_triplets;
+	else if (!strcmp(arch, "powerpc"))
+		path_list = powerpc_triplets;
+	else if (!strcmp(arch, "sh"))
+		path_list = sh_triplets;
+	else if (!strcmp(arch, "s390"))
+		path_list = s390_triplets;
+	else if (!strcmp(arch, "sparc"))
+		path_list = sparc_triplets;
+	else if (!strcmp(arch, "x86"))
+		path_list = x86_triplets;
+	else if (!strcmp(arch, "mips"))
+		path_list = mips_triplets;
+	else {
+		ui__error("binutils for %s not supported.\n", arch);
+		goto out_error;
+	}
+
+	idx = lookup_triplets(path_list, name);
+	if (idx < 0) {
+		ui__error("Please install %s for %s.\n"
+			  "You can add it to PATH, set CROSS_COMPILE or "
+			  "override the default using --%s.\n",
+			  name, arch, name);
+		goto out_error;
+	}
+
+	if (asprintf(&buf, "%s%s", path_list[idx], name) < 0)
+		goto out_error;
+
+out:
+	*path = buf;
+	return 0;
+out_error:
+	free(buf);
+	*path = NULL;
+	return -1;
+}
+
+int perf_session_env__lookup_objdump(struct perf_session_env *env)
+{
+	/*
+	 * For live mode, env->arch will be NULL and we can use
+	 * the native objdump tool.
+	 */
+	if (env->arch == NULL)
+		return 0;
+
+	return perf_session_env__lookup_binutils_path(env, "objdump",
+						      &objdump_path);
+}
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
new file mode 100644
index 0000000..ede246e
--- /dev/null
+++ b/tools/perf/arch/common.h
@@ -0,0 +1,10 @@
+#ifndef ARCH_PERF_COMMON_H
+#define ARCH_PERF_COMMON_H
+
+#include "../util/session.h"
+
+extern const char *objdump_path;
+
+int perf_session_env__lookup_objdump(struct perf_session_env *env);
+
+#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 9ea3854..dc870cf 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,12 +28,12 @@
 #include "util/hist.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "arch/common.h"
 
 #include <linux/bitmap.h>
 
 struct perf_annotate {
 	struct perf_tool tool;
-	char const *input_name;
 	bool	   force, use_tui, use_stdio;
 	bool	   full_paths;
 	bool	   print_line;
@@ -139,7 +139,7 @@
 		}
 
 		if (use_browser > 0) {
-			key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0);
+			key = hist_entry__tui_annotate(he, evidx, NULL);
 			switch (key) {
 			case K_RIGHT:
 				next = rb_next(nd);
@@ -174,7 +174,7 @@
 	struct perf_evsel *pos;
 	u64 total_nr_samples;
 
-	session = perf_session__new(ann->input_name, O_RDONLY,
+	session = perf_session__new(input_name, O_RDONLY,
 				    ann->force, false, &ann->tool);
 	if (session == NULL)
 		return -ENOMEM;
@@ -186,6 +186,12 @@
 			goto out_delete;
 	}
 
+	if (!objdump_path) {
+		ret = perf_session_env__lookup_objdump(&session->header.env);
+		if (ret)
+			goto out_delete;
+	}
+
 	ret = perf_session__process_events(session, &ann->tool);
 	if (ret)
 		goto out_delete;
@@ -246,13 +252,14 @@
 			.sample	= process_sample_event,
 			.mmap	= perf_event__process_mmap,
 			.comm	= perf_event__process_comm,
-			.fork	= perf_event__process_task,
+			.exit	= perf_event__process_exit,
+			.fork	= perf_event__process_fork,
 			.ordered_samples = true,
 			.ordering_requires_timestamps = true,
 		},
 	};
 	const struct option options[] = {
-	OPT_STRING('i', "input", &annotate.input_name, "file",
+	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index d37e077..fae8b25 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -13,6 +13,7 @@
 #include "util/header.h"
 #include "util/parse-options.h"
 #include "util/strlist.h"
+#include "util/build-id.h"
 #include "util/symbol.h"
 
 static int build_id_cache__add_file(const char *filename, const char *debugdir)
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index a0e94ff..a82d99f 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -44,8 +44,7 @@
 	return fprintf(fp, "%s\n", sbuild_id);
 }
 
-static int perf_session__list_build_ids(const char *input_name,
-					bool force, bool with_hits)
+static int perf_session__list_build_ids(bool force, bool with_hits)
 {
 	struct perf_session *session;
 
@@ -81,7 +80,6 @@
 	bool show_kernel = false;
 	bool with_hits = false;
 	bool force = false;
-	const char *input_name = NULL;
 	const struct option options[] = {
 	OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"),
 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
@@ -101,5 +99,5 @@
 	if (show_kernel)
 		return sysfs__fprintf_build_id(stdout);
 
-	return perf_session__list_build_ids(input_name, force, with_hits);
+	return perf_session__list_build_ids(force, with_hits);
 }
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index a0b531c..93b852f 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -24,6 +24,228 @@
 static char	  diff__default_sort_order[] = "dso,symbol";
 static bool  force;
 static bool show_displacement;
+static bool show_period;
+static bool show_formula;
+static bool show_baseline_only;
+static bool sort_compute;
+
+static s64 compute_wdiff_w1;
+static s64 compute_wdiff_w2;
+
+enum {
+	COMPUTE_DELTA,
+	COMPUTE_RATIO,
+	COMPUTE_WEIGHTED_DIFF,
+	COMPUTE_MAX,
+};
+
+const char *compute_names[COMPUTE_MAX] = {
+	[COMPUTE_DELTA] = "delta",
+	[COMPUTE_RATIO] = "ratio",
+	[COMPUTE_WEIGHTED_DIFF] = "wdiff",
+};
+
+static int compute;
+
+static int setup_compute_opt_wdiff(char *opt)
+{
+	char *w1_str = opt;
+	char *w2_str;
+
+	int ret = -EINVAL;
+
+	if (!opt)
+		goto out;
+
+	w2_str = strchr(opt, ',');
+	if (!w2_str)
+		goto out;
+
+	*w2_str++ = 0x0;
+	if (!*w2_str)
+		goto out;
+
+	compute_wdiff_w1 = strtol(w1_str, NULL, 10);
+	compute_wdiff_w2 = strtol(w2_str, NULL, 10);
+
+	if (!compute_wdiff_w1 || !compute_wdiff_w2)
+		goto out;
+
+	pr_debug("compute wdiff w1(%" PRId64 ") w2(%" PRId64 ")\n",
+		  compute_wdiff_w1, compute_wdiff_w2);
+
+	ret = 0;
+
+ out:
+	if (ret)
+		pr_err("Failed: wrong weight data, use 'wdiff:w1,w2'\n");
+
+	return ret;
+}
+
+static int setup_compute_opt(char *opt)
+{
+	if (compute == COMPUTE_WEIGHTED_DIFF)
+		return setup_compute_opt_wdiff(opt);
+
+	if (opt) {
+		pr_err("Failed: extra option specified '%s'", opt);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int setup_compute(const struct option *opt, const char *str,
+			 int unset __maybe_unused)
+{
+	int *cp = (int *) opt->value;
+	char *cstr = (char *) str;
+	char buf[50];
+	unsigned i;
+	char *option;
+
+	if (!str) {
+		*cp = COMPUTE_DELTA;
+		return 0;
+	}
+
+	if (*str == '+') {
+		sort_compute = true;
+		cstr = (char *) ++str;
+		if (!*str)
+			return 0;
+	}
+
+	option = strchr(str, ':');
+	if (option) {
+		unsigned len = option++ - str;
+
+		/*
+		 * The str data are not writeable, so we need
+		 * to use another buffer.
+		 */
+
+		/* No option value is longer. */
+		if (len >= sizeof(buf))
+			return -EINVAL;
+
+		strncpy(buf, str, len);
+		buf[len] = 0x0;
+		cstr = buf;
+	}
+
+	for (i = 0; i < COMPUTE_MAX; i++)
+		if (!strcmp(cstr, compute_names[i])) {
+			*cp = i;
+			return setup_compute_opt(option);
+		}
+
+	pr_err("Failed: '%s' is not computation method "
+	       "(use 'delta','ratio' or 'wdiff')\n", str);
+	return -EINVAL;
+}
+
+static double get_period_percent(struct hist_entry *he, u64 period)
+{
+	u64 total = he->hists->stats.total_period;
+	return (period * 100.0) / total;
+}
+
+double perf_diff__compute_delta(struct hist_entry *he)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+	double new_percent = get_period_percent(he, he->stat.period);
+	double old_percent = pair ? get_period_percent(pair, pair->stat.period) : 0.0;
+
+	he->diff.period_ratio_delta = new_percent - old_percent;
+	he->diff.computed = true;
+	return he->diff.period_ratio_delta;
+}
+
+double perf_diff__compute_ratio(struct hist_entry *he)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+	double new_period = he->stat.period;
+	double old_period = pair ? pair->stat.period : 0;
+
+	he->diff.computed = true;
+	he->diff.period_ratio = pair ? (new_period / old_period) : 0;
+	return he->diff.period_ratio;
+}
+
+s64 perf_diff__compute_wdiff(struct hist_entry *he)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+	u64 new_period = he->stat.period;
+	u64 old_period = pair ? pair->stat.period : 0;
+
+	he->diff.computed = true;
+
+	if (!pair)
+		he->diff.wdiff = 0;
+	else
+		he->diff.wdiff = new_period * compute_wdiff_w2 -
+				 old_period * compute_wdiff_w1;
+
+	return he->diff.wdiff;
+}
+
+static int formula_delta(struct hist_entry *he, char *buf, size_t size)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+
+	if (!pair)
+		return -1;
+
+	return scnprintf(buf, size,
+			 "(%" PRIu64 " * 100 / %" PRIu64 ") - "
+			 "(%" PRIu64 " * 100 / %" PRIu64 ")",
+			  he->stat.period, he->hists->stats.total_period,
+			  pair->stat.period, pair->hists->stats.total_period);
+}
+
+static int formula_ratio(struct hist_entry *he, char *buf, size_t size)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+	double new_period = he->stat.period;
+	double old_period = pair ? pair->stat.period : 0;
+
+	if (!pair)
+		return -1;
+
+	return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period);
+}
+
+static int formula_wdiff(struct hist_entry *he, char *buf, size_t size)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+	u64 new_period = he->stat.period;
+	u64 old_period = pair ? pair->stat.period : 0;
+
+	if (!pair)
+		return -1;
+
+	return scnprintf(buf, size,
+		  "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
+		  new_period, compute_wdiff_w2, old_period, compute_wdiff_w1);
+}
+
+int perf_diff__formula(char *buf, size_t size, struct hist_entry *he)
+{
+	switch (compute) {
+	case COMPUTE_DELTA:
+		return formula_delta(he, buf, size);
+	case COMPUTE_RATIO:
+		return formula_ratio(he, buf, size);
+	case COMPUTE_WEIGHTED_DIFF:
+		return formula_wdiff(he, buf, size);
+	default:
+		BUG_ON(1);
+	}
+
+	return -1;
+}
 
 static int hists__add_entry(struct hists *self,
 			    struct addr_location *al, u64 period)
@@ -47,7 +269,7 @@
 		return -1;
 	}
 
-	if (al.filtered || al.sym == NULL)
+	if (al.filtered)
 		return 0;
 
 	if (hists__add_entry(&evsel->hists, &al, sample->period)) {
@@ -63,8 +285,8 @@
 	.sample	= diff__process_sample_event,
 	.mmap	= perf_event__process_mmap,
 	.comm	= perf_event__process_comm,
-	.exit	= perf_event__process_task,
-	.fork	= perf_event__process_task,
+	.exit	= perf_event__process_exit,
+	.fork	= perf_event__process_fork,
 	.lost	= perf_event__process_lost,
 	.ordered_samples = true,
 	.ordering_requires_timestamps = true,
@@ -112,36 +334,6 @@
 		self->entries = tmp;
 }
 
-static struct hist_entry *hists__find_entry(struct hists *self,
-					    struct hist_entry *he)
-{
-	struct rb_node *n = self->entries.rb_node;
-
-	while (n) {
-		struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
-		int64_t cmp = hist_entry__cmp(he, iter);
-
-		if (cmp < 0)
-			n = n->rb_left;
-		else if (cmp > 0)
-			n = n->rb_right;
-		else
-			return iter;
-	}
-
-	return NULL;
-}
-
-static void hists__match(struct hists *older, struct hists *newer)
-{
-	struct rb_node *nd;
-
-	for (nd = rb_first(&newer->entries); nd; nd = rb_next(nd)) {
-		struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
-		pos->pair = hists__find_entry(older, pos);
-	}
-}
-
 static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
 				      struct perf_evlist *evlist)
 {
@@ -172,6 +364,144 @@
 	}
 }
 
+static void hists__baseline_only(struct hists *hists)
+{
+	struct rb_node *next = rb_first(&hists->entries);
+
+	while (next != NULL) {
+		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
+
+		next = rb_next(&he->rb_node);
+		if (!hist_entry__next_pair(he)) {
+			rb_erase(&he->rb_node, &hists->entries);
+			hist_entry__free(he);
+		}
+	}
+}
+
+static void hists__precompute(struct hists *hists)
+{
+	struct rb_node *next = rb_first(&hists->entries);
+
+	while (next != NULL) {
+		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
+
+		next = rb_next(&he->rb_node);
+
+		switch (compute) {
+		case COMPUTE_DELTA:
+			perf_diff__compute_delta(he);
+			break;
+		case COMPUTE_RATIO:
+			perf_diff__compute_ratio(he);
+			break;
+		case COMPUTE_WEIGHTED_DIFF:
+			perf_diff__compute_wdiff(he);
+			break;
+		default:
+			BUG_ON(1);
+		}
+	}
+}
+
+static int64_t cmp_doubles(double l, double r)
+{
+	if (l > r)
+		return -1;
+	else if (l < r)
+		return 1;
+	else
+		return 0;
+}
+
+static int64_t
+hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+			int c)
+{
+	switch (c) {
+	case COMPUTE_DELTA:
+	{
+		double l = left->diff.period_ratio_delta;
+		double r = right->diff.period_ratio_delta;
+
+		return cmp_doubles(l, r);
+	}
+	case COMPUTE_RATIO:
+	{
+		double l = left->diff.period_ratio;
+		double r = right->diff.period_ratio;
+
+		return cmp_doubles(l, r);
+	}
+	case COMPUTE_WEIGHTED_DIFF:
+	{
+		s64 l = left->diff.wdiff;
+		s64 r = right->diff.wdiff;
+
+		return r - l;
+	}
+	default:
+		BUG_ON(1);
+	}
+
+	return 0;
+}
+
+static void insert_hist_entry_by_compute(struct rb_root *root,
+					 struct hist_entry *he,
+					 int c)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+		if (hist_entry__cmp_compute(he, iter, c) < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+}
+
+static void hists__compute_resort(struct hists *hists)
+{
+	struct rb_root tmp = RB_ROOT;
+	struct rb_node *next = rb_first(&hists->entries);
+
+	while (next != NULL) {
+		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
+
+		next = rb_next(&he->rb_node);
+
+		rb_erase(&he->rb_node, &hists->entries);
+		insert_hist_entry_by_compute(&tmp, he, compute);
+	}
+
+	hists->entries = tmp;
+}
+
+static void hists__process(struct hists *old, struct hists *new)
+{
+	hists__match(new, old);
+
+	if (show_baseline_only)
+		hists__baseline_only(new);
+	else
+		hists__link(new, old);
+
+	if (sort_compute) {
+		hists__precompute(new);
+		hists__compute_resort(new);
+	}
+
+	hists__fprintf(new, true, 0, 0, stdout);
+}
+
 static int __cmd_diff(void)
 {
 	int ret, i;
@@ -213,8 +543,7 @@
 
 		first = false;
 
-		hists__match(&evsel_old->hists, &evsel->hists);
-		hists__fprintf(&evsel->hists, true, 0, 0, stdout);
+		hists__process(&evsel_old->hists, &evsel->hists);
 	}
 
 out_delete:
@@ -235,6 +564,16 @@
 		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('M', "displacement", &show_displacement,
 		    "Show position displacement relative to baseline"),
+	OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
+		    "Show only items with match in baseline"),
+	OPT_CALLBACK('c', "compute", &compute,
+		     "delta,ratio,wdiff:w1,w2 (default delta)",
+		     "Entries differential computation selection",
+		     setup_compute),
+	OPT_BOOLEAN('p', "period", &show_period,
+		    "Show period values."),
+	OPT_BOOLEAN('F', "formula", &show_formula,
+		    "Show formula."),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -263,12 +602,36 @@
 	/* No overhead column. */
 	perf_hpp__column_enable(PERF_HPP__OVERHEAD, false);
 
-	/* Display baseline/delta/displacement columns. */
+	/*
+	 * Display baseline/delta/ratio/displacement/
+	 * formula/periods columns.
+	 */
 	perf_hpp__column_enable(PERF_HPP__BASELINE, true);
-	perf_hpp__column_enable(PERF_HPP__DELTA, true);
+
+	switch (compute) {
+	case COMPUTE_DELTA:
+		perf_hpp__column_enable(PERF_HPP__DELTA, true);
+		break;
+	case COMPUTE_RATIO:
+		perf_hpp__column_enable(PERF_HPP__RATIO, true);
+		break;
+	case COMPUTE_WEIGHTED_DIFF:
+		perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF, true);
+		break;
+	default:
+		BUG_ON(1);
+	};
 
 	if (show_displacement)
 		perf_hpp__column_enable(PERF_HPP__DISPL, true);
+
+	if (show_formula)
+		perf_hpp__column_enable(PERF_HPP__FORMULA, true);
+
+	if (show_period) {
+		perf_hpp__column_enable(PERF_HPP__PERIOD, true);
+		perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE, true);
+	}
 }
 
 int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 997afb8..c20f1dc 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -48,12 +48,12 @@
 
 #define if_print(field) __if_print(&first, #field, pos->attr.field)
 
-static int __cmd_evlist(const char *input_name, struct perf_attr_details *details)
+static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
 {
 	struct perf_session *session;
 	struct perf_evsel *pos;
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false, NULL);
+	session = perf_session__new(file_name, O_RDONLY, 0, false, NULL);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -111,7 +111,6 @@
 int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct perf_attr_details details = { .verbose = false, };
-	const char *input_name = NULL;
 	const struct option options[] = {
 	OPT_STRING('i', "input", &input_name, "file", "Input file name"),
 	OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 4688bea..84ad6ab 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -8,33 +8,53 @@
 #include "builtin.h"
 
 #include "perf.h"
+#include "util/color.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/debug.h"
+#include "util/build-id.h"
 
 #include "util/parse-options.h"
 
+#include <linux/list.h>
+
 struct perf_inject {
 	struct perf_tool tool;
 	bool		 build_ids;
+	bool		 sched_stat;
+	const char	 *input_name;
+	int		 pipe_output,
+			 output;
+	u64		 bytes_written;
+	struct list_head samples;
 };
 
-static int perf_event__repipe_synth(struct perf_tool *tool __maybe_unused,
+struct event_entry {
+	struct list_head node;
+	u32		 tid;
+	union perf_event event[0];
+};
+
+static int perf_event__repipe_synth(struct perf_tool *tool,
 				    union perf_event *event,
 				    struct machine *machine __maybe_unused)
 {
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	uint32_t size;
 	void *buf = event;
 
 	size = event->header.size;
 
 	while (size) {
-		int ret = write(STDOUT_FILENO, buf, size);
+		int ret = write(inject->output, buf, size);
 		if (ret < 0)
 			return -errno;
 
 		size -= ret;
 		buf += ret;
+		inject->bytes_written += ret;
 	}
 
 	return 0;
@@ -80,12 +100,25 @@
 	return perf_event__repipe_synth(tool, event, machine);
 }
 
+typedef int (*inject_handler)(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct perf_evsel *evsel,
+			      struct machine *machine);
+
 static int perf_event__repipe_sample(struct perf_tool *tool,
 				     union perf_event *event,
-			      struct perf_sample *sample __maybe_unused,
-			      struct perf_evsel *evsel __maybe_unused,
-			      struct machine *machine)
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel,
+				     struct machine *machine)
 {
+	if (evsel->handler.func) {
+		inject_handler f = evsel->handler.func;
+		return f(tool, event, sample, evsel, machine);
+	}
+
+	build_id__mark_dso_hit(tool, event, sample, evsel, machine);
+
 	return perf_event__repipe_synth(tool, event, machine);
 }
 
@@ -102,14 +135,14 @@
 	return err;
 }
 
-static int perf_event__repipe_task(struct perf_tool *tool,
+static int perf_event__repipe_fork(struct perf_tool *tool,
 				   union perf_event *event,
 				   struct perf_sample *sample,
 				   struct machine *machine)
 {
 	int err;
 
-	err = perf_event__process_task(tool, event, sample, machine);
+	err = perf_event__process_fork(tool, event, sample, machine);
 	perf_event__repipe(tool, event, sample, machine);
 
 	return err;
@@ -210,6 +243,80 @@
 	return 0;
 }
 
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+					   union perf_event *event __maybe_unused,
+					   struct perf_sample *sample,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct machine *machine __maybe_unused)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	struct event_entry *ent;
+
+	list_for_each_entry(ent, &inject->samples, node) {
+		if (sample->tid == ent->tid) {
+			list_del_init(&ent->node);
+			free(ent);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel,
+				     struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	struct event_entry *ent;
+
+	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
+
+	ent = malloc(event->header.size + sizeof(struct event_entry));
+	if (ent == NULL) {
+		color_fprintf(stderr, PERF_COLOR_RED,
+			     "Not enough memory to process sched switch event!");
+		return -1;
+	}
+
+	ent->tid = sample->tid;
+	memcpy(&ent->event, event, event->header.size);
+	list_add(&ent->node, &inject->samples);
+	return 0;
+}
+
+static int perf_inject__sched_stat(struct perf_tool *tool,
+				   union perf_event *event __maybe_unused,
+				   struct perf_sample *sample,
+				   struct perf_evsel *evsel,
+				   struct machine *machine)
+{
+	struct event_entry *ent;
+	union perf_event *event_sw;
+	struct perf_sample sample_sw;
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	u32 pid = perf_evsel__intval(evsel, sample, "pid");
+
+	list_for_each_entry(ent, &inject->samples, node) {
+		if (pid == ent->tid)
+			goto found;
+	}
+
+	return 0;
+found:
+	event_sw = &ent->event[0];
+	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+	sample_sw.period = sample->period;
+	sample_sw.time	 = sample->time;
+	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+				      &sample_sw, false);
+	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
 extern volatile int session_done;
 
 static void sig_handler(int sig __maybe_unused)
@@ -217,6 +324,21 @@
 	session_done = 1;
 }
 
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+				   u64 sample_type, const char *sample_msg)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	const char *name = perf_evsel__name(evsel);
+
+	if (!(attr->sample_type & sample_type)) {
+		pr_err("Samples for %s event do not have %s attribute set.",
+			name, sample_msg);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int __cmd_inject(struct perf_inject *inject)
 {
 	struct perf_session *session;
@@ -224,19 +346,48 @@
 
 	signal(SIGINT, sig_handler);
 
-	if (inject->build_ids) {
-		inject->tool.sample	  = perf_event__inject_buildid;
+	if (inject->build_ids || inject->sched_stat) {
 		inject->tool.mmap	  = perf_event__repipe_mmap;
-		inject->tool.fork	  = perf_event__repipe_task;
+		inject->tool.fork	  = perf_event__repipe_fork;
 		inject->tool.tracing_data = perf_event__repipe_tracing_data;
 	}
 
-	session = perf_session__new("-", O_RDONLY, false, true, &inject->tool);
+	session = perf_session__new(inject->input_name, O_RDONLY, false, true, &inject->tool);
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (inject->build_ids) {
+		inject->tool.sample = perf_event__inject_buildid;
+	} else if (inject->sched_stat) {
+		struct perf_evsel *evsel;
+
+		inject->tool.ordered_samples = true;
+
+		list_for_each_entry(evsel, &session->evlist->entries, node) {
+			const char *name = perf_evsel__name(evsel);
+
+			if (!strcmp(name, "sched:sched_switch")) {
+				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+					return -EINVAL;
+
+				evsel->handler.func = perf_inject__sched_switch;
+			} else if (!strcmp(name, "sched:sched_process_exit"))
+				evsel->handler.func = perf_inject__sched_process_exit;
+			else if (!strncmp(name, "sched:sched_stat_", 17))
+				evsel->handler.func = perf_inject__sched_stat;
+		}
+	}
+
+	if (!inject->pipe_output)
+		lseek(inject->output, session->header.data_offset, SEEK_SET);
+
 	ret = perf_session__process_events(session, &inject->tool);
 
+	if (!inject->pipe_output) {
+		session->header.data_size = inject->bytes_written;
+		perf_session__write_header(session, session->evlist, inject->output, true);
+	}
+
 	perf_session__delete(session);
 
 	return ret;
@@ -260,10 +411,20 @@
 			.tracing_data	= perf_event__repipe_tracing_data_synth,
 			.build_id	= perf_event__repipe_op2_synth,
 		},
+		.input_name  = "-",
+		.samples = LIST_HEAD_INIT(inject.samples),
 	};
+	const char *output_name = "-";
 	const struct option options[] = {
 		OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
 			    "Inject build-ids into the output stream"),
+		OPT_STRING('i', "input", &inject.input_name, "file",
+			   "input file name"),
+		OPT_STRING('o', "output", &output_name, "file",
+			   "output file name"),
+		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
+			    "Merge sched-stat and sched-switch for getting events "
+			    "where and how long tasks slept"),
 		OPT_INCR('v', "verbose", &verbose,
 			 "be more verbose (show build ids, etc)"),
 		OPT_END()
@@ -281,6 +442,18 @@
 	if (argc)
 		usage_with_options(inject_usage, options);
 
+	if (!strcmp(output_name, "-")) {
+		inject.pipe_output = 1;
+		inject.output = STDOUT_FILENO;
+	} else {
+		inject.output = open(output_name, O_CREAT | O_WRONLY | O_TRUNC,
+						  S_IRUSR | S_IWUSR);
+		if (inject.output < 0) {
+			perror("failed to create output file");
+			return -1;
+		}
+	}
+
 	if (symbol__init() < 0)
 		return -1;
 
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 14bf82f..0b4b796 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -477,7 +477,7 @@
 	__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
 }
 
-static int __cmd_kmem(const char *input_name)
+static int __cmd_kmem(void)
 {
 	int err = -EINVAL;
 	struct perf_session *session;
@@ -743,7 +743,6 @@
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const default_sort_order = "frag,hit,bytes";
-	const char *input_name = NULL;
 	const struct option kmem_options[] = {
 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
 	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
@@ -779,7 +778,7 @@
 		if (list_empty(&alloc_sort))
 			setup_sorting(&alloc_sort, default_sort_order);
 
-		return __cmd_kmem(input_name);
+		return __cmd_kmem();
 	} else
 		usage_with_options(kmem_usage, kmem_options);
 
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 283b439..ca3f80e 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -314,9 +314,9 @@
 
 static void init_kvm_event_record(struct perf_kvm_stat *kvm)
 {
-	int i;
+	unsigned int i;
 
-	for (i = 0; i < (int)EVENTS_CACHE_SIZE; i++)
+	for (i = 0; i < EVENTS_CACHE_SIZE; i++)
 		INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
 }
 
@@ -370,9 +370,10 @@
 	BUG_ON(key->key == INVALID_KEY);
 
 	head = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
-	list_for_each_entry(event, head, hash_entry)
+	list_for_each_entry(event, head, hash_entry) {
 		if (event->key.key == key->key && event->key.info == key->info)
 			return event;
+	}
 
 	event = kvm_alloc_init_event(key);
 	if (!event)
@@ -417,7 +418,10 @@
 static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
 			     u64 time_diff)
 {
-	kvm_update_event_stats(&event->total, time_diff);
+	if (vcpu_id == -1) {
+		kvm_update_event_stats(&event->total, time_diff);
+		return true;
+	}
 
 	if (!kvm_event_expand(event, vcpu_id))
 		return false;
@@ -433,6 +437,12 @@
 {
 	struct kvm_event *event;
 	u64 time_begin, time_diff;
+	int vcpu;
+
+	if (kvm->trace_vcpu == -1)
+		vcpu = -1;
+	else
+		vcpu = vcpu_record->vcpu_id;
 
 	event = vcpu_record->last_event;
 	time_begin = vcpu_record->start_time;
@@ -462,7 +472,7 @@
 	BUG_ON(timestamp < time_begin);
 
 	time_diff = timestamp - time_begin;
-	return update_kvm_event(event, vcpu_record->vcpu_id, time_diff);
+	return update_kvm_event(event, vcpu, time_diff);
 }
 
 static
@@ -499,6 +509,11 @@
 	if (!vcpu_record)
 		return true;
 
+	/* only process events for vcpus user cares about */
+	if ((kvm->trace_vcpu != -1) &&
+	    (kvm->trace_vcpu != vcpu_record->vcpu_id))
+		return true;
+
 	if (kvm->events_ops->is_begin_event(evsel, sample, &key))
 		return handle_begin_event(kvm, vcpu_record, &key, sample->time);
 
@@ -598,13 +613,15 @@
 	int vcpu = kvm->trace_vcpu;
 	struct kvm_event *event;
 
-	for (i = 0; i < EVENTS_CACHE_SIZE; i++)
-		list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry)
+	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
+		list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry) {
 			if (event_is_valid(event, vcpu)) {
 				update_total_count(kvm, event);
 				insert_to_result(&kvm->result, event,
 						 kvm->compare, vcpu);
 			}
+		}
+	}
 }
 
 /* returns left most element of result, and erase it */
@@ -661,8 +678,8 @@
 		pr_info("\n");
 	}
 
-	pr_info("\nTotal Samples:%lld, Total events handled time:%.2fus.\n\n",
-		(unsigned long long)kvm->total_count, kvm->total_time / 1e3);
+	pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
+		kvm->total_count, kvm->total_time / 1e3);
 }
 
 static int process_sample_event(struct perf_tool *tool,
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 6f5f328..4258300 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -335,8 +335,6 @@
 	return NULL;
 }
 
-static const char *input_name;
-
 struct trace_lock_handler {
 	int (*acquire_event)(struct perf_evsel *evsel,
 			     struct perf_sample *sample);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e9231659..f3151d3 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -31,6 +31,38 @@
 #include <sched.h>
 #include <sys/mman.h>
 
+#ifndef HAVE_ON_EXIT
+#ifndef ATEXIT_MAX
+#define ATEXIT_MAX 32
+#endif
+static int __on_exit_count = 0;
+typedef void (*on_exit_func_t) (int, void *);
+static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
+static void *__on_exit_args[ATEXIT_MAX];
+static int __exitcode = 0;
+static void __handle_on_exit_funcs(void);
+static int on_exit(on_exit_func_t function, void *arg);
+#define exit(x) (exit)(__exitcode = (x))
+
+static int on_exit(on_exit_func_t function, void *arg)
+{
+	if (__on_exit_count == ATEXIT_MAX)
+		return -ENOMEM;
+	else if (__on_exit_count == 0)
+		atexit(__handle_on_exit_funcs);
+	__on_exit_funcs[__on_exit_count] = function;
+	__on_exit_args[__on_exit_count++] = arg;
+	return 0;
+}
+
+static void __handle_on_exit_funcs(void)
+{
+	int i;
+	for (i = 0; i < __on_exit_count; i++)
+		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
+}
+#endif
+
 enum write_mode_t {
 	WRITE_FORCE,
 	WRITE_APPEND
@@ -198,11 +230,15 @@
 	struct perf_record_opts *opts = &rec->opts;
 	int rc = 0;
 
-	perf_evlist__config_attrs(evlist, opts);
-
+	/*
+	 * Set the evsel leader links before we configure attributes,
+	 * since some might depend on this info.
+	 */
 	if (opts->group)
 		perf_evlist__set_leader(evlist);
 
+	perf_evlist__config_attrs(evlist, opts);
+
 	list_for_each_entry(pos, &evlist->entries, node) {
 		struct perf_event_attr *attr = &pos->attr;
 		/*
@@ -285,6 +321,11 @@
 					  perf_evsel__name(pos));
 				rc = -err;
 				goto out;
+			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
+				ui__error("\'precise\' request may not be supported. "
+					  "Try removing 'p' modifier\n");
+				rc = -err;
+				goto out;
 			}
 
 			printf("\n");
@@ -326,7 +367,8 @@
 			       "or try again with a smaller value of -m/--mmap_pages.\n"
 			       "(current value: %d)\n", opts->mmap_pages);
 			rc = -errno;
-		} else if (!is_power_of_2(opts->mmap_pages)) {
+		} else if (!is_power_of_2(opts->mmap_pages) &&
+			   (opts->mmap_pages != UINT_MAX)) {
 			pr_err("--mmap_pages/-m value must be a power of two.");
 			rc = -EINVAL;
 		} else {
@@ -460,6 +502,7 @@
 	struct perf_evlist *evsel_list = rec->evlist;
 	const char *output_name = rec->output_name;
 	struct perf_session *session;
+	bool disabled = false;
 
 	rec->progname = argv[0];
 
@@ -659,7 +702,13 @@
 		}
 	}
 
-	perf_evlist__enable(evsel_list);
+	/*
+	 * When perf is starting the traced process, all the events
+	 * (apart from group members) have enable_on_exec=1 set,
+	 * so don't spoil it by prematurely enabling them.
+	 */
+	if (!perf_target__none(&opts->target))
+		perf_evlist__enable(evsel_list);
 
 	/*
 	 * Let the child rip
@@ -682,8 +731,15 @@
 			waking++;
 		}
 
-		if (done)
+		/*
+		 * When perf is starting the traced process, at the end events
+		 * die with the process and we wait for that. Thus no need to
+		 * disable events in this case.
+		 */
+		if (done && !disabled && !perf_target__none(&opts->target)) {
 			perf_evlist__disable(evsel_list);
+			disabled = true;
+		}
 	}
 
 	if (quiet || signr == SIGUSR1)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index a61725d..fc25100 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -33,13 +33,13 @@
 #include "util/thread.h"
 #include "util/sort.h"
 #include "util/hist.h"
+#include "arch/common.h"
 
 #include <linux/bitmap.h>
 
 struct perf_report {
 	struct perf_tool	tool;
 	struct perf_session	*session;
-	char const		*input_name;
 	bool			force, use_tui, use_gtk, use_stdio;
 	bool			hide_unresolved;
 	bool			dont_use_callchains;
@@ -428,10 +428,11 @@
 	if (use_browser > 0) {
 		if (use_browser == 1) {
 			perf_evlist__tui_browse_hists(session->evlist, help,
-						      NULL, NULL, 0);
+						      NULL,
+						      &session->header.env);
 		} else if (use_browser == 2) {
 			perf_evlist__gtk_browse_hists(session->evlist, help,
-						      NULL, NULL, 0);
+						      NULL);
 		}
 	} else
 		perf_evlist__tty_browse_hists(session->evlist, rep, help);
@@ -556,8 +557,8 @@
 			.sample		 = process_sample_event,
 			.mmap		 = perf_event__process_mmap,
 			.comm		 = perf_event__process_comm,
-			.exit		 = perf_event__process_task,
-			.fork		 = perf_event__process_task,
+			.exit		 = perf_event__process_exit,
+			.fork		 = perf_event__process_fork,
 			.lost		 = perf_event__process_lost,
 			.read		 = process_read_event,
 			.attr		 = perf_event__process_attr,
@@ -570,7 +571,7 @@
 		.pretty_printing_style	 = "normal",
 	};
 	const struct option options[] = {
-	OPT_STRING('i', "input", &report.input_name, "file",
+	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
@@ -656,13 +657,13 @@
 	if (report.inverted_callchain)
 		callchain_param.order = ORDER_CALLER;
 
-	if (!report.input_name || !strlen(report.input_name)) {
+	if (!input_name || !strlen(input_name)) {
 		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
-			report.input_name = "-";
+			input_name = "-";
 		else
-			report.input_name = "perf.data";
+			input_name = "perf.data";
 	}
-	session = perf_session__new(report.input_name, O_RDONLY,
+	session = perf_session__new(input_name, O_RDONLY,
 				    report.force, false, &report.tool);
 	if (session == NULL)
 		return -ENOMEM;
@@ -687,7 +688,7 @@
 
 	}
 
-	if (strcmp(report.input_name, "-") != 0)
+	if (strcmp(input_name, "-") != 0)
 		setup_browser(true);
 	else {
 		use_browser = 0;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 3488ead..cc28b85 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -120,7 +120,6 @@
 
 struct perf_sched {
 	struct perf_tool tool;
-	const char	 *input_name;
 	const char	 *sort_order;
 	unsigned long	 nr_tasks;
 	struct task_desc *pid_to_task[MAX_PID];
@@ -1460,7 +1459,7 @@
 	};
 	struct perf_session *session;
 
-	session = perf_session__new(sched->input_name, O_RDONLY, 0, false, &sched->tool);
+	session = perf_session__new(input_name, O_RDONLY, 0, false, &sched->tool);
 	if (session == NULL) {
 		pr_debug("No Memory for session\n");
 		return -1;
@@ -1672,7 +1671,8 @@
 			.sample		 = perf_sched__process_tracepoint_sample,
 			.comm		 = perf_event__process_comm,
 			.lost		 = perf_event__process_lost,
-			.fork		 = perf_event__process_task,
+			.exit		 = perf_event__process_exit,
+			.fork		 = perf_event__process_fork,
 			.ordered_samples = true,
 		},
 		.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
@@ -1707,7 +1707,7 @@
 	OPT_END()
 	};
 	const struct option sched_options[] = {
-	OPT_STRING('i', "input", &sched.input_name, "file",
+	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index fb96250..b363e7b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -520,8 +520,8 @@
 	.sample		 = process_sample_event,
 	.mmap		 = perf_event__process_mmap,
 	.comm		 = perf_event__process_comm,
-	.exit		 = perf_event__process_task,
-	.fork		 = perf_event__process_task,
+	.exit		 = perf_event__process_exit,
+	.fork		 = perf_event__process_fork,
 	.attr		 = perf_event__process_attr,
 	.event_type	 = perf_event__process_event_type,
 	.tracing_data	 = perf_event__process_tracing_data,
@@ -1030,6 +1030,68 @@
 }
 
 /*
+ * Some scripts specify the required events in their "xxx-record" file,
+ * this function will check if the events in perf.data match those
+ * mentioned in the "xxx-record".
+ *
+ * Fixme: All existing "xxx-record" are all in good formats "-e event ",
+ * which is covered well now. And new parsing code should be added to
+ * cover the future complexing formats like event groups etc.
+ */
+static int check_ev_match(char *dir_name, char *scriptname,
+			struct perf_session *session)
+{
+	char filename[MAXPATHLEN], evname[128];
+	char line[BUFSIZ], *p;
+	struct perf_evsel *pos;
+	int match, len;
+	FILE *fp;
+
+	sprintf(filename, "%s/bin/%s-record", dir_name, scriptname);
+
+	fp = fopen(filename, "r");
+	if (!fp)
+		return -1;
+
+	while (fgets(line, sizeof(line), fp)) {
+		p = ltrim(line);
+		if (*p == '#')
+			continue;
+
+		while (strlen(p)) {
+			p = strstr(p, "-e");
+			if (!p)
+				break;
+
+			p += 2;
+			p = ltrim(p);
+			len = strcspn(p, " \t");
+			if (!len)
+				break;
+
+			snprintf(evname, len + 1, "%s", p);
+
+			match = 0;
+			list_for_each_entry(pos,
+					&session->evlist->entries, node) {
+				if (!strcmp(perf_evsel__name(pos), evname)) {
+					match = 1;
+					break;
+				}
+			}
+
+			if (!match) {
+				fclose(fp);
+				return -1;
+			}
+		}
+	}
+
+	fclose(fp);
+	return 0;
+}
+
+/*
  * Return -1 if none is found, otherwise the actual scripts number.
  *
  * Currently the only user of this function is the script browser, which
@@ -1039,17 +1101,23 @@
 int find_scripts(char **scripts_array, char **scripts_path_array)
 {
 	struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
-	char scripts_path[MAXPATHLEN];
+	char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
 	DIR *scripts_dir, *lang_dir;
-	char lang_path[MAXPATHLEN];
+	struct perf_session *session;
 	char *temp;
 	int i = 0;
 
+	session = perf_session__new(input_name, O_RDONLY, 0, false, NULL);
+	if (!session)
+		return -1;
+
 	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
 
 	scripts_dir = opendir(scripts_path);
-	if (!scripts_dir)
+	if (!scripts_dir) {
+		perf_session__delete(session);
 		return -1;
+	}
 
 	for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
 		snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
@@ -1077,10 +1145,18 @@
 			snprintf(scripts_array[i],
 				(temp - script_dirent.d_name) + 1,
 				"%s", script_dirent.d_name);
+
+			if (check_ev_match(lang_path,
+					scripts_array[i], session))
+				continue;
+
 			i++;
 		}
+		closedir(lang_dir);
 	}
 
+	closedir(scripts_dir);
+	perf_session__delete(session);
 	return i;
 }
 
@@ -1175,7 +1251,6 @@
 int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	bool show_full_info = false;
-	const char *input_name = NULL;
 	char *rec_script_path = NULL;
 	char *rep_script_path = NULL;
 	struct perf_session *session;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 93b9011..c247fac 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -57,6 +57,7 @@
 #include "util/thread.h"
 #include "util/thread_map.h"
 
+#include <stdlib.h>
 #include <sys/prctl.h>
 #include <locale.h>
 
@@ -83,6 +84,9 @@
 static bool			csv_output			= false;
 static bool			group				= false;
 static FILE			*output				= NULL;
+static const char		*pre_cmd			= NULL;
+static const char		*post_cmd			= NULL;
+static bool			sync_run			= false;
 
 static volatile int done = 0;
 
@@ -125,8 +129,7 @@
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
 
-static int create_perf_stat_counter(struct perf_evsel *evsel,
-				    struct perf_evsel *first)
+static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	bool exclude_guest_missing = false;
@@ -149,7 +152,8 @@
 		return 0;
 	}
 
-	if (!perf_target__has_task(&target) && (!group || evsel == first)) {
+	if (!perf_target__has_task(&target) &&
+	    !perf_evsel__is_group_member(evsel)) {
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
 	}
@@ -265,10 +269,10 @@
 	return 0;
 }
 
-static int run_perf_stat(int argc __maybe_unused, const char **argv)
+static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 {
 	unsigned long long t0, t1;
-	struct perf_evsel *counter, *first;
+	struct perf_evsel *counter;
 	int status = 0;
 	int child_ready_pipe[2], go_pipe[2];
 	const bool forks = (argc > 0);
@@ -328,10 +332,8 @@
 	if (group)
 		perf_evlist__set_leader(evsel_list);
 
-	first = perf_evlist__first(evsel_list);
-
 	list_for_each_entry(counter, &evsel_list->entries, node) {
-		if (create_perf_stat_counter(counter, first) < 0) {
+		if (create_perf_stat_counter(counter) < 0) {
 			/*
 			 * PPC returns ENXIO for HW counters until 2.6.37
 			 * (behavior changed with commit b0a873e).
@@ -405,6 +407,32 @@
 	return WEXITSTATUS(status);
 }
 
+static int run_perf_stat(int argc __maybe_unused, const char **argv)
+{
+	int ret;
+
+	if (pre_cmd) {
+		ret = system(pre_cmd);
+		if (ret)
+			return ret;
+	}
+
+	if (sync_run)
+		sync();
+
+	ret = __run_perf_stat(argc, argv);
+	if (ret)
+		return ret;
+
+	if (post_cmd) {
+		ret = system(post_cmd);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
 static void print_noise_pct(double total, double avg)
 {
 	double pct = rel_stddev_stats(total, avg);
@@ -1069,8 +1097,7 @@
 
 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	bool append_file = false,
-	     sync_run = false;
+	bool append_file = false;
 	int output_fd = 0;
 	const char *output_name	= NULL;
 	const struct option options[] = {
@@ -1114,6 +1141,10 @@
 	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
 	OPT_INTEGER(0, "log-fd", &output_fd,
 		    "log output to fd, instead of stderr"),
+	OPT_STRING(0, "pre", &pre_cmd, "command",
+			"command to run prior to the measured command"),
+	OPT_STRING(0, "post", &post_cmd, "command",
+			"command to run after to the measured command"),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
@@ -1238,9 +1269,6 @@
 			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
 				run_idx + 1);
 
-		if (sync_run)
-			sync();
-
 		status = run_perf_stat(argc, argv);
 	}
 
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
deleted file mode 100644
index 5acd6e8..0000000
--- a/tools/perf/builtin-test.c
+++ /dev/null
@@ -1,1547 +0,0 @@
-/*
- * builtin-test.c
- *
- * Builtin regression testing command: ever growing number of sanity tests
- */
-#include "builtin.h"
-
-#include "util/cache.h"
-#include "util/debug.h"
-#include "util/debugfs.h"
-#include "util/evlist.h"
-#include "util/parse-options.h"
-#include "util/parse-events.h"
-#include "util/symbol.h"
-#include "util/thread_map.h"
-#include "util/pmu.h"
-#include "event-parse.h"
-#include <linux/hw_breakpoint.h>
-
-#include <sys/mman.h>
-
-static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
-					   struct symbol *sym)
-{
-	bool *visited = symbol__priv(sym);
-	*visited = true;
-	return 0;
-}
-
-static int test__vmlinux_matches_kallsyms(void)
-{
-	int err = -1;
-	struct rb_node *nd;
-	struct symbol *sym;
-	struct map *kallsyms_map, *vmlinux_map;
-	struct machine kallsyms, vmlinux;
-	enum map_type type = MAP__FUNCTION;
-	long page_size = sysconf(_SC_PAGE_SIZE);
-	struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", };
-
-	/*
-	 * Step 1:
-	 *
-	 * Init the machines that will hold kernel, modules obtained from
-	 * both vmlinux + .ko files and from /proc/kallsyms split by modules.
-	 */
-	machine__init(&kallsyms, "", HOST_KERNEL_ID);
-	machine__init(&vmlinux, "", HOST_KERNEL_ID);
-
-	/*
-	 * Step 2:
-	 *
-	 * Create the kernel maps for kallsyms and the DSO where we will then
-	 * load /proc/kallsyms. Also create the modules maps from /proc/modules
-	 * and find the .ko files that match them in /lib/modules/`uname -r`/.
-	 */
-	if (machine__create_kernel_maps(&kallsyms) < 0) {
-		pr_debug("machine__create_kernel_maps ");
-		return -1;
-	}
-
-	/*
-	 * Step 3:
-	 *
-	 * Load and split /proc/kallsyms into multiple maps, one per module.
-	 */
-	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, NULL) <= 0) {
-		pr_debug("dso__load_kallsyms ");
-		goto out;
-	}
-
-	/*
-	 * Step 4:
-	 *
-	 * kallsyms will be internally on demand sorted by name so that we can
-	 * find the reference relocation * symbol, i.e. the symbol we will use
-	 * to see if the running kernel was relocated by checking if it has the
-	 * same value in the vmlinux file we load.
-	 */
-	kallsyms_map = machine__kernel_map(&kallsyms, type);
-
-	sym = map__find_symbol_by_name(kallsyms_map, ref_reloc_sym.name, NULL);
-	if (sym == NULL) {
-		pr_debug("dso__find_symbol_by_name ");
-		goto out;
-	}
-
-	ref_reloc_sym.addr = sym->start;
-
-	/*
-	 * Step 5:
-	 *
-	 * Now repeat step 2, this time for the vmlinux file we'll auto-locate.
-	 */
-	if (machine__create_kernel_maps(&vmlinux) < 0) {
-		pr_debug("machine__create_kernel_maps ");
-		goto out;
-	}
-
-	vmlinux_map = machine__kernel_map(&vmlinux, type);
-	map__kmap(vmlinux_map)->ref_reloc_sym = &ref_reloc_sym;
-
-	/*
-	 * Step 6:
-	 *
-	 * Locate a vmlinux file in the vmlinux path that has a buildid that
-	 * matches the one of the running kernel.
-	 *
-	 * While doing that look if we find the ref reloc symbol, if we find it
-	 * we'll have its ref_reloc_symbol.unrelocated_addr and then
-	 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
-	 * to fixup the symbols.
-	 */
-	if (machine__load_vmlinux_path(&vmlinux, type,
-				       vmlinux_matches_kallsyms_filter) <= 0) {
-		pr_debug("machine__load_vmlinux_path ");
-		goto out;
-	}
-
-	err = 0;
-	/*
-	 * Step 7:
-	 *
-	 * Now look at the symbols in the vmlinux DSO and check if we find all of them
-	 * in the kallsyms dso. For the ones that are in both, check its names and
-	 * end addresses too.
-	 */
-	for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
-		struct symbol *pair, *first_pair;
-		bool backwards = true;
-
-		sym  = rb_entry(nd, struct symbol, rb_node);
-
-		if (sym->start == sym->end)
-			continue;
-
-		first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
-		pair = first_pair;
-
-		if (pair && pair->start == sym->start) {
-next_pair:
-			if (strcmp(sym->name, pair->name) == 0) {
-				/*
-				 * kallsyms don't have the symbol end, so we
-				 * set that by using the next symbol start - 1,
-				 * in some cases we get this up to a page
-				 * wrong, trace_kmalloc when I was developing
-				 * this code was one such example, 2106 bytes
-				 * off the real size. More than that and we
-				 * _really_ have a problem.
-				 */
-				s64 skew = sym->end - pair->end;
-				if (llabs(skew) < page_size)
-					continue;
-
-				pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
-					 sym->start, sym->name, sym->end, pair->end);
-			} else {
-				struct rb_node *nnd;
-detour:
-				nnd = backwards ? rb_prev(&pair->rb_node) :
-						  rb_next(&pair->rb_node);
-				if (nnd) {
-					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
-
-					if (next->start == sym->start) {
-						pair = next;
-						goto next_pair;
-					}
-				}
-
-				if (backwards) {
-					backwards = false;
-					pair = first_pair;
-					goto detour;
-				}
-
-				pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
-					 sym->start, sym->name, pair->name);
-			}
-		} else
-			pr_debug("%#" PRIx64 ": %s not on kallsyms\n", sym->start, sym->name);
-
-		err = -1;
-	}
-
-	if (!verbose)
-		goto out;
-
-	pr_info("Maps only in vmlinux:\n");
-
-	for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
-		struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
-		/*
-		 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
-		 * the kernel will have the path for the vmlinux file being used,
-		 * so use the short name, less descriptive but the same ("[kernel]" in
-		 * both cases.
-		 */
-		pair = map_groups__find_by_name(&kallsyms.kmaps, type,
-						(pos->dso->kernel ?
-							pos->dso->short_name :
-							pos->dso->name));
-		if (pair)
-			pair->priv = 1;
-		else
-			map__fprintf(pos, stderr);
-	}
-
-	pr_info("Maps in vmlinux with a different name in kallsyms:\n");
-
-	for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
-		struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
-
-		pair = map_groups__find(&kallsyms.kmaps, type, pos->start);
-		if (pair == NULL || pair->priv)
-			continue;
-
-		if (pair->start == pos->start) {
-			pair->priv = 1;
-			pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
-				pos->start, pos->end, pos->pgoff, pos->dso->name);
-			if (pos->pgoff != pair->pgoff || pos->end != pair->end)
-				pr_info(": \n*%" PRIx64 "-%" PRIx64 " %" PRIx64 "",
-					pair->start, pair->end, pair->pgoff);
-			pr_info(" %s\n", pair->dso->name);
-			pair->priv = 1;
-		}
-	}
-
-	pr_info("Maps only in kallsyms:\n");
-
-	for (nd = rb_first(&kallsyms.kmaps.maps[type]);
-	     nd; nd = rb_next(nd)) {
-		struct map *pos = rb_entry(nd, struct map, rb_node);
-
-		if (!pos->priv)
-			map__fprintf(pos, stderr);
-	}
-out:
-	return err;
-}
-
-#include "util/cpumap.h"
-#include "util/evsel.h"
-#include <sys/types.h>
-
-static int trace_event__id(const char *evname)
-{
-	char *filename;
-	int err = -1, fd;
-
-	if (asprintf(&filename,
-		     "%s/syscalls/%s/id",
-		     tracing_events_path, evname) < 0)
-		return -1;
-
-	fd = open(filename, O_RDONLY);
-	if (fd >= 0) {
-		char id[16];
-		if (read(fd, id, sizeof(id)) > 0)
-			err = atoi(id);
-		close(fd);
-	}
-
-	free(filename);
-	return err;
-}
-
-static int test__open_syscall_event(void)
-{
-	int err = -1, fd;
-	struct thread_map *threads;
-	struct perf_evsel *evsel;
-	struct perf_event_attr attr;
-	unsigned int nr_open_calls = 111, i;
-	int id = trace_event__id("sys_enter_open");
-
-	if (id < 0) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
-		return -1;
-	}
-
-	threads = thread_map__new(-1, getpid(), UINT_MAX);
-	if (threads == NULL) {
-		pr_debug("thread_map__new\n");
-		return -1;
-	}
-
-	memset(&attr, 0, sizeof(attr));
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.config = id;
-	evsel = perf_evsel__new(&attr, 0);
-	if (evsel == NULL) {
-		pr_debug("perf_evsel__new\n");
-		goto out_thread_map_delete;
-	}
-
-	if (perf_evsel__open_per_thread(evsel, threads) < 0) {
-		pr_debug("failed to open counter: %s, "
-			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
-			 strerror(errno));
-		goto out_evsel_delete;
-	}
-
-	for (i = 0; i < nr_open_calls; ++i) {
-		fd = open("/etc/passwd", O_RDONLY);
-		close(fd);
-	}
-
-	if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
-		pr_debug("perf_evsel__read_on_cpu\n");
-		goto out_close_fd;
-	}
-
-	if (evsel->counts->cpu[0].val != nr_open_calls) {
-		pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
-			 nr_open_calls, evsel->counts->cpu[0].val);
-		goto out_close_fd;
-	}
-	
-	err = 0;
-out_close_fd:
-	perf_evsel__close_fd(evsel, 1, threads->nr);
-out_evsel_delete:
-	perf_evsel__delete(evsel);
-out_thread_map_delete:
-	thread_map__delete(threads);
-	return err;
-}
-
-#include <sched.h>
-
-static int test__open_syscall_event_on_all_cpus(void)
-{
-	int err = -1, fd, cpu;
-	struct thread_map *threads;
-	struct cpu_map *cpus;
-	struct perf_evsel *evsel;
-	struct perf_event_attr attr;
-	unsigned int nr_open_calls = 111, i;
-	cpu_set_t cpu_set;
-	int id = trace_event__id("sys_enter_open");
-
-	if (id < 0) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
-		return -1;
-	}
-
-	threads = thread_map__new(-1, getpid(), UINT_MAX);
-	if (threads == NULL) {
-		pr_debug("thread_map__new\n");
-		return -1;
-	}
-
-	cpus = cpu_map__new(NULL);
-	if (cpus == NULL) {
-		pr_debug("cpu_map__new\n");
-		goto out_thread_map_delete;
-	}
-
-
-	CPU_ZERO(&cpu_set);
-
-	memset(&attr, 0, sizeof(attr));
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.config = id;
-	evsel = perf_evsel__new(&attr, 0);
-	if (evsel == NULL) {
-		pr_debug("perf_evsel__new\n");
-		goto out_thread_map_delete;
-	}
-
-	if (perf_evsel__open(evsel, cpus, threads) < 0) {
-		pr_debug("failed to open counter: %s, "
-			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
-			 strerror(errno));
-		goto out_evsel_delete;
-	}
-
-	for (cpu = 0; cpu < cpus->nr; ++cpu) {
-		unsigned int ncalls = nr_open_calls + cpu;
-		/*
-		 * XXX eventually lift this restriction in a way that
-		 * keeps perf building on older glibc installations
-		 * without CPU_ALLOC. 1024 cpus in 2010 still seems
-		 * a reasonable upper limit tho :-)
-		 */
-		if (cpus->map[cpu] >= CPU_SETSIZE) {
-			pr_debug("Ignoring CPU %d\n", cpus->map[cpu]);
-			continue;
-		}
-
-		CPU_SET(cpus->map[cpu], &cpu_set);
-		if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
-			pr_debug("sched_setaffinity() failed on CPU %d: %s ",
-				 cpus->map[cpu],
-				 strerror(errno));
-			goto out_close_fd;
-		}
-		for (i = 0; i < ncalls; ++i) {
-			fd = open("/etc/passwd", O_RDONLY);
-			close(fd);
-		}
-		CPU_CLR(cpus->map[cpu], &cpu_set);
-	}
-
-	/*
-	 * Here we need to explicitely preallocate the counts, as if
-	 * we use the auto allocation it will allocate just for 1 cpu,
-	 * as we start by cpu 0.
-	 */
-	if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
-		pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
-		goto out_close_fd;
-	}
-
-	err = 0;
-
-	for (cpu = 0; cpu < cpus->nr; ++cpu) {
-		unsigned int expected;
-
-		if (cpus->map[cpu] >= CPU_SETSIZE)
-			continue;
-
-		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
-			pr_debug("perf_evsel__read_on_cpu\n");
-			err = -1;
-			break;
-		}
-
-		expected = nr_open_calls + cpu;
-		if (evsel->counts->cpu[cpu].val != expected) {
-			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
-				 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
-			err = -1;
-		}
-	}
-
-out_close_fd:
-	perf_evsel__close_fd(evsel, 1, threads->nr);
-out_evsel_delete:
-	perf_evsel__delete(evsel);
-out_thread_map_delete:
-	thread_map__delete(threads);
-	return err;
-}
-
-/*
- * This test will generate random numbers of calls to some getpid syscalls,
- * then establish an mmap for a group of events that are created to monitor
- * the syscalls.
- *
- * It will receive the events, using mmap, use its PERF_SAMPLE_ID generated
- * sample.id field to map back to its respective perf_evsel instance.
- *
- * Then it checks if the number of syscalls reported as perf events by
- * the kernel corresponds to the number of syscalls made.
- */
-static int test__basic_mmap(void)
-{
-	int err = -1;
-	union perf_event *event;
-	struct thread_map *threads;
-	struct cpu_map *cpus;
-	struct perf_evlist *evlist;
-	struct perf_event_attr attr = {
-		.type		= PERF_TYPE_TRACEPOINT,
-		.read_format	= PERF_FORMAT_ID,
-		.sample_type	= PERF_SAMPLE_ID,
-		.watermark	= 0,
-	};
-	cpu_set_t cpu_set;
-	const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
-					"getpgid", };
-	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
-				      (void*)getpgid };
-#define nsyscalls ARRAY_SIZE(syscall_names)
-	int ids[nsyscalls];
-	unsigned int nr_events[nsyscalls],
-		     expected_nr_events[nsyscalls], i, j;
-	struct perf_evsel *evsels[nsyscalls], *evsel;
-
-	for (i = 0; i < nsyscalls; ++i) {
-		char name[64];
-
-		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
-		ids[i] = trace_event__id(name);
-		if (ids[i] < 0) {
-			pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
-			return -1;
-		}
-		nr_events[i] = 0;
-		expected_nr_events[i] = random() % 257;
-	}
-
-	threads = thread_map__new(-1, getpid(), UINT_MAX);
-	if (threads == NULL) {
-		pr_debug("thread_map__new\n");
-		return -1;
-	}
-
-	cpus = cpu_map__new(NULL);
-	if (cpus == NULL) {
-		pr_debug("cpu_map__new\n");
-		goto out_free_threads;
-	}
-
-	CPU_ZERO(&cpu_set);
-	CPU_SET(cpus->map[0], &cpu_set);
-	sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
-	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
-		pr_debug("sched_setaffinity() failed on CPU %d: %s ",
-			 cpus->map[0], strerror(errno));
-		goto out_free_cpus;
-	}
-
-	evlist = perf_evlist__new(cpus, threads);
-	if (evlist == NULL) {
-		pr_debug("perf_evlist__new\n");
-		goto out_free_cpus;
-	}
-
-	/* anonymous union fields, can't be initialized above */
-	attr.wakeup_events = 1;
-	attr.sample_period = 1;
-
-	for (i = 0; i < nsyscalls; ++i) {
-		attr.config = ids[i];
-		evsels[i] = perf_evsel__new(&attr, i);
-		if (evsels[i] == NULL) {
-			pr_debug("perf_evsel__new\n");
-			goto out_free_evlist;
-		}
-
-		perf_evlist__add(evlist, evsels[i]);
-
-		if (perf_evsel__open(evsels[i], cpus, threads) < 0) {
-			pr_debug("failed to open counter: %s, "
-				 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
-				 strerror(errno));
-			goto out_close_fd;
-		}
-	}
-
-	if (perf_evlist__mmap(evlist, 128, true) < 0) {
-		pr_debug("failed to mmap events: %d (%s)\n", errno,
-			 strerror(errno));
-		goto out_close_fd;
-	}
-
-	for (i = 0; i < nsyscalls; ++i)
-		for (j = 0; j < expected_nr_events[i]; ++j) {
-			int foo = syscalls[i]();
-			++foo;
-		}
-
-	while ((event = perf_evlist__mmap_read(evlist, 0)) != NULL) {
-		struct perf_sample sample;
-
-		if (event->header.type != PERF_RECORD_SAMPLE) {
-			pr_debug("unexpected %s event\n",
-				 perf_event__name(event->header.type));
-			goto out_munmap;
-		}
-
-		err = perf_evlist__parse_sample(evlist, event, &sample);
-		if (err) {
-			pr_err("Can't parse sample, err = %d\n", err);
-			goto out_munmap;
-		}
-
-		evsel = perf_evlist__id2evsel(evlist, sample.id);
-		if (evsel == NULL) {
-			pr_debug("event with id %" PRIu64
-				 " doesn't map to an evsel\n", sample.id);
-			goto out_munmap;
-		}
-		nr_events[evsel->idx]++;
-	}
-
-	list_for_each_entry(evsel, &evlist->entries, node) {
-		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
-			pr_debug("expected %d %s events, got %d\n",
-				 expected_nr_events[evsel->idx],
-				 perf_evsel__name(evsel), nr_events[evsel->idx]);
-			goto out_munmap;
-		}
-	}
-
-	err = 0;
-out_munmap:
-	perf_evlist__munmap(evlist);
-out_close_fd:
-	for (i = 0; i < nsyscalls; ++i)
-		perf_evsel__close_fd(evsels[i], 1, threads->nr);
-out_free_evlist:
-	perf_evlist__delete(evlist);
-out_free_cpus:
-	cpu_map__delete(cpus);
-out_free_threads:
-	thread_map__delete(threads);
-	return err;
-#undef nsyscalls
-}
-
-static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t **maskp,
-					 size_t *sizep)
-{
-	cpu_set_t *mask;
-	size_t size;
-	int i, cpu = -1, nrcpus = 1024;
-realloc:
-	mask = CPU_ALLOC(nrcpus);
-	size = CPU_ALLOC_SIZE(nrcpus);
-	CPU_ZERO_S(size, mask);
-
-	if (sched_getaffinity(pid, size, mask) == -1) {
-		CPU_FREE(mask);
-		if (errno == EINVAL && nrcpus < (1024 << 8)) {
-			nrcpus = nrcpus << 2;
-			goto realloc;
-		}
-		perror("sched_getaffinity");
-			return -1;
-	}
-
-	for (i = 0; i < nrcpus; i++) {
-		if (CPU_ISSET_S(i, size, mask)) {
-			if (cpu == -1) {
-				cpu = i;
-				*maskp = mask;
-				*sizep = size;
-			} else
-				CPU_CLR_S(i, size, mask);
-		}
-	}
-
-	if (cpu == -1)
-		CPU_FREE(mask);
-
-	return cpu;
-}
-
-static int test__PERF_RECORD(void)
-{
-	struct perf_record_opts opts = {
-		.target = {
-			.uid = UINT_MAX,
-			.uses_mmap = true,
-		},
-		.no_delay   = true,
-		.freq	    = 10,
-		.mmap_pages = 256,
-	};
-	cpu_set_t *cpu_mask = NULL;
-	size_t cpu_mask_size = 0;
-	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
-	struct perf_evsel *evsel;
-	struct perf_sample sample;
-	const char *cmd = "sleep";
-	const char *argv[] = { cmd, "1", NULL, };
-	char *bname;
-	u64 prev_time = 0;
-	bool found_cmd_mmap = false,
-	     found_libc_mmap = false,
-	     found_vdso_mmap = false,
-	     found_ld_mmap = false;
-	int err = -1, errs = 0, i, wakeups = 0;
-	u32 cpu;
-	int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
-
-	if (evlist == NULL || argv == NULL) {
-		pr_debug("Not enough memory to create evlist\n");
-		goto out;
-	}
-
-	/*
-	 * We need at least one evsel in the evlist, use the default
-	 * one: "cycles".
-	 */
-	err = perf_evlist__add_default(evlist);
-	if (err < 0) {
-		pr_debug("Not enough memory to create evsel\n");
-		goto out_delete_evlist;
-	}
-
-	/*
-	 * Create maps of threads and cpus to monitor. In this case
-	 * we start with all threads and cpus (-1, -1) but then in
-	 * perf_evlist__prepare_workload we'll fill in the only thread
-	 * we're monitoring, the one forked there.
-	 */
-	err = perf_evlist__create_maps(evlist, &opts.target);
-	if (err < 0) {
-		pr_debug("Not enough memory to create thread/cpu maps\n");
-		goto out_delete_evlist;
-	}
-
-	/*
-	 * Prepare the workload in argv[] to run, it'll fork it, and then wait
-	 * for perf_evlist__start_workload() to exec it. This is done this way
-	 * so that we have time to open the evlist (calling sys_perf_event_open
-	 * on all the fds) and then mmap them.
-	 */
-	err = perf_evlist__prepare_workload(evlist, &opts, argv);
-	if (err < 0) {
-		pr_debug("Couldn't run the workload!\n");
-		goto out_delete_evlist;
-	}
-
-	/*
-	 * Config the evsels, setting attr->comm on the first one, etc.
-	 */
-	evsel = perf_evlist__first(evlist);
-	evsel->attr.sample_type |= PERF_SAMPLE_CPU;
-	evsel->attr.sample_type |= PERF_SAMPLE_TID;
-	evsel->attr.sample_type |= PERF_SAMPLE_TIME;
-	perf_evlist__config_attrs(evlist, &opts);
-
-	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask,
-					    &cpu_mask_size);
-	if (err < 0) {
-		pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
-		goto out_delete_evlist;
-	}
-
-	cpu = err;
-
-	/*
-	 * So that we can check perf_sample.cpu on all the samples.
-	 */
-	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
-		pr_debug("sched_setaffinity: %s\n", strerror(errno));
-		goto out_free_cpu_mask;
-	}
-
-	/*
-	 * Call sys_perf_event_open on all the fds on all the evsels,
-	 * grouping them if asked to.
-	 */
-	err = perf_evlist__open(evlist);
-	if (err < 0) {
-		pr_debug("perf_evlist__open: %s\n", strerror(errno));
-		goto out_delete_evlist;
-	}
-
-	/*
-	 * mmap the first fd on a given CPU and ask for events for the other
-	 * fds in the same CPU to be injected in the same mmap ring buffer
-	 * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
-	 */
-	err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
-	if (err < 0) {
-		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
-		goto out_delete_evlist;
-	}
-
-	/*
-	 * Now that all is properly set up, enable the events, they will
-	 * count just on workload.pid, which will start...
-	 */
-	perf_evlist__enable(evlist);
-
-	/*
-	 * Now!
-	 */
-	perf_evlist__start_workload(evlist);
-
-	while (1) {
-		int before = total_events;
-
-		for (i = 0; i < evlist->nr_mmaps; i++) {
-			union perf_event *event;
-
-			while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
-				const u32 type = event->header.type;
-				const char *name = perf_event__name(type);
-
-				++total_events;
-				if (type < PERF_RECORD_MAX)
-					nr_events[type]++;
-
-				err = perf_evlist__parse_sample(evlist, event, &sample);
-				if (err < 0) {
-					if (verbose)
-						perf_event__fprintf(event, stderr);
-					pr_debug("Couldn't parse sample\n");
-					goto out_err;
-				}
-
-				if (verbose) {
-					pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
-					perf_event__fprintf(event, stderr);
-				}
-
-				if (prev_time > sample.time) {
-					pr_debug("%s going backwards in time, prev=%" PRIu64 ", curr=%" PRIu64 "\n",
-						 name, prev_time, sample.time);
-					++errs;
-				}
-
-				prev_time = sample.time;
-
-				if (sample.cpu != cpu) {
-					pr_debug("%s with unexpected cpu, expected %d, got %d\n",
-						 name, cpu, sample.cpu);
-					++errs;
-				}
-
-				if ((pid_t)sample.pid != evlist->workload.pid) {
-					pr_debug("%s with unexpected pid, expected %d, got %d\n",
-						 name, evlist->workload.pid, sample.pid);
-					++errs;
-				}
-
-				if ((pid_t)sample.tid != evlist->workload.pid) {
-					pr_debug("%s with unexpected tid, expected %d, got %d\n",
-						 name, evlist->workload.pid, sample.tid);
-					++errs;
-				}
-
-				if ((type == PERF_RECORD_COMM ||
-				     type == PERF_RECORD_MMAP ||
-				     type == PERF_RECORD_FORK ||
-				     type == PERF_RECORD_EXIT) &&
-				     (pid_t)event->comm.pid != evlist->workload.pid) {
-					pr_debug("%s with unexpected pid/tid\n", name);
-					++errs;
-				}
-
-				if ((type == PERF_RECORD_COMM ||
-				     type == PERF_RECORD_MMAP) &&
-				     event->comm.pid != event->comm.tid) {
-					pr_debug("%s with different pid/tid!\n", name);
-					++errs;
-				}
-
-				switch (type) {
-				case PERF_RECORD_COMM:
-					if (strcmp(event->comm.comm, cmd)) {
-						pr_debug("%s with unexpected comm!\n", name);
-						++errs;
-					}
-					break;
-				case PERF_RECORD_EXIT:
-					goto found_exit;
-				case PERF_RECORD_MMAP:
-					bname = strrchr(event->mmap.filename, '/');
-					if (bname != NULL) {
-						if (!found_cmd_mmap)
-							found_cmd_mmap = !strcmp(bname + 1, cmd);
-						if (!found_libc_mmap)
-							found_libc_mmap = !strncmp(bname + 1, "libc", 4);
-						if (!found_ld_mmap)
-							found_ld_mmap = !strncmp(bname + 1, "ld", 2);
-					} else if (!found_vdso_mmap)
-						found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]");
-					break;
-
-				case PERF_RECORD_SAMPLE:
-					/* Just ignore samples for now */
-					break;
-				default:
-					pr_debug("Unexpected perf_event->header.type %d!\n",
-						 type);
-					++errs;
-				}
-			}
-		}
-
-		/*
-		 * We don't use poll here because at least at 3.1 times the
-		 * PERF_RECORD_{!SAMPLE} events don't honour
-		 * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
-		 */
-		if (total_events == before && false)
-			poll(evlist->pollfd, evlist->nr_fds, -1);
-
-		sleep(1);
-		if (++wakeups > 5) {
-			pr_debug("No PERF_RECORD_EXIT event!\n");
-			break;
-		}
-	}
-
-found_exit:
-	if (nr_events[PERF_RECORD_COMM] > 1) {
-		pr_debug("Excessive number of PERF_RECORD_COMM events!\n");
-		++errs;
-	}
-
-	if (nr_events[PERF_RECORD_COMM] == 0) {
-		pr_debug("Missing PERF_RECORD_COMM for %s!\n", cmd);
-		++errs;
-	}
-
-	if (!found_cmd_mmap) {
-		pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd);
-		++errs;
-	}
-
-	if (!found_libc_mmap) {
-		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "libc");
-		++errs;
-	}
-
-	if (!found_ld_mmap) {
-		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "ld");
-		++errs;
-	}
-
-	if (!found_vdso_mmap) {
-		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]");
-		++errs;
-	}
-out_err:
-	perf_evlist__munmap(evlist);
-out_free_cpu_mask:
-	CPU_FREE(cpu_mask);
-out_delete_evlist:
-	perf_evlist__delete(evlist);
-out:
-	return (err < 0 || errs > 0) ? -1 : 0;
-}
-
-
-#if defined(__x86_64__) || defined(__i386__)
-
-#define barrier() asm volatile("" ::: "memory")
-
-static u64 rdpmc(unsigned int counter)
-{
-	unsigned int low, high;
-
-	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
-
-	return low | ((u64)high) << 32;
-}
-
-static u64 rdtsc(void)
-{
-	unsigned int low, high;
-
-	asm volatile("rdtsc" : "=a" (low), "=d" (high));
-
-	return low | ((u64)high) << 32;
-}
-
-static u64 mmap_read_self(void *addr)
-{
-	struct perf_event_mmap_page *pc = addr;
-	u32 seq, idx, time_mult = 0, time_shift = 0;
-	u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
-
-	do {
-		seq = pc->lock;
-		barrier();
-
-		enabled = pc->time_enabled;
-		running = pc->time_running;
-
-		if (enabled != running) {
-			cyc = rdtsc();
-			time_mult = pc->time_mult;
-			time_shift = pc->time_shift;
-			time_offset = pc->time_offset;
-		}
-
-		idx = pc->index;
-		count = pc->offset;
-		if (idx)
-			count += rdpmc(idx - 1);
-
-		barrier();
-	} while (pc->lock != seq);
-
-	if (enabled != running) {
-		u64 quot, rem;
-
-		quot = (cyc >> time_shift);
-		rem = cyc & ((1 << time_shift) - 1);
-		delta = time_offset + quot * time_mult +
-			((rem * time_mult) >> time_shift);
-
-		enabled += delta;
-		if (idx)
-			running += delta;
-
-		quot = count / running;
-		rem = count % running;
-		count = quot * enabled + (rem * enabled) / running;
-	}
-
-	return count;
-}
-
-/*
- * If the RDPMC instruction faults then signal this back to the test parent task:
- */
-static void segfault_handler(int sig __maybe_unused,
-			     siginfo_t *info __maybe_unused,
-			     void *uc __maybe_unused)
-{
-	exit(-1);
-}
-
-static int __test__rdpmc(void)
-{
-	long page_size = sysconf(_SC_PAGE_SIZE);
-	volatile int tmp = 0;
-	u64 i, loops = 1000;
-	int n;
-	int fd;
-	void *addr;
-	struct perf_event_attr attr = {
-		.type = PERF_TYPE_HARDWARE,
-		.config = PERF_COUNT_HW_INSTRUCTIONS,
-		.exclude_kernel = 1,
-	};
-	u64 delta_sum = 0;
-        struct sigaction sa;
-
-	sigfillset(&sa.sa_mask);
-	sa.sa_sigaction = segfault_handler;
-	sigaction(SIGSEGV, &sa, NULL);
-
-	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
-	if (fd < 0) {
-		pr_err("Error: sys_perf_event_open() syscall returned "
-		       "with %d (%s)\n", fd, strerror(errno));
-		return -1;
-	}
-
-	addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
-	if (addr == (void *)(-1)) {
-		pr_err("Error: mmap() syscall returned with (%s)\n",
-		       strerror(errno));
-		goto out_close;
-	}
-
-	for (n = 0; n < 6; n++) {
-		u64 stamp, now, delta;
-
-		stamp = mmap_read_self(addr);
-
-		for (i = 0; i < loops; i++)
-			tmp++;
-
-		now = mmap_read_self(addr);
-		loops *= 10;
-
-		delta = now - stamp;
-		pr_debug("%14d: %14Lu\n", n, (long long)delta);
-
-		delta_sum += delta;
-	}
-
-	munmap(addr, page_size);
-	pr_debug("   ");
-out_close:
-	close(fd);
-
-	if (!delta_sum)
-		return -1;
-
-	return 0;
-}
-
-static int test__rdpmc(void)
-{
-	int status = 0;
-	int wret = 0;
-	int ret;
-	int pid;
-
-	pid = fork();
-	if (pid < 0)
-		return -1;
-
-	if (!pid) {
-		ret = __test__rdpmc();
-
-		exit(ret);
-	}
-
-	wret = waitpid(pid, &status, 0);
-	if (wret < 0 || status)
-		return -1;
-
-	return 0;
-}
-
-#endif
-
-static int test__perf_pmu(void)
-{
-	return perf_pmu__test();
-}
-
-static int perf_evsel__roundtrip_cache_name_test(void)
-{
-	char name[128];
-	int type, op, err = 0, ret = 0, i, idx;
-	struct perf_evsel *evsel;
-        struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
-
-        if (evlist == NULL)
-                return -ENOMEM;
-
-	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
-		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
-			/* skip invalid cache type */
-			if (!perf_evsel__is_cache_op_valid(type, op))
-				continue;
-
-			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
-				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
-									name, sizeof(name));
-				err = parse_events(evlist, name, 0);
-				if (err)
-					ret = err;
-			}
-		}
-	}
-
-	idx = 0;
-	evsel = perf_evlist__first(evlist);
-
-	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
-		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
-			/* skip invalid cache type */
-			if (!perf_evsel__is_cache_op_valid(type, op))
-				continue;
-
-			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
-				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
-									name, sizeof(name));
-				if (evsel->idx != idx)
-					continue;
-
-				++idx;
-
-				if (strcmp(perf_evsel__name(evsel), name)) {
-					pr_debug("%s != %s\n", perf_evsel__name(evsel), name);
-					ret = -1;
-				}
-
-				evsel = perf_evsel__next(evsel);
-			}
-		}
-	}
-
-	perf_evlist__delete(evlist);
-	return ret;
-}
-
-static int __perf_evsel__name_array_test(const char *names[], int nr_names)
-{
-	int i, err;
-	struct perf_evsel *evsel;
-        struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
-
-        if (evlist == NULL)
-                return -ENOMEM;
-
-	for (i = 0; i < nr_names; ++i) {
-		err = parse_events(evlist, names[i], 0);
-		if (err) {
-			pr_debug("failed to parse event '%s', err %d\n",
-				 names[i], err);
-			goto out_delete_evlist;
-		}
-	}
-
-	err = 0;
-	list_for_each_entry(evsel, &evlist->entries, node) {
-		if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
-			--err;
-			pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
-		}
-	}
-
-out_delete_evlist:
-	perf_evlist__delete(evlist);
-	return err;
-}
-
-#define perf_evsel__name_array_test(names) \
-	__perf_evsel__name_array_test(names, ARRAY_SIZE(names))
-
-static int perf_evsel__roundtrip_name_test(void)
-{
-	int err = 0, ret = 0;
-
-	err = perf_evsel__name_array_test(perf_evsel__hw_names);
-	if (err)
-		ret = err;
-
-	err = perf_evsel__name_array_test(perf_evsel__sw_names);
-	if (err)
-		ret = err;
-
-	err = perf_evsel__roundtrip_cache_name_test();
-	if (err)
-		ret = err;
-
-	return ret;
-}
-
-static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
-				  int size, bool should_be_signed)
-{
-	struct format_field *field = perf_evsel__field(evsel, name);
-	int is_signed;
-	int ret = 0;
-
-	if (field == NULL) {
-		pr_debug("%s: \"%s\" field not found!\n", evsel->name, name);
-		return -1;
-	}
-
-	is_signed = !!(field->flags | FIELD_IS_SIGNED);
-	if (should_be_signed && !is_signed) {
-		pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
-			 evsel->name, name, is_signed, should_be_signed);
-		ret = -1;
-	}
-
-	if (field->size != size) {
-		pr_debug("%s: \"%s\" size (%d) should be %d!\n",
-			 evsel->name, name, field->size, size);
-		ret = -1;
-	}
-
-	return ret;
-}
-
-static int perf_evsel__tp_sched_test(void)
-{
-	struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch", 0);
-	int ret = 0;
-
-	if (evsel == NULL) {
-		pr_debug("perf_evsel__new\n");
-		return -1;
-	}
-
-	if (perf_evsel__test_field(evsel, "prev_comm", 16, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "prev_pid", 4, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "prev_prio", 4, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "prev_state", 8, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "next_comm", 16, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "next_pid", 4, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "next_prio", 4, true))
-		ret = -1;
-
-	perf_evsel__delete(evsel);
-
-	evsel = perf_evsel__newtp("sched", "sched_wakeup", 0);
-
-	if (perf_evsel__test_field(evsel, "comm", 16, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "pid", 4, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "prio", 4, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "success", 4, true))
-		ret = -1;
-
-	if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
-		ret = -1;
-
-	return ret;
-}
-
-static int test__syscall_open_tp_fields(void)
-{
-	struct perf_record_opts opts = {
-		.target = {
-			.uid = UINT_MAX,
-			.uses_mmap = true,
-		},
-		.no_delay   = true,
-		.freq	    = 1,
-		.mmap_pages = 256,
-		.raw_samples = true,
-	};
-	const char *filename = "/etc/passwd";
-	int flags = O_RDONLY | O_DIRECTORY;
-	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
-	struct perf_evsel *evsel;
-	int err = -1, i, nr_events = 0, nr_polls = 0;
-
-	if (evlist == NULL) {
-		pr_debug("%s: perf_evlist__new\n", __func__);
-		goto out;
-	}
-
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0);
-	if (evsel == NULL) {
-		pr_debug("%s: perf_evsel__newtp\n", __func__);
-		goto out_delete_evlist;
-	}
-
-	perf_evlist__add(evlist, evsel);
-
-	err = perf_evlist__create_maps(evlist, &opts.target);
-	if (err < 0) {
-		pr_debug("%s: perf_evlist__create_maps\n", __func__);
-		goto out_delete_evlist;
-	}
-
-	perf_evsel__config(evsel, &opts, evsel);
-
-	evlist->threads->map[0] = getpid();
-
-	err = perf_evlist__open(evlist);
-	if (err < 0) {
-		pr_debug("perf_evlist__open: %s\n", strerror(errno));
-		goto out_delete_evlist;
-	}
-
-	err = perf_evlist__mmap(evlist, UINT_MAX, false);
-	if (err < 0) {
-		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
-		goto out_delete_evlist;
-	}
-
-	perf_evlist__enable(evlist);
-
-	/*
- 	 * Generate the event:
- 	 */
-	open(filename, flags);
-
-	while (1) {
-		int before = nr_events;
-
-		for (i = 0; i < evlist->nr_mmaps; i++) {
-			union perf_event *event;
-
-			while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
-				const u32 type = event->header.type;
-				int tp_flags;
-				struct perf_sample sample;
-
-				++nr_events;
-
-				if (type != PERF_RECORD_SAMPLE)
-					continue;
-
-				err = perf_evsel__parse_sample(evsel, event, &sample);
-				if (err) {
-					pr_err("Can't parse sample, err = %d\n", err);
-					goto out_munmap;
-				}
-
-				tp_flags = perf_evsel__intval(evsel, &sample, "flags");
-
-				if (flags != tp_flags) {
-					pr_debug("%s: Expected flags=%#x, got %#x\n",
-						 __func__, flags, tp_flags);
-					goto out_munmap;
-				}
-
-				goto out_ok;
-			}
-		}
-
-		if (nr_events == before)
-			poll(evlist->pollfd, evlist->nr_fds, 10);
-
-		if (++nr_polls > 5) {
-			pr_debug("%s: no events!\n", __func__);
-			goto out_munmap;
-		}
-	}
-out_ok:
-	err = 0;
-out_munmap:
-	perf_evlist__munmap(evlist);
-out_delete_evlist:
-	perf_evlist__delete(evlist);
-out:
-	return err;
-}
-
-static struct test {
-	const char *desc;
-	int (*func)(void);
-} tests[] = {
-	{
-		.desc = "vmlinux symtab matches kallsyms",
-		.func = test__vmlinux_matches_kallsyms,
-	},
-	{
-		.desc = "detect open syscall event",
-		.func = test__open_syscall_event,
-	},
-	{
-		.desc = "detect open syscall event on all cpus",
-		.func = test__open_syscall_event_on_all_cpus,
-	},
-	{
-		.desc = "read samples using the mmap interface",
-		.func = test__basic_mmap,
-	},
-	{
-		.desc = "parse events tests",
-		.func = parse_events__test,
-	},
-#if defined(__x86_64__) || defined(__i386__)
-	{
-		.desc = "x86 rdpmc test",
-		.func = test__rdpmc,
-	},
-#endif
-	{
-		.desc = "Validate PERF_RECORD_* events & perf_sample fields",
-		.func = test__PERF_RECORD,
-	},
-	{
-		.desc = "Test perf pmu format parsing",
-		.func = test__perf_pmu,
-	},
-	{
-		.desc = "Test dso data interface",
-		.func = dso__test_data,
-	},
-	{
-		.desc = "roundtrip evsel->name check",
-		.func = perf_evsel__roundtrip_name_test,
-	},
-	{
-		.desc = "Check parsing of sched tracepoints fields",
-		.func = perf_evsel__tp_sched_test,
-	},
-	{
-		.desc = "Generate and check syscalls:sys_enter_open event fields",
-		.func = test__syscall_open_tp_fields,
-	},
-	{
-		.func = NULL,
-	},
-};
-
-static bool perf_test__matches(int curr, int argc, const char *argv[])
-{
-	int i;
-
-	if (argc == 0)
-		return true;
-
-	for (i = 0; i < argc; ++i) {
-		char *end;
-		long nr = strtoul(argv[i], &end, 10);
-
-		if (*end == '\0') {
-			if (nr == curr + 1)
-				return true;
-			continue;
-		}
-
-		if (strstr(tests[curr].desc, argv[i]))
-			return true;
-	}
-
-	return false;
-}
-
-static int __cmd_test(int argc, const char *argv[])
-{
-	int i = 0;
-
-	while (tests[i].func) {
-		int curr = i++, err;
-
-		if (!perf_test__matches(curr, argc, argv))
-			continue;
-
-		pr_info("%2d: %s:", i, tests[curr].desc);
-		pr_debug("\n--- start ---\n");
-		err = tests[curr].func();
-		pr_debug("---- end ----\n%s:", tests[curr].desc);
-		pr_info(" %s\n", err ? "FAILED!\n" : "Ok");
-	}
-
-	return 0;
-}
-
-static int perf_test__list(int argc, const char **argv)
-{
-	int i = 0;
-
-	while (tests[i].func) {
-		int curr = i++;
-
-		if (argc > 1 && !strstr(tests[curr].desc, argv[1]))
-			continue;
-
-		pr_info("%2d: %s\n", i, tests[curr].desc);
-	}
-
-	return 0;
-}
-
-int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
-{
-	const char * const test_usage[] = {
-	"perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
-	NULL,
-	};
-	const struct option test_options[] = {
-	OPT_INCR('v', "verbose", &verbose,
-		    "be more verbose (show symbol address, etc)"),
-	OPT_END()
-	};
-
-	argc = parse_options(argc, argv, test_options, test_usage, 0);
-	if (argc >= 1 && !strcmp(argv[0], "list"))
-		return perf_test__list(argc, argv);
-
-	symbol_conf.priv_size = sizeof(int);
-	symbol_conf.sort_by_name = true;
-	symbol_conf.try_vmlinux_path = true;
-
-	if (symbol__init() < 0)
-		return -1;
-
-	return __cmd_test(argc, argv);
-}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index f251b61..ab4cf232 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -965,7 +965,7 @@
 	svg_close();
 }
 
-static int __cmd_timechart(const char *input_name, const char *output_name)
+static int __cmd_timechart(const char *output_name)
 {
 	struct perf_tool perf_timechart = {
 		.comm		 = process_comm_event,
@@ -1061,7 +1061,6 @@
 int cmd_timechart(int argc, const char **argv,
 		  const char *prefix __maybe_unused)
 {
-	const char *input_name;
 	const char *output_name = "output.svg";
 	const struct option options[] = {
 	OPT_STRING('i', "input", &input_name, "file", "input file name"),
@@ -1092,5 +1091,5 @@
 
 	setup_pager();
 
-	return __cmd_timechart(input_name, output_name);
+	return __cmd_timechart(output_name);
 }
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ff6db80..c9ff395 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -26,6 +26,7 @@
 #include "util/color.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/machine.h"
 #include "util/session.h"
 #include "util/symbol.h"
 #include "util/thread.h"
@@ -581,6 +582,11 @@
 	struct perf_evsel *pos;
 	struct perf_top *top = arg;
 	const char *help = "For a higher level overview, try: perf top --sort comm,dso";
+	struct hist_browser_timer hbt = {
+		.timer		= perf_top__sort_new_samples,
+		.arg		= top,
+		.refresh	= top->delay_secs,
+	};
 
 	perf_top__sort_new_samples(top);
 
@@ -592,9 +598,8 @@
 	list_for_each_entry(pos, &top->evlist->entries, node)
 		pos->hists.uid_filter_str = top->target.uid_str;
 
-	perf_evlist__tui_browse_hists(top->evlist, help,
-				      perf_top__sort_new_samples,
-				      top, top->delay_secs);
+	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+				      &top->session->header.env);
 
 	exit_browser(0);
 	exit(0);
@@ -871,7 +876,7 @@
 						   &sample, machine);
 		} else if (event->header.type < PERF_RECORD_MAX) {
 			hists__inc_nr_events(&evsel->hists, event->header.type);
-			perf_event__process(&top->tool, event, &sample, machine);
+			machine__process_event(machine, event);
 		} else
 			++session->hists.stats.nr_unknown_events;
 	}
@@ -976,6 +981,10 @@
 				ui__error("Too many events are opened.\n"
 					    "Try again after reducing the number of events\n");
 				goto out_err;
+			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
+				ui__error("\'precise\' request may not be supported. "
+					  "Try removing 'p' modifier\n");
+				goto out_err;
 			}
 
 			ui__error("The sys_perf_event_open() syscall "
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 7aaee39..7932ffa 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1,5 +1,8 @@
 #include "builtin.h"
+#include "util/color.h"
 #include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
 #include "util/parse-options.h"
 #include "util/thread_map.h"
 #include "event-parse.h"
@@ -13,15 +16,18 @@
 	bool	   errmsg;
 	bool	   timeout;
 } syscall_fmts[] = {
+	{ .name	    = "access",	    .errmsg = true, },
 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
 	{ .name	    = "futex",	    .errmsg = true, },
+	{ .name	    = "open",	    .errmsg = true, },
 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "read",	    .errmsg = true, },
 	{ .name	    = "recvfrom",   .errmsg = true, },
 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
+	{ .name	    = "socket",	    .errmsg = true, },
 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
 };
 
@@ -43,6 +49,57 @@
 	struct syscall_fmt  *fmt;
 };
 
+static size_t fprintf_duration(unsigned long t, FILE *fp)
+{
+	double duration = (double)t / NSEC_PER_MSEC;
+	size_t printed = fprintf(fp, "(");
+
+	if (duration >= 1.0)
+		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
+	else if (duration >= 0.01)
+		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
+	else
+		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
+	return printed + fprintf(stdout, "): ");
+}
+
+struct thread_trace {
+	u64		  entry_time;
+	u64		  exit_time;
+	bool		  entry_pending;
+	unsigned long	  nr_events;
+	char		  *entry_str;
+	double		  runtime_ms;
+};
+
+static struct thread_trace *thread_trace__new(void)
+{
+	return zalloc(sizeof(struct thread_trace));
+}
+
+static struct thread_trace *thread__trace(struct thread *thread)
+{
+	struct thread_trace *ttrace;
+
+	if (thread == NULL)
+		goto fail;
+
+	if (thread->priv == NULL)
+		thread->priv = thread_trace__new();
+		
+	if (thread->priv == NULL)
+		goto fail;
+
+	ttrace = thread->priv;
+	++ttrace->nr_events;
+
+	return ttrace;
+fail:
+	color_fprintf(stdout, PERF_COLOR_RED,
+		      "WARNING: not enough memory, dropping samples!\n");
+	return NULL;
+}
+
 struct trace {
 	int			audit_machine;
 	struct {
@@ -50,8 +107,96 @@
 		struct syscall  *table;
 	} syscalls;
 	struct perf_record_opts opts;
+	struct machine		host;
+	u64			base_time;
+	unsigned long		nr_events;
+	bool			sched;
+	bool			multiple_threads;
+	double			duration_filter;
+	double			runtime_ms;
 };
 
+static bool trace__filter_duration(struct trace *trace, double t)
+{
+	return t < (trace->duration_filter * NSEC_PER_MSEC);
+}
+
+static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+{
+	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
+
+	return fprintf(fp, "%10.3f ", ts);
+}
+
+static bool done = false;
+
+static void sig_handler(int sig __maybe_unused)
+{
+	done = true;
+}
+
+static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
+					u64 duration, u64 tstamp, FILE *fp)
+{
+	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
+	printed += fprintf_duration(duration, fp);
+
+	if (trace->multiple_threads)
+		printed += fprintf(fp, "%d ", thread->pid);
+
+	return printed;
+}
+
+static int trace__process_event(struct machine *machine, union perf_event *event)
+{
+	int ret = 0;
+
+	switch (event->header.type) {
+	case PERF_RECORD_LOST:
+		color_fprintf(stdout, PERF_COLOR_RED,
+			      "LOST %" PRIu64 " events!\n", event->lost.lost);
+		ret = machine__process_lost_event(machine, event);
+	default:
+		ret = machine__process_event(machine, event);
+		break;
+	}
+
+	return ret;
+}
+
+static int trace__tool_process(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine)
+{
+	return trace__process_event(machine, event);
+}
+
+static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
+{
+	int err = symbol__init();
+
+	if (err)
+		return err;
+
+	machine__init(&trace->host, "", HOST_KERNEL_ID);
+	machine__create_kernel_maps(&trace->host);
+
+	if (perf_target__has_task(&trace->opts.target)) {
+		err = perf_event__synthesize_thread_map(NULL, evlist->threads,
+							trace__tool_process,
+							&trace->host);
+	} else {
+		err = perf_event__synthesize_threads(NULL, trace__tool_process,
+						     &trace->host);
+	}
+
+	if (err)
+		symbol__exit();
+
+	return err;
+}
+
 static int trace__read_syscall_info(struct trace *trace, int id)
 {
 	char tp_name[128];
@@ -93,7 +238,8 @@
 	return sc->tp_format != NULL ? 0 : -1;
 }
 
-static size_t syscall__fprintf_args(struct syscall *sc, unsigned long *args, FILE *fp)
+static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
+				      unsigned long *args)
 {
 	int i = 0;
 	size_t printed = 0;
@@ -102,12 +248,15 @@
 		struct format_field *field;
 
 		for (field = sc->tp_format->format.fields->next; field; field = field->next) {
-			printed += fprintf(fp, "%s%s: %ld", printed ? ", " : "",
-					   field->name, args[i++]);
+			printed += scnprintf(bf + printed, size - printed,
+					     "%s%s: %ld", printed ? ", " : "",
+					     field->name, args[i++]);
 		}
 	} else {
 		while (i < 6) {
-			printed += fprintf(fp, "%sarg%d: %ld", printed ? ", " : "", i, args[i]);
+			printed += scnprintf(bf + printed, size - printed,
+					     "%sarg%d: %ld",
+					     printed ? ", " : "", i, args[i]);
 			++i;
 		}
 	}
@@ -139,17 +288,24 @@
 	return &trace->syscalls.table[id];
 
 out_cant_read:
-	printf("Problems reading syscall %d information\n", id);
+	printf("Problems reading syscall %d", id);
+	if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
+		printf("(%s)", trace->syscalls.table[id].name);
+	puts(" information");
 	return NULL;
 }
 
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 			    struct perf_sample *sample)
 {
+	char *msg;
 	void *args;
+	size_t printed = 0;
+	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
 	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
+	struct thread_trace *ttrace = thread__trace(thread);
 
-	if (sc == NULL)
+	if (ttrace == NULL || sc == NULL)
 		return -1;
 
 	args = perf_evsel__rawptr(evsel, sample, "args");
@@ -158,8 +314,27 @@
 		return -1;
 	}
 
-	printf("%s(", sc->name);
-	syscall__fprintf_args(sc, args, stdout);
+	ttrace = thread->priv;
+
+	if (ttrace->entry_str == NULL) {
+		ttrace->entry_str = malloc(1024);
+		if (!ttrace->entry_str)
+			return -1;
+	}
+
+	ttrace->entry_time = sample->time;
+	msg = ttrace->entry_str;
+	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
+
+	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
+
+	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
+		if (!trace->duration_filter) {
+			trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout);
+			printf("%-70s\n", ttrace->entry_str);
+		}
+	} else
+		ttrace->entry_pending = true;
 
 	return 0;
 }
@@ -168,13 +343,37 @@
 			   struct perf_sample *sample)
 {
 	int ret;
+	u64 duration = 0;
+	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
+	struct thread_trace *ttrace = thread__trace(thread);
 	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
 
-	if (sc == NULL)
+	if (ttrace == NULL || sc == NULL)
 		return -1;
 
 	ret = perf_evsel__intval(evsel, sample, "ret");
 
+	ttrace = thread->priv;
+
+	ttrace->exit_time = sample->time;
+
+	if (ttrace->entry_time) {
+		duration = sample->time - ttrace->entry_time;
+		if (trace__filter_duration(trace, duration))
+			goto out;
+	} else if (trace->duration_filter)
+		goto out;
+
+	trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout);
+
+	if (ttrace->entry_pending) {
+		printf("%-70s", ttrace->entry_str);
+	} else {
+		printf(" ... [");
+		color_fprintf(stdout, PERF_COLOR_YELLOW, "continued");
+		printf("]: %s()", sc->name);
+	}
+
 	if (ret < 0 && sc->fmt && sc->fmt->errmsg) {
 		char bf[256];
 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
@@ -187,14 +386,44 @@
 		printf(") = %d", ret);
 
 	putchar('\n');
+out:
+	ttrace->entry_pending = false;
+
 	return 0;
 }
 
-static int trace__run(struct trace *trace)
+static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+				     struct perf_sample *sample)
+{
+        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
+	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
+	struct thread_trace *ttrace = thread__trace(thread);
+
+	if (ttrace == NULL)
+		goto out_dump;
+
+	ttrace->runtime_ms += runtime_ms;
+	trace->runtime_ms += runtime_ms;
+	return 0;
+
+out_dump:
+	printf("%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
+	       evsel->name,
+	       perf_evsel__strval(evsel, sample, "comm"),
+	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
+	       runtime,
+	       perf_evsel__intval(evsel, sample, "vruntime"));
+	return 0;
+}
+
+static int trace__run(struct trace *trace, int argc, const char **argv)
 {
 	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
 	struct perf_evsel *evsel;
-	int err = -1, i, nr_events = 0, before;
+	int err = -1, i;
+	unsigned long before;
+	const bool forks = argc > 0;
 
 	if (evlist == NULL) {
 		printf("Not enough memory to run!\n");
@@ -207,14 +436,38 @@
 		goto out_delete_evlist;
 	}
 
+	if (trace->sched &&
+	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
+				   trace__sched_stat_runtime)) {
+		printf("Couldn't read the sched_stat_runtime tracepoint information!\n");
+		goto out_delete_evlist;
+	}
+
 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
 	if (err < 0) {
 		printf("Problems parsing the target to trace, check your options!\n");
 		goto out_delete_evlist;
 	}
 
+	err = trace__symbols_init(trace, evlist);
+	if (err < 0) {
+		printf("Problems initializing symbol libraries!\n");
+		goto out_delete_evlist;
+	}
+
 	perf_evlist__config_attrs(evlist, &trace->opts);
 
+	signal(SIGCHLD, sig_handler);
+	signal(SIGINT, sig_handler);
+
+	if (forks) {
+		err = perf_evlist__prepare_workload(evlist, &trace->opts, argv);
+		if (err < 0) {
+			printf("Couldn't run the workload!\n");
+			goto out_delete_evlist;
+		}
+	}
+
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
 		printf("Couldn't create the events: %s\n", strerror(errno));
@@ -228,8 +481,13 @@
 	}
 
 	perf_evlist__enable(evlist);
+
+	if (forks)
+		perf_evlist__start_workload(evlist);
+
+	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
 again:
-	before = nr_events;
+	before = trace->nr_events;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		union perf_event *event;
@@ -239,19 +497,7 @@
 			tracepoint_handler handler;
 			struct perf_sample sample;
 
-			++nr_events;
-
-			switch (type) {
-			case PERF_RECORD_SAMPLE:
-				break;
-			case PERF_RECORD_LOST:
-				printf("LOST %" PRIu64 " events!\n", event->lost.lost);
-				continue;
-			default:
-				printf("Unexpected %s event, skipping...\n",
-					perf_event__name(type));
-				continue;
-			}
+			++trace->nr_events;
 
 			err = perf_evlist__parse_sample(evlist, event, &sample);
 			if (err) {
@@ -259,14 +505,26 @@
 				continue;
 			}
 
+			if (trace->base_time == 0)
+				trace->base_time = sample.time;
+
+			if (type != PERF_RECORD_SAMPLE) {
+				trace__process_event(&trace->host, event);
+				continue;
+			}
+
 			evsel = perf_evlist__id2evsel(evlist, sample.id);
 			if (evsel == NULL) {
 				printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
 				continue;
 			}
 
-			if (evlist->threads->map[0] == -1 || evlist->threads->nr > 1)
-				printf("%d ", sample.tid);
+			if (sample.raw_data == NULL) {
+				printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
+				       perf_evsel__name(evsel), sample.tid,
+				       sample.cpu, sample.raw_size);
+				continue;
+			}
 
 			if (sample.raw_data == NULL) {
 				printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
@@ -280,8 +538,15 @@
 		}
 	}
 
-	if (nr_events == before)
+	if (trace->nr_events == before) {
+		if (done)
+			goto out_delete_evlist;
+
 		poll(evlist->pollfd, evlist->nr_fds, -1);
+	}
+
+	if (done)
+		perf_evlist__disable(evlist);
 
 	goto again;
 
@@ -291,10 +556,65 @@
 	return err;
 }
 
+static size_t trace__fprintf_threads_header(FILE *fp)
+{
+	size_t printed;
+
+	printed  = fprintf(fp, "\n _____________________________________________________________________\n");
+	printed += fprintf(fp," __)    Summary of events    (__\n\n");
+	printed += fprintf(fp,"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
+	printed += fprintf(fp," _____________________________________________________________________\n\n");
+
+	return printed;
+}
+
+static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
+{
+	size_t printed = trace__fprintf_threads_header(fp);
+	struct rb_node *nd;
+
+	for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
+		struct thread *thread = rb_entry(nd, struct thread, rb_node);
+		struct thread_trace *ttrace = thread->priv;
+		const char *color;
+		double ratio;
+
+		if (ttrace == NULL)
+			continue;
+
+		ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
+
+		color = PERF_COLOR_NORMAL;
+		if (ratio > 50.0)
+			color = PERF_COLOR_RED;
+		else if (ratio > 25.0)
+			color = PERF_COLOR_GREEN;
+		else if (ratio > 5.0)
+			color = PERF_COLOR_YELLOW;
+
+		printed += color_fprintf(fp, color, "%20s", thread->comm);
+		printed += fprintf(fp, " - %-5d :%11lu   [", thread->pid, ttrace->nr_events);
+		printed += color_fprintf(fp, color, "%5.1f%%", ratio);
+		printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
+	}
+
+	return printed;
+}
+
+static int trace__set_duration(const struct option *opt, const char *str,
+			       int unset __maybe_unused)
+{
+	struct trace *trace = opt->value;
+
+	trace->duration_filter = atof(str);
+	return 0;
+}
+
 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const trace_usage[] = {
-		"perf trace [<options>]",
+		"perf trace [<options>] [<command>]",
+		"perf trace [<options>] -- <command> [<options>]",
 		NULL
 	};
 	struct trace trace = {
@@ -328,21 +648,38 @@
 		     "number of mmap data pages"),
 	OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user",
 		   "user to profile"),
+	OPT_CALLBACK(0, "duration", &trace, "float",
+		     "show only events with duration > N.M ms",
+		     trace__set_duration),
+	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
 	OPT_END()
 	};
 	int err;
+	char bf[BUFSIZ];
 
 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
-	if (argc)
-		usage_with_options(trace_usage, trace_options);
 
-	err = perf_target__parse_uid(&trace.opts.target);
+	err = perf_target__validate(&trace.opts.target);
 	if (err) {
-		char bf[BUFSIZ];
 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
 		printf("%s", bf);
 		return err;
 	}
 
-	return trace__run(&trace);
+	err = perf_target__parse_uid(&trace.opts.target);
+	if (err) {
+		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
+		printf("%s", bf);
+		return err;
+	}
+
+	if (!argc && perf_target__none(&trace.opts.target))
+		trace.opts.target.system_wide = true;
+
+	err = trace__run(&trace, argc, argv);
+
+	if (trace.sched && !err)
+		trace__fprintf_thread_summary(&trace, stdout);
+
+	return err;
 }
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index 4add41b..f5ac774 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -43,6 +43,15 @@
 }
 endef
 
+define SOURCE_BIONIC
+#include <android/api-level.h>
+
+int main(void)
+{
+	return __ANDROID_API__;
+}
+endef
+
 define SOURCE_ELF_MMAP
 #include <libelf.h>
 int main(void)
@@ -112,7 +121,10 @@
 #if PY_VERSION_HEX >= 0x03000000
 	#error
 #endif
-int main(void){}
+int main(void)
+{
+	return 0;
+}
 endef
 define SOURCE_PYTHON_EMBED
 #include <Python.h>
@@ -203,4 +215,13 @@
 	return audit_open();
 }
 endef
-endif
\ No newline at end of file
+endif
+
+define SOURCE_ON_EXIT
+#include <stdio.h>
+
+int main(void)
+{
+	return on_exit(NULL, NULL);
+}
+endef
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index 8046182..e541312 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -180,9 +180,15 @@
 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
 
 # try-cc
-# Usage: option = $(call try-cc, source-to-build, cc-options)
+# Usage: option = $(call try-cc, source-to-build, cc-options, msg)
+ifndef V
+TRY_CC_OUTPUT= > /dev/null 2>&1
+endif
+TRY_CC_MSG=echo "    CHK $(3)" 1>&2;
+
 try-cc = $(shell sh -c						  \
 	'TMP="$(OUTPUT)$(TMPOUT).$$$$";				  \
+	 $(TRY_CC_MSG)						  \
 	 echo "$(1)" |						  \
-	 $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
+	 $(CC) -x c - $(2) -o "$$TMP" $(TRY_CC_OUTPUT) && echo y; \
 	 rm -f "$$TMP"')
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 6d50eb0..0f661fb 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -24,6 +24,7 @@
 
 int use_browser = -1;
 static int use_pager = -1;
+const char *input_name;
 
 struct cmd_struct {
 	const char *cmd;
@@ -84,21 +85,26 @@
 	return c.val;
 }
 
-static int tui_command_config(const char *var, const char *value, void *data)
+static int browser_command_config(const char *var, const char *value, void *data)
 {
 	struct pager_config *c = data;
 	if (!prefixcmp(var, "tui.") && !strcmp(var + 4, c->cmd))
 		c->val = perf_config_bool(var, value);
+	if (!prefixcmp(var, "gtk.") && !strcmp(var + 4, c->cmd))
+		c->val = perf_config_bool(var, value) ? 2 : 0;
 	return 0;
 }
 
-/* returns 0 for "no tui", 1 for "use tui", and -1 for "not specified" */
-static int check_tui_config(const char *cmd)
+/*
+ * returns 0 for "no tui", 1 for "use tui", 2 for "use gtk",
+ * and -1 for "not specified"
+ */
+static int check_browser_config(const char *cmd)
 {
 	struct pager_config c;
 	c.cmd = cmd;
 	c.val = -1;
-	perf_config(tui_command_config, &c);
+	perf_config(browser_command_config, &c);
 	return c.val;
 }
 
@@ -301,7 +307,7 @@
 		prefix = NULL; /* setup_perf_directory(); */
 
 	if (use_browser == -1)
-		use_browser = check_tui_config(p->cmd);
+		use_browser = check_browser_config(p->cmd);
 
 	if (use_pager == -1 && p->option & RUN_SETUP)
 		use_pager = check_pager_config(p->cmd);
@@ -440,6 +446,8 @@
 {
 	const char *cmd;
 
+	page_size = sysconf(_SC_PAGE_SIZE);
+
 	cmd = perf_extract_argv0_path(argv[0]);
 	if (!cmd)
 		cmd = "perf-help";
@@ -481,6 +489,8 @@
 	}
 	cmd = argv[0];
 
+	test_attr__init();
+
 	/*
 	 * We use PATH to find perf commands, but we prepend some higher
 	 * precedence paths: the "--exec-path" option, the PERF_EXEC_PATH
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 238f923..2c340e7 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -26,6 +26,7 @@
 #endif
 
 #ifdef __powerpc__
+#include "../../arch/powerpc/include/uapi/asm/unistd.h"
 #define rmb()		asm volatile ("sync" ::: "memory")
 #define cpu_relax()	asm volatile ("" ::: "memory");
 #define CPUINFO_PROC	"cpu"
@@ -164,13 +165,25 @@
 	(void) (&_min1 == &_min2);		\
 	_min1 < _min2 ? _min1 : _min2; })
 
+extern bool test_attr__enabled;
+void test_attr__init(void);
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+		     int fd, int group_fd, unsigned long flags);
+
 static inline int
 sys_perf_event_open(struct perf_event_attr *attr,
 		      pid_t pid, int cpu, int group_fd,
 		      unsigned long flags)
 {
-	return syscall(__NR_perf_event_open, attr, pid, cpu,
-		       group_fd, flags);
+	int fd;
+
+	fd = syscall(__NR_perf_event_open, attr, pid, cpu,
+		     group_fd, flags);
+
+	if (unlikely(test_attr__enabled))
+		test_attr__open(attr, pid, cpu, fd, group_fd, flags);
+
+	return fd;
 }
 
 #define MAX_COUNTERS			256
@@ -198,6 +211,7 @@
 	struct branch_entry	entries[0];
 };
 
+extern const char *input_name;
 extern bool perf_host, perf_guest;
 extern const char perf_version_string[];
 
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
new file mode 100644
index 0000000..25638a9
--- /dev/null
+++ b/tools/perf/tests/attr.c
@@ -0,0 +1,175 @@
+
+/*
+ * The struct perf_event_attr test support.
+ *
+ * This test is embedded inside into perf directly and is governed
+ * by the PERF_TEST_ATTR environment variable and hook inside
+ * sys_perf_event_open function.
+ *
+ * The general idea is to store 'struct perf_event_attr' details for
+ * each event created within single perf command. Each event details
+ * are stored into separate text file. Once perf command is finished
+ * these files can be checked for values we expect for command.
+ *
+ * Besides 'struct perf_event_attr' values we also store 'fd' and
+ * 'group_fd' values to allow checking for groups created.
+ *
+ * This all is triggered by setting PERF_TEST_ATTR environment variable.
+ * It must contain name of existing directory with access and write
+ * permissions. All the event text files are stored there.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include "../perf.h"
+#include "util.h"
+#include "exec_cmd.h"
+#include "tests.h"
+
+#define ENV "PERF_TEST_ATTR"
+
+extern int verbose;
+
+bool test_attr__enabled;
+
+static char *dir;
+
+void test_attr__init(void)
+{
+	dir = getenv(ENV);
+	test_attr__enabled = (dir != NULL);
+}
+
+#define BUFSIZE 1024
+
+#define __WRITE_ASS(str, fmt, data)					\
+do {									\
+	char buf[BUFSIZE];						\
+	size_t size;							\
+									\
+	size = snprintf(buf, BUFSIZE, #str "=%"fmt "\n", data);		\
+	if (1 != fwrite(buf, size, 1, file)) {				\
+		perror("test attr - failed to write event file");	\
+		fclose(file);						\
+		return -1;						\
+	}								\
+									\
+} while (0)
+
+#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
+
+static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
+		       int fd, int group_fd, unsigned long flags)
+{
+	FILE *file;
+	char path[PATH_MAX];
+
+	snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
+		 attr->type, attr->config, fd);
+
+	file = fopen(path, "w+");
+	if (!file) {
+		perror("test attr - failed to open event file");
+		return -1;
+	}
+
+	if (fprintf(file, "[event-%d-%llu-%d]\n",
+		    attr->type, attr->config, fd) < 0) {
+		perror("test attr - failed to write event file");
+		fclose(file);
+		return -1;
+	}
+
+	/* syscall arguments */
+	__WRITE_ASS(fd,       "d", fd);
+	__WRITE_ASS(group_fd, "d", group_fd);
+	__WRITE_ASS(cpu,      "d", cpu);
+	__WRITE_ASS(pid,      "d", pid);
+	__WRITE_ASS(flags,   "lu", flags);
+
+	/* struct perf_event_attr */
+	WRITE_ASS(type,   PRIu32);
+	WRITE_ASS(size,   PRIu32);
+	WRITE_ASS(config,  "llu");
+	WRITE_ASS(sample_period, "llu");
+	WRITE_ASS(sample_type,   "llu");
+	WRITE_ASS(read_format,   "llu");
+	WRITE_ASS(disabled,       "d");
+	WRITE_ASS(inherit,        "d");
+	WRITE_ASS(pinned,         "d");
+	WRITE_ASS(exclusive,      "d");
+	WRITE_ASS(exclude_user,   "d");
+	WRITE_ASS(exclude_kernel, "d");
+	WRITE_ASS(exclude_hv,     "d");
+	WRITE_ASS(exclude_idle,   "d");
+	WRITE_ASS(mmap,           "d");
+	WRITE_ASS(comm,           "d");
+	WRITE_ASS(freq,           "d");
+	WRITE_ASS(inherit_stat,   "d");
+	WRITE_ASS(enable_on_exec, "d");
+	WRITE_ASS(task,           "d");
+	WRITE_ASS(watermark,      "d");
+	WRITE_ASS(precise_ip,     "d");
+	WRITE_ASS(mmap_data,      "d");
+	WRITE_ASS(sample_id_all,  "d");
+	WRITE_ASS(exclude_host,   "d");
+	WRITE_ASS(exclude_guest,  "d");
+	WRITE_ASS(exclude_callchain_kernel, "d");
+	WRITE_ASS(exclude_callchain_user, "d");
+	WRITE_ASS(wakeup_events, PRIu32);
+	WRITE_ASS(bp_type, PRIu32);
+	WRITE_ASS(config1, "llu");
+	WRITE_ASS(config2, "llu");
+	WRITE_ASS(branch_sample_type, "llu");
+	WRITE_ASS(sample_regs_user,   "llu");
+	WRITE_ASS(sample_stack_user,  PRIu32);
+
+	fclose(file);
+	return 0;
+}
+
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+		     int fd, int group_fd, unsigned long flags)
+{
+	int errno_saved = errno;
+
+	if (store_event(attr, pid, cpu, fd, group_fd, flags))
+		die("test attr FAILED");
+
+	errno = errno_saved;
+}
+
+static int run_dir(const char *d, const char *perf)
+{
+	char cmd[3*PATH_MAX];
+
+	snprintf(cmd, 3*PATH_MAX, "python %s/attr.py -d %s/attr/ -p %s %s",
+		 d, d, perf, verbose ? "-v" : "");
+
+	return system(cmd);
+}
+
+int test__attr(void)
+{
+	struct stat st;
+	char path_perf[PATH_MAX];
+	char path_dir[PATH_MAX];
+
+	/* First try developement tree tests. */
+	if (!lstat("./tests", &st))
+		return run_dir("./tests", "./perf");
+
+	/* Then installed path. */
+	snprintf(path_dir,  PATH_MAX, "%s/tests", perf_exec_path());
+	snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
+
+	if (!lstat(path_dir, &st) &&
+	    !lstat(path_perf, &st))
+		return run_dir(path_dir, path_perf);
+
+	fprintf(stderr, " (ommitted)");
+	return 0;
+}
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
new file mode 100644
index 0000000..e702b82
--- /dev/null
+++ b/tools/perf/tests/attr.py
@@ -0,0 +1,322 @@
+#! /usr/bin/python
+
+import os
+import sys
+import glob
+import optparse
+import tempfile
+import logging
+import shutil
+import ConfigParser
+
+class Fail(Exception):
+    def __init__(self, test, msg):
+        self.msg = msg
+        self.test = test
+    def getMsg(self):
+        return '\'%s\' - %s' % (self.test.path, self.msg)
+
+class Unsup(Exception):
+    def __init__(self, test):
+        self.test = test
+    def getMsg(self):
+        return '\'%s\'' % self.test.path
+
+class Event(dict):
+    terms = [
+        'flags',
+        'type',
+        'size',
+        'config',
+        'sample_period',
+        'sample_type',
+        'read_format',
+        'disabled',
+        'inherit',
+        'pinned',
+        'exclusive',
+        'exclude_user',
+        'exclude_kernel',
+        'exclude_hv',
+        'exclude_idle',
+        'mmap',
+        'comm',
+        'freq',
+        'inherit_stat',
+        'enable_on_exec',
+        'task',
+        'watermark',
+        'precise_ip',
+        'mmap_data',
+        'sample_id_all',
+        'exclude_host',
+        'exclude_guest',
+        'exclude_callchain_kernel',
+        'exclude_callchain_user',
+        'wakeup_events',
+        'bp_type',
+        'config1',
+        'config2',
+        'branch_sample_type',
+        'sample_regs_user',
+        'sample_stack_user',
+    ]
+
+    def add(self, data):
+        for key, val in data:
+            log.debug("      %s = %s" % (key, val))
+            self[key] = val
+
+    def __init__(self, name, data, base):
+        log.info("    Event %s" % name);
+        self.name  = name;
+        self.group = ''
+        self.add(base)
+        self.add(data)
+
+    def compare_data(self, a, b):
+        # Allow multiple values in assignment separated by '|'
+        a_list = a.split('|')
+        b_list = b.split('|')
+
+        for a_item in a_list:
+            for b_item in b_list:
+                if (a_item == b_item):
+                    return True
+                elif (a_item == '*') or (b_item == '*'):
+                    return True
+
+        return False
+
+    def equal(self, other):
+        for t in Event.terms:
+            log.debug("      [%s] %s %s" % (t, self[t], other[t]));
+            if not self.has_key(t) or not other.has_key(t):
+                return False
+            if not self.compare_data(self[t], other[t]):
+                return False
+        return True
+
+# Test file description needs to have following sections:
+# [config]
+#   - just single instance in file
+#   - needs to specify:
+#     'command' - perf command name
+#     'args'    - special command arguments
+#     'ret'     - expected command return value (0 by default)
+#
+# [eventX:base]
+#   - one or multiple instances in file
+#   - expected values assignments
+class Test(object):
+    def __init__(self, path, options):
+        parser = ConfigParser.SafeConfigParser()
+        parser.read(path)
+
+        log.warning("running '%s'" % path)
+
+        self.path     = path
+        self.test_dir = options.test_dir
+        self.perf     = options.perf
+        self.command  = parser.get('config', 'command')
+        self.args     = parser.get('config', 'args')
+
+        try:
+            self.ret  = parser.get('config', 'ret')
+        except:
+            self.ret  = 0
+
+        self.expect   = {}
+        self.result   = {}
+        log.info("  loading expected events");
+        self.load_events(path, self.expect)
+
+    def is_event(self, name):
+        if name.find("event") == -1:
+            return False
+        else:
+            return True
+
+    def load_events(self, path, events):
+        parser_event = ConfigParser.SafeConfigParser()
+        parser_event.read(path)
+
+        # The event record section header contains 'event' word,
+        # optionaly followed by ':' allowing to load 'parent
+        # event' first as a base
+        for section in filter(self.is_event, parser_event.sections()):
+
+            parser_items = parser_event.items(section);
+            base_items   = {}
+
+            # Read parent event if there's any
+            if (':' in section):
+                base = section[section.index(':') + 1:]
+                parser_base = ConfigParser.SafeConfigParser()
+                parser_base.read(self.test_dir + '/' + base)
+                base_items = parser_base.items('event')
+
+            e = Event(section, parser_items, base_items)
+            events[section] = e
+
+    def run_cmd(self, tempdir):
+        cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir,
+              self.perf, self.command, tempdir, self.args)
+        ret = os.WEXITSTATUS(os.system(cmd))
+
+        log.info("  running '%s' ret %d " % (cmd, ret))
+
+        if ret != int(self.ret):
+            raise Unsup(self)
+
+    def compare(self, expect, result):
+        match = {}
+
+        log.info("  compare");
+
+        # For each expected event find all matching
+        # events in result. Fail if there's not any.
+        for exp_name, exp_event in expect.items():
+            exp_list = []
+            log.debug("    matching [%s]" % exp_name)
+            for res_name, res_event in result.items():
+                log.debug("      to [%s]" % res_name)
+                if (exp_event.equal(res_event)):
+                    exp_list.append(res_name)
+                    log.debug("    ->OK")
+                else:
+                    log.debug("    ->FAIL");
+
+            log.info("    match: [%s] matches %s" % (exp_name, str(exp_list)))
+
+            # we did not any matching event - fail
+            if (not exp_list):
+                raise Fail(self, 'match failure');
+
+            match[exp_name] = exp_list
+
+        # For each defined group in the expected events
+        # check we match the same group in the result.
+        for exp_name, exp_event in expect.items():
+            group = exp_event.group
+
+            if (group == ''):
+                continue
+
+            for res_name in match[exp_name]:
+                res_group = result[res_name].group
+                if res_group not in match[group]:
+                    raise Fail(self, 'group failure')
+
+                log.info("    group: [%s] matches group leader %s" %
+                         (exp_name, str(match[group])))
+
+        log.info("  matched")
+
+    def resolve_groups(self, events):
+        for name, event in events.items():
+            group_fd = event['group_fd'];
+            if group_fd == '-1':
+                continue;
+
+            for iname, ievent in events.items():
+                if (ievent['fd'] == group_fd):
+                    event.group = iname
+                    log.debug('[%s] has group leader [%s]' % (name, iname))
+                    break;
+
+    def run(self):
+        tempdir = tempfile.mkdtemp();
+
+        try:
+            # run the test script
+            self.run_cmd(tempdir);
+
+            # load events expectation for the test
+            log.info("  loading result events");
+            for f in glob.glob(tempdir + '/event*'):
+                self.load_events(f, self.result);
+
+            # resolve group_fd to event names
+            self.resolve_groups(self.expect);
+            self.resolve_groups(self.result);
+
+            # do the expectation - results matching - both ways
+            self.compare(self.expect, self.result)
+            self.compare(self.result, self.expect)
+
+        finally:
+            # cleanup
+            shutil.rmtree(tempdir)
+
+
+def run_tests(options):
+    for f in glob.glob(options.test_dir + '/' + options.test):
+        try:
+            Test(f, options).run()
+        except Unsup, obj:
+            log.warning("unsupp  %s" % obj.getMsg())
+
+def setup_log(verbose):
+    global log
+    level = logging.CRITICAL
+
+    if verbose == 1:
+        level = logging.WARNING
+    if verbose == 2:
+        level = logging.INFO
+    if verbose >= 3:
+        level = logging.DEBUG
+
+    log = logging.getLogger('test')
+    log.setLevel(level)
+    ch  = logging.StreamHandler()
+    ch.setLevel(level)
+    formatter = logging.Formatter('%(message)s')
+    ch.setFormatter(formatter)
+    log.addHandler(ch)
+
+USAGE = '''%s [OPTIONS]
+  -d dir  # tests dir
+  -p path # perf binary
+  -t test # single test
+  -v      # verbose level
+''' % sys.argv[0]
+
+def main():
+    parser = optparse.OptionParser(usage=USAGE)
+
+    parser.add_option("-t", "--test",
+                      action="store", type="string", dest="test")
+    parser.add_option("-d", "--test-dir",
+                      action="store", type="string", dest="test_dir")
+    parser.add_option("-p", "--perf",
+                      action="store", type="string", dest="perf")
+    parser.add_option("-v", "--verbose",
+                      action="count", dest="verbose")
+
+    options, args = parser.parse_args()
+    if args:
+        parser.error('FAILED wrong arguments %s' %  ' '.join(args))
+        return -1
+
+    setup_log(options.verbose)
+
+    if not options.test_dir:
+        print 'FAILED no -d option specified'
+        sys.exit(-1)
+
+    if not options.test:
+        options.test = 'test*'
+
+    try:
+        run_tests(options)
+
+    except Fail, obj:
+        print "FAILED %s" % obj.getMsg();
+        sys.exit(-1)
+
+    sys.exit(0)
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
new file mode 100644
index 0000000..d102957
--- /dev/null
+++ b/tools/perf/tests/attr/README
@@ -0,0 +1,64 @@
+The struct perf_event_attr test (attr tests) support
+====================================================
+This testing support is embedded into perf directly and is governed
+by the PERF_TEST_ATTR environment variable and hook inside the
+sys_perf_event_open function.
+
+The general idea is to store 'struct perf_event_attr' details for
+each event created within single perf command. Each event details
+are stored into separate text file. Once perf command is finished
+these files are checked for values we expect for command.
+
+The attr tests consist of following parts:
+
+tests/attr.c
+------------
+This is the sys_perf_event_open hook implementation. The hook
+is triggered when the PERF_TEST_ATTR environment variable is
+defined. It must contain name of existing directory with access
+and write permissions.
+
+For each sys_perf_event_open call event details are stored in
+separate file. Besides 'struct perf_event_attr' values we also
+store 'fd' and 'group_fd' values to allow checking for groups.
+
+tests/attr.py
+-------------
+This is the python script that does all the hard work. It reads
+the test definition, executes it and checks results.
+
+tests/attr/
+-----------
+Directory containing all attr test definitions.
+Following tests are defined (with perf commands):
+
+  perf record kill                              (test-record-basic)
+  perf record -b kill                           (test-record-branch-any)
+  perf record -j any kill                       (test-record-branch-filter-any)
+  perf record -j any_call kill                  (test-record-branch-filter-any_call)
+  perf record -j any_ret kill                   (test-record-branch-filter-any_ret)
+  perf record -j hv kill                        (test-record-branch-filter-hv)
+  perf record -j ind_call kill                  (test-record-branch-filter-ind_call)
+  perf record -j k kill                         (test-record-branch-filter-k)
+  perf record -j u kill                         (test-record-branch-filter-u)
+  perf record -c 123 kill                       (test-record-count)
+  perf record -d kill                           (test-record-data)
+  perf record -F 100 kill                       (test-record-freq)
+  perf record -g -- kill                        (test-record-graph-default)
+  perf record -g dwarf -- kill                  (test-record-graph-dwarf)
+  perf record -g fp kill                        (test-record-graph-fp)
+  perf record --group -e cycles,instructions kill (test-record-group)
+  perf record -e '{cycles,instructions}' kill   (test-record-group1)
+  perf record -D kill                           (test-record-no-delay)
+  perf record -i kill                           (test-record-no-inherit)
+  perf record -n kill                           (test-record-no-samples)
+  perf record -c 100 -P kill                    (test-record-period)
+  perf record -R kill                           (test-record-raw)
+  perf stat -e cycles kill                      (test-stat-basic)
+  perf stat kill                                (test-stat-default)
+  perf stat -d kill                             (test-stat-detailed-1)
+  perf stat -dd kill                            (test-stat-detailed-2)
+  perf stat -ddd kill                           (test-stat-detailed-3)
+  perf stat --group -e cycles,instructions kill (test-stat-group)
+  perf stat -e '{cycles,instructions}' kill     (test-stat-group1)
+  perf stat -i -e cycles kill                   (test-stat-no-inherit)
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
new file mode 100644
index 0000000..f1485d8
--- /dev/null
+++ b/tools/perf/tests/attr/base-record
@@ -0,0 +1,39 @@
+[event]
+fd=1
+group_fd=-1
+flags=0
+type=0|1
+size=96
+config=0
+sample_period=4000
+sample_type=263
+read_format=7
+disabled=1
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0
+exclude_hv=0
+exclude_idle=0
+mmap=1
+comm=1
+freq=1
+inherit_stat=0
+enable_on_exec=1
+task=0
+watermark=0
+precise_ip=0
+mmap_data=0
+sample_id_all=1
+exclude_host=0
+exclude_guest=1
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
new file mode 100644
index 0000000..4bd79a8
--- /dev/null
+++ b/tools/perf/tests/attr/base-stat
@@ -0,0 +1,39 @@
+[event]
+fd=1
+group_fd=-1
+flags=0
+type=0
+size=96
+config=0
+sample_period=0
+sample_type=0
+read_format=3
+disabled=1
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0
+exclude_hv=0
+exclude_idle=0
+mmap=0
+comm=0
+freq=0
+inherit_stat=0
+enable_on_exec=1
+task=0
+watermark=0
+precise_ip=0
+mmap_data=0
+sample_id_all=0
+exclude_host=0
+exclude_guest=1
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tools/perf/tests/attr/test-record-basic b/tools/perf/tests/attr/test-record-basic
new file mode 100644
index 0000000..55c0428
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-basic
@@ -0,0 +1,5 @@
+[config]
+command = record
+args    = kill >/dev/null 2>&1
+
+[event:base-record]
diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/attr/test-record-branch-any
new file mode 100644
index 0000000..1421960
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-any
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -b kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/attr/test-record-branch-filter-any
new file mode 100644
index 0000000..915c4df
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-any
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j any kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/attr/test-record-branch-filter-any_call
new file mode 100644
index 0000000..8708dbd
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-any_call
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j any_call kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=2311
+branch_sample_type=16
diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/attr/test-record-branch-filter-any_ret
new file mode 100644
index 0000000..0d3607a
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-any_ret
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j any_ret kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=2311
+branch_sample_type=32
diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/attr/test-record-branch-filter-hv
new file mode 100644
index 0000000..f255267
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-hv
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j hv kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/attr/test-record-branch-filter-ind_call
new file mode 100644
index 0000000..e862dd1
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-ind_call
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j ind_call kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=2311
+branch_sample_type=64
diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/attr/test-record-branch-filter-k
new file mode 100644
index 0000000..182971e
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-k
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j k kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/attr/test-record-branch-filter-u
new file mode 100644
index 0000000..83449ef
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-u
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j u kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-count b/tools/perf/tests/attr/test-record-count
new file mode 100644
index 0000000..2f841de
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-count
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -c 123 kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=123
+sample_type=7
+freq=0
diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/attr/test-record-data
new file mode 100644
index 0000000..6627c3e
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-data
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -d kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=271
+mmap_data=1
diff --git a/tools/perf/tests/attr/test-record-freq b/tools/perf/tests/attr/test-record-freq
new file mode 100644
index 0000000..600d0f8
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-freq
@@ -0,0 +1,6 @@
+[config]
+command = record
+args    = -F 100 kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=100
diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default
new file mode 100644
index 0000000..833d184
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-graph-default
@@ -0,0 +1,6 @@
+[config]
+command = record
+args    = -g -- kill >/dev/null 2>&1
+
+[event:base-record]
+sample_type=295
diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/attr/test-record-graph-dwarf
new file mode 100644
index 0000000..e93e082
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-graph-dwarf
@@ -0,0 +1,10 @@
+[config]
+command = record
+args    = -g dwarf -- kill >/dev/null 2>&1
+
+[event:base-record]
+sample_type=12583
+exclude_callchain_user=1
+sample_stack_user=8192
+# TODO different for each arch, no support for that now
+sample_regs_user=*
diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp
new file mode 100644
index 0000000..7cef374
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-graph-fp
@@ -0,0 +1,6 @@
+[config]
+command = record
+args    = -g fp kill >/dev/null 2>&1
+
+[event:base-record]
+sample_type=295
diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group
new file mode 100644
index 0000000..a6599e9
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group
@@ -0,0 +1,18 @@
+[config]
+command = record
+args    = --group -e cycles,instructions kill >/dev/null 2>&1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=327
+
+[event-2:base-record]
+fd=2
+group_fd=1
+config=1
+sample_type=327
+mmap=0
+comm=0
+enable_on_exec=0
+disabled=0
diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1
new file mode 100644
index 0000000..5a8359d
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group1
@@ -0,0 +1,19 @@
+[config]
+command = record
+args    = -e '{cycles,instructions}' kill >/tmp/krava 2>&1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=327
+
+[event-2:base-record]
+fd=2
+group_fd=1
+type=0
+config=1
+sample_type=327
+mmap=0
+comm=0
+enable_on_exec=0
+disabled=0
diff --git a/tools/perf/tests/attr/test-record-no-delay b/tools/perf/tests/attr/test-record-no-delay
new file mode 100644
index 0000000..f253b78
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-no-delay
@@ -0,0 +1,9 @@
+[config]
+command = record
+args    = -D kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=263
+watermark=0
+wakeup_events=1
diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/attr/test-record-no-inherit
new file mode 100644
index 0000000..9079a25
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-no-inherit
@@ -0,0 +1,7 @@
+[config]
+command = record
+args    = -i kill >/dev/null 2>&1
+
+[event:base-record]
+sample_type=259
+inherit=0
diff --git a/tools/perf/tests/attr/test-record-no-samples b/tools/perf/tests/attr/test-record-no-samples
new file mode 100644
index 0000000..d0141b2
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-no-samples
@@ -0,0 +1,6 @@
+[config]
+command = record
+args    = -n kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=0
diff --git a/tools/perf/tests/attr/test-record-period b/tools/perf/tests/attr/test-record-period
new file mode 100644
index 0000000..8abc531
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-period
@@ -0,0 +1,7 @@
+[config]
+command = record
+args    = -c 100 -P kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=100
+freq=0
diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/attr/test-record-raw
new file mode 100644
index 0000000..4a8ef25
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-raw
@@ -0,0 +1,7 @@
+[config]
+command = record
+args    = -R kill >/dev/null 2>&1
+
+[event:base-record]
+sample_period=4000
+sample_type=1415
diff --git a/tools/perf/tests/attr/test-stat-basic b/tools/perf/tests/attr/test-stat-basic
new file mode 100644
index 0000000..74e1788
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-basic
@@ -0,0 +1,6 @@
+[config]
+command = stat
+args    = -e cycles kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-stat]
diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default
new file mode 100644
index 0000000..19270f5
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-default
@@ -0,0 +1,64 @@
+[config]
+command = stat
+args    = kill >/dev/null 2>&1
+ret     = 1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1
new file mode 100644
index 0000000..51426b8
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-detailed-1
@@ -0,0 +1,101 @@
+[config]
+command = stat
+args    = -d kill >/dev/null 2>&1
+ret     = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2
new file mode 100644
index 0000000..8de5acc
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-detailed-2
@@ -0,0 +1,155 @@
+[config]
+command = stat
+args    = -dd kill >/dev/null 2>&1
+ret     = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1I                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event15:base-stat]
+fd=15
+type=3
+config=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1I                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event16:base-stat]
+fd=16
+type=3
+config=65537
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_DTLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event17:base-stat]
+fd=17
+type=3
+config=3
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_DTLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event18:base-stat]
+fd=18
+type=3
+config=65539
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_ITLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event19:base-stat]
+fd=19
+type=3
+config=4
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_ITLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event20:base-stat]
+fd=20
+type=3
+config=65540
diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3
new file mode 100644
index 0000000..0a1f45b
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-detailed-3
@@ -0,0 +1,173 @@
+[config]
+command = stat
+args    = -ddd kill >/dev/null 2>&1
+ret     = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1I                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event15:base-stat]
+fd=15
+type=3
+config=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1I                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event16:base-stat]
+fd=16
+type=3
+config=65537
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_DTLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event17:base-stat]
+fd=17
+type=3
+config=3
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_DTLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event18:base-stat]
+fd=18
+type=3
+config=65539
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_ITLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event19:base-stat]
+fd=19
+type=3
+config=4
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_ITLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event20:base-stat]
+fd=20
+type=3
+config=65540
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event21:base-stat]
+fd=21
+type=3
+config=512
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event22:base-stat]
+fd=22
+type=3
+config=66048
diff --git a/tools/perf/tests/attr/test-stat-group b/tools/perf/tests/attr/test-stat-group
new file mode 100644
index 0000000..fdc1596
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-group
@@ -0,0 +1,15 @@
+[config]
+command = stat
+args    = --group -e cycles,instructions kill >/dev/null 2>&1
+ret     = 1
+
+[event-1:base-stat]
+fd=1
+group_fd=-1
+
+[event-2:base-stat]
+fd=2
+group_fd=1
+config=1
+disabled=0
+enable_on_exec=0
diff --git a/tools/perf/tests/attr/test-stat-group1 b/tools/perf/tests/attr/test-stat-group1
new file mode 100644
index 0000000..2a1f86e
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-group1
@@ -0,0 +1,15 @@
+[config]
+command = stat
+args    = -e '{cycles,instructions}' kill >/dev/null 2>&1
+ret     = 1
+
+[event-1:base-stat]
+fd=1
+group_fd=-1
+
+[event-2:base-stat]
+fd=2
+group_fd=1
+config=1
+disabled=0
+enable_on_exec=0
diff --git a/tools/perf/tests/attr/test-stat-no-inherit b/tools/perf/tests/attr/test-stat-no-inherit
new file mode 100644
index 0000000..d54b2a1e
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-no-inherit
@@ -0,0 +1,7 @@
+[config]
+command = stat
+args    = -i -e cycles kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-stat]
+inherit=0
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
new file mode 100644
index 0000000..186f675
--- /dev/null
+++ b/tools/perf/tests/builtin-test.c
@@ -0,0 +1,173 @@
+/*
+ * builtin-test.c
+ *
+ * Builtin regression testing command: ever growing number of sanity tests
+ */
+#include "builtin.h"
+#include "tests.h"
+#include "debug.h"
+#include "color.h"
+#include "parse-options.h"
+#include "symbol.h"
+
+static struct test {
+	const char *desc;
+	int (*func)(void);
+} tests[] = {
+	{
+		.desc = "vmlinux symtab matches kallsyms",
+		.func = test__vmlinux_matches_kallsyms,
+	},
+	{
+		.desc = "detect open syscall event",
+		.func = test__open_syscall_event,
+	},
+	{
+		.desc = "detect open syscall event on all cpus",
+		.func = test__open_syscall_event_on_all_cpus,
+	},
+	{
+		.desc = "read samples using the mmap interface",
+		.func = test__basic_mmap,
+	},
+	{
+		.desc = "parse events tests",
+		.func = test__parse_events,
+	},
+#if defined(__x86_64__) || defined(__i386__)
+	{
+		.desc = "x86 rdpmc test",
+		.func = test__rdpmc,
+	},
+#endif
+	{
+		.desc = "Validate PERF_RECORD_* events & perf_sample fields",
+		.func = test__PERF_RECORD,
+	},
+	{
+		.desc = "Test perf pmu format parsing",
+		.func = test__pmu,
+	},
+	{
+		.desc = "Test dso data interface",
+		.func = test__dso_data,
+	},
+	{
+		.desc = "roundtrip evsel->name check",
+		.func = test__perf_evsel__roundtrip_name_test,
+	},
+	{
+		.desc = "Check parsing of sched tracepoints fields",
+		.func = test__perf_evsel__tp_sched_test,
+	},
+	{
+		.desc = "Generate and check syscalls:sys_enter_open event fields",
+		.func = test__syscall_open_tp_fields,
+	},
+	{
+		.desc = "struct perf_event_attr setup",
+		.func = test__attr,
+	},
+	{
+		.func = NULL,
+	},
+};
+
+static bool perf_test__matches(int curr, int argc, const char *argv[])
+{
+	int i;
+
+	if (argc == 0)
+		return true;
+
+	for (i = 0; i < argc; ++i) {
+		char *end;
+		long nr = strtoul(argv[i], &end, 10);
+
+		if (*end == '\0') {
+			if (nr == curr + 1)
+				return true;
+			continue;
+		}
+
+		if (strstr(tests[curr].desc, argv[i]))
+			return true;
+	}
+
+	return false;
+}
+
+static int __cmd_test(int argc, const char *argv[])
+{
+	int i = 0;
+	int width = 0;
+
+	while (tests[i].func) {
+		int len = strlen(tests[i].desc);
+
+		if (width < len)
+			width = len;
+		++i;
+	}
+
+	i = 0;
+	while (tests[i].func) {
+		int curr = i++, err;
+
+		if (!perf_test__matches(curr, argc, argv))
+			continue;
+
+		pr_info("%2d: %-*s:", i, width, tests[curr].desc);
+		pr_debug("\n--- start ---\n");
+		err = tests[curr].func();
+		pr_debug("---- end ----\n%s:", tests[curr].desc);
+		if (err)
+			color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
+		else
+			pr_info(" Ok\n");
+	}
+
+	return 0;
+}
+
+static int perf_test__list(int argc, const char **argv)
+{
+	int i = 0;
+
+	while (tests[i].func) {
+		int curr = i++;
+
+		if (argc > 1 && !strstr(tests[curr].desc, argv[1]))
+			continue;
+
+		pr_info("%2d: %s\n", i, tests[curr].desc);
+	}
+
+	return 0;
+}
+
+int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	const char * const test_usage[] = {
+	"perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
+	NULL,
+	};
+	const struct option test_options[] = {
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_END()
+	};
+
+	argc = parse_options(argc, argv, test_options, test_usage, 0);
+	if (argc >= 1 && !strcmp(argv[0], "list"))
+		return perf_test__list(argc, argv);
+
+	symbol_conf.priv_size = sizeof(int);
+	symbol_conf.sort_by_name = true;
+	symbol_conf.try_vmlinux_path = true;
+
+	if (symbol__init() < 0)
+		return -1;
+
+	return __cmd_test(argc, argv);
+}
diff --git a/tools/perf/util/dso-test-data.c b/tools/perf/tests/dso-data.c
similarity index 95%
rename from tools/perf/util/dso-test-data.c
rename to tools/perf/tests/dso-data.c
index c6caede..5eaffa2 100644
--- a/tools/perf/util/dso-test-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -6,7 +6,9 @@
 #include <fcntl.h>
 #include <string.h>
 
+#include "machine.h"
 #include "symbol.h"
+#include "tests.h"
 
 #define TEST_ASSERT_VAL(text, cond) \
 do { \
@@ -24,6 +26,10 @@
 	unsigned char *buf;
 
 	fd = mkstemp(templ);
+	if (fd < 0) {
+		perror("mkstemp failed");
+		return NULL;
+	}
 
 	buf = malloc(size);
 	if (!buf) {
@@ -94,7 +100,7 @@
 	},
 };
 
-int dso__test_data(void)
+int test__dso_data(void)
 {
 	struct machine machine;
 	struct dso *dso;
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
new file mode 100644
index 0000000..e61fc82
--- /dev/null
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -0,0 +1,114 @@
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "tests.h"
+
+static int perf_evsel__roundtrip_cache_name_test(void)
+{
+	char name[128];
+	int type, op, err = 0, ret = 0, i, idx;
+	struct perf_evsel *evsel;
+        struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+
+        if (evlist == NULL)
+                return -ENOMEM;
+
+	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+			/* skip invalid cache type */
+			if (!perf_evsel__is_cache_op_valid(type, op))
+				continue;
+
+			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
+									name, sizeof(name));
+				err = parse_events(evlist, name, 0);
+				if (err)
+					ret = err;
+			}
+		}
+	}
+
+	idx = 0;
+	evsel = perf_evlist__first(evlist);
+
+	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+			/* skip invalid cache type */
+			if (!perf_evsel__is_cache_op_valid(type, op))
+				continue;
+
+			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
+									name, sizeof(name));
+				if (evsel->idx != idx)
+					continue;
+
+				++idx;
+
+				if (strcmp(perf_evsel__name(evsel), name)) {
+					pr_debug("%s != %s\n", perf_evsel__name(evsel), name);
+					ret = -1;
+				}
+
+				evsel = perf_evsel__next(evsel);
+			}
+		}
+	}
+
+	perf_evlist__delete(evlist);
+	return ret;
+}
+
+static int __perf_evsel__name_array_test(const char *names[], int nr_names)
+{
+	int i, err;
+	struct perf_evsel *evsel;
+        struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+
+        if (evlist == NULL)
+                return -ENOMEM;
+
+	for (i = 0; i < nr_names; ++i) {
+		err = parse_events(evlist, names[i], 0);
+		if (err) {
+			pr_debug("failed to parse event '%s', err %d\n",
+				 names[i], err);
+			goto out_delete_evlist;
+		}
+	}
+
+	err = 0;
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
+			--err;
+			pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
+		}
+	}
+
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	return err;
+}
+
+#define perf_evsel__name_array_test(names) \
+	__perf_evsel__name_array_test(names, ARRAY_SIZE(names))
+
+int test__perf_evsel__roundtrip_name_test(void)
+{
+	int err = 0, ret = 0;
+
+	err = perf_evsel__name_array_test(perf_evsel__hw_names);
+	if (err)
+		ret = err;
+
+	err = perf_evsel__name_array_test(perf_evsel__sw_names);
+	if (err)
+		ret = err;
+
+	err = perf_evsel__roundtrip_cache_name_test();
+	if (err)
+		ret = err;
+
+	return ret;
+}
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
new file mode 100644
index 0000000..a5d2fcc
--- /dev/null
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -0,0 +1,84 @@
+#include "evsel.h"
+#include "tests.h"
+#include "event-parse.h"
+
+static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
+				  int size, bool should_be_signed)
+{
+	struct format_field *field = perf_evsel__field(evsel, name);
+	int is_signed;
+	int ret = 0;
+
+	if (field == NULL) {
+		pr_debug("%s: \"%s\" field not found!\n", evsel->name, name);
+		return -1;
+	}
+
+	is_signed = !!(field->flags | FIELD_IS_SIGNED);
+	if (should_be_signed && !is_signed) {
+		pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
+			 evsel->name, name, is_signed, should_be_signed);
+		ret = -1;
+	}
+
+	if (field->size != size) {
+		pr_debug("%s: \"%s\" size (%d) should be %d!\n",
+			 evsel->name, name, field->size, size);
+		ret = -1;
+	}
+
+	return ret;
+}
+
+int test__perf_evsel__tp_sched_test(void)
+{
+	struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch", 0);
+	int ret = 0;
+
+	if (evsel == NULL) {
+		pr_debug("perf_evsel__new\n");
+		return -1;
+	}
+
+	if (perf_evsel__test_field(evsel, "prev_comm", 16, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "prev_pid", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "prev_prio", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "prev_state", 8, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "next_comm", 16, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "next_pid", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "next_prio", 4, true))
+		ret = -1;
+
+	perf_evsel__delete(evsel);
+
+	evsel = perf_evsel__newtp("sched", "sched_wakeup", 0);
+
+	if (perf_evsel__test_field(evsel, "comm", 16, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "pid", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "prio", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "success", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
+		ret = -1;
+
+	return ret;
+}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
new file mode 100644
index 0000000..e174681
--- /dev/null
+++ b/tools/perf/tests/mmap-basic.c
@@ -0,0 +1,162 @@
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+/*
+ * This test will generate random numbers of calls to some getpid syscalls,
+ * then establish an mmap for a group of events that are created to monitor
+ * the syscalls.
+ *
+ * It will receive the events, using mmap, use its PERF_SAMPLE_ID generated
+ * sample.id field to map back to its respective perf_evsel instance.
+ *
+ * Then it checks if the number of syscalls reported as perf events by
+ * the kernel corresponds to the number of syscalls made.
+ */
+int test__basic_mmap(void)
+{
+	int err = -1;
+	union perf_event *event;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	struct perf_evlist *evlist;
+	struct perf_event_attr attr = {
+		.type		= PERF_TYPE_TRACEPOINT,
+		.read_format	= PERF_FORMAT_ID,
+		.sample_type	= PERF_SAMPLE_ID,
+		.watermark	= 0,
+	};
+	cpu_set_t cpu_set;
+	const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
+					"getpgid", };
+	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
+				      (void*)getpgid };
+#define nsyscalls ARRAY_SIZE(syscall_names)
+	int ids[nsyscalls];
+	unsigned int nr_events[nsyscalls],
+		     expected_nr_events[nsyscalls], i, j;
+	struct perf_evsel *evsels[nsyscalls], *evsel;
+
+	for (i = 0; i < nsyscalls; ++i) {
+		char name[64];
+
+		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+		ids[i] = trace_event__id(name);
+		if (ids[i] < 0) {
+			pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
+			return -1;
+		}
+		nr_events[i] = 0;
+		expected_nr_events[i] = random() % 257;
+	}
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		goto out_free_threads;
+	}
+
+	CPU_ZERO(&cpu_set);
+	CPU_SET(cpus->map[0], &cpu_set);
+	sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
+	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+		pr_debug("sched_setaffinity() failed on CPU %d: %s ",
+			 cpus->map[0], strerror(errno));
+		goto out_free_cpus;
+	}
+
+	evlist = perf_evlist__new(cpus, threads);
+	if (evlist == NULL) {
+		pr_debug("perf_evlist__new\n");
+		goto out_free_cpus;
+	}
+
+	/* anonymous union fields, can't be initialized above */
+	attr.wakeup_events = 1;
+	attr.sample_period = 1;
+
+	for (i = 0; i < nsyscalls; ++i) {
+		attr.config = ids[i];
+		evsels[i] = perf_evsel__new(&attr, i);
+		if (evsels[i] == NULL) {
+			pr_debug("perf_evsel__new\n");
+			goto out_free_evlist;
+		}
+
+		perf_evlist__add(evlist, evsels[i]);
+
+		if (perf_evsel__open(evsels[i], cpus, threads) < 0) {
+			pr_debug("failed to open counter: %s, "
+				 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+				 strerror(errno));
+			goto out_close_fd;
+		}
+	}
+
+	if (perf_evlist__mmap(evlist, 128, true) < 0) {
+		pr_debug("failed to mmap events: %d (%s)\n", errno,
+			 strerror(errno));
+		goto out_close_fd;
+	}
+
+	for (i = 0; i < nsyscalls; ++i)
+		for (j = 0; j < expected_nr_events[i]; ++j) {
+			int foo = syscalls[i]();
+			++foo;
+		}
+
+	while ((event = perf_evlist__mmap_read(evlist, 0)) != NULL) {
+		struct perf_sample sample;
+
+		if (event->header.type != PERF_RECORD_SAMPLE) {
+			pr_debug("unexpected %s event\n",
+				 perf_event__name(event->header.type));
+			goto out_munmap;
+		}
+
+		err = perf_evlist__parse_sample(evlist, event, &sample);
+		if (err) {
+			pr_err("Can't parse sample, err = %d\n", err);
+			goto out_munmap;
+		}
+
+		evsel = perf_evlist__id2evsel(evlist, sample.id);
+		if (evsel == NULL) {
+			pr_debug("event with id %" PRIu64
+				 " doesn't map to an evsel\n", sample.id);
+			goto out_munmap;
+		}
+		nr_events[evsel->idx]++;
+	}
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
+			pr_debug("expected %d %s events, got %d\n",
+				 expected_nr_events[evsel->idx],
+				 perf_evsel__name(evsel), nr_events[evsel->idx]);
+			goto out_munmap;
+		}
+	}
+
+	err = 0;
+out_munmap:
+	perf_evlist__munmap(evlist);
+out_close_fd:
+	for (i = 0; i < nsyscalls; ++i)
+		perf_evsel__close_fd(evsels[i], 1, threads->nr);
+out_free_evlist:
+	perf_evlist__delete(evlist);
+out_free_cpus:
+	cpu_map__delete(cpus);
+out_free_threads:
+	thread_map__delete(threads);
+	return err;
+}
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c
new file mode 100644
index 0000000..31072ab
--- /dev/null
+++ b/tools/perf/tests/open-syscall-all-cpus.c
@@ -0,0 +1,120 @@
+#include "evsel.h"
+#include "tests.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "debug.h"
+
+int test__open_syscall_event_on_all_cpus(void)
+{
+	int err = -1, fd, cpu;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	unsigned int nr_open_calls = 111, i;
+	cpu_set_t cpu_set;
+	int id = trace_event__id("sys_enter_open");
+
+	if (id < 0) {
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		return -1;
+	}
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		goto out_thread_map_delete;
+	}
+
+
+	CPU_ZERO(&cpu_set);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
+	if (evsel == NULL) {
+		pr_debug("perf_evsel__new\n");
+		goto out_thread_map_delete;
+	}
+
+	if (perf_evsel__open(evsel, cpus, threads) < 0) {
+		pr_debug("failed to open counter: %s, "
+			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+			 strerror(errno));
+		goto out_evsel_delete;
+	}
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int ncalls = nr_open_calls + cpu;
+		/*
+		 * XXX eventually lift this restriction in a way that
+		 * keeps perf building on older glibc installations
+		 * without CPU_ALLOC. 1024 cpus in 2010 still seems
+		 * a reasonable upper limit tho :-)
+		 */
+		if (cpus->map[cpu] >= CPU_SETSIZE) {
+			pr_debug("Ignoring CPU %d\n", cpus->map[cpu]);
+			continue;
+		}
+
+		CPU_SET(cpus->map[cpu], &cpu_set);
+		if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+			pr_debug("sched_setaffinity() failed on CPU %d: %s ",
+				 cpus->map[cpu],
+				 strerror(errno));
+			goto out_close_fd;
+		}
+		for (i = 0; i < ncalls; ++i) {
+			fd = open("/etc/passwd", O_RDONLY);
+			close(fd);
+		}
+		CPU_CLR(cpus->map[cpu], &cpu_set);
+	}
+
+	/*
+	 * Here we need to explicitely preallocate the counts, as if
+	 * we use the auto allocation it will allocate just for 1 cpu,
+	 * as we start by cpu 0.
+	 */
+	if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
+		pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+		goto out_close_fd;
+	}
+
+	err = 0;
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int expected;
+
+		if (cpus->map[cpu] >= CPU_SETSIZE)
+			continue;
+
+		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+			pr_debug("perf_evsel__read_on_cpu\n");
+			err = -1;
+			break;
+		}
+
+		expected = nr_open_calls + cpu;
+		if (evsel->counts->cpu[cpu].val != expected) {
+			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
+				 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
+			err = -1;
+		}
+	}
+
+out_close_fd:
+	perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+	perf_evsel__delete(evsel);
+out_thread_map_delete:
+	thread_map__delete(threads);
+	return err;
+}
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c
new file mode 100644
index 0000000..1c52fdc
--- /dev/null
+++ b/tools/perf/tests/open-syscall-tp-fields.c
@@ -0,0 +1,117 @@
+#include "perf.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "tests.h"
+
+int test__syscall_open_tp_fields(void)
+{
+	struct perf_record_opts opts = {
+		.target = {
+			.uid = UINT_MAX,
+			.uses_mmap = true,
+		},
+		.no_delay   = true,
+		.freq	    = 1,
+		.mmap_pages = 256,
+		.raw_samples = true,
+	};
+	const char *filename = "/etc/passwd";
+	int flags = O_RDONLY | O_DIRECTORY;
+	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+	struct perf_evsel *evsel;
+	int err = -1, i, nr_events = 0, nr_polls = 0;
+
+	if (evlist == NULL) {
+		pr_debug("%s: perf_evlist__new\n", __func__);
+		goto out;
+	}
+
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0);
+	if (evsel == NULL) {
+		pr_debug("%s: perf_evsel__newtp\n", __func__);
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__add(evlist, evsel);
+
+	err = perf_evlist__create_maps(evlist, &opts.target);
+	if (err < 0) {
+		pr_debug("%s: perf_evlist__create_maps\n", __func__);
+		goto out_delete_evlist;
+	}
+
+	perf_evsel__config(evsel, &opts);
+
+	evlist->threads->map[0] = getpid();
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	err = perf_evlist__mmap(evlist, UINT_MAX, false);
+	if (err < 0) {
+		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__enable(evlist);
+
+	/*
+	 * Generate the event:
+	 */
+	open(filename, flags);
+
+	while (1) {
+		int before = nr_events;
+
+		for (i = 0; i < evlist->nr_mmaps; i++) {
+			union perf_event *event;
+
+			while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+				const u32 type = event->header.type;
+				int tp_flags;
+				struct perf_sample sample;
+
+				++nr_events;
+
+				if (type != PERF_RECORD_SAMPLE)
+					continue;
+
+				err = perf_evsel__parse_sample(evsel, event, &sample);
+				if (err) {
+					pr_err("Can't parse sample, err = %d\n", err);
+					goto out_munmap;
+				}
+
+				tp_flags = perf_evsel__intval(evsel, &sample, "flags");
+
+				if (flags != tp_flags) {
+					pr_debug("%s: Expected flags=%#x, got %#x\n",
+						 __func__, flags, tp_flags);
+					goto out_munmap;
+				}
+
+				goto out_ok;
+			}
+		}
+
+		if (nr_events == before)
+			poll(evlist->pollfd, evlist->nr_fds, 10);
+
+		if (++nr_polls > 5) {
+			pr_debug("%s: no events!\n", __func__);
+			goto out_munmap;
+		}
+	}
+out_ok:
+	err = 0;
+out_munmap:
+	perf_evlist__munmap(evlist);
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+out:
+	return err;
+}
diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c
new file mode 100644
index 0000000..98be8b5
--- /dev/null
+++ b/tools/perf/tests/open-syscall.c
@@ -0,0 +1,66 @@
+#include "thread_map.h"
+#include "evsel.h"
+#include "debug.h"
+#include "tests.h"
+
+int test__open_syscall_event(void)
+{
+	int err = -1, fd;
+	struct thread_map *threads;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	unsigned int nr_open_calls = 111, i;
+	int id = trace_event__id("sys_enter_open");
+
+	if (id < 0) {
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		return -1;
+	}
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
+	if (evsel == NULL) {
+		pr_debug("perf_evsel__new\n");
+		goto out_thread_map_delete;
+	}
+
+	if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+		pr_debug("failed to open counter: %s, "
+			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+			 strerror(errno));
+		goto out_evsel_delete;
+	}
+
+	for (i = 0; i < nr_open_calls; ++i) {
+		fd = open("/etc/passwd", O_RDONLY);
+		close(fd);
+	}
+
+	if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
+		pr_debug("perf_evsel__read_on_cpu\n");
+		goto out_close_fd;
+	}
+
+	if (evsel->counts->cpu[0].val != nr_open_calls) {
+		pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
+			 nr_open_calls, evsel->counts->cpu[0].val);
+		goto out_close_fd;
+	}
+
+	err = 0;
+out_close_fd:
+	perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+	perf_evsel__delete(evsel);
+out_thread_map_delete:
+	thread_map__delete(threads);
+	return err;
+}
diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/tests/parse-events.c
similarity index 93%
rename from tools/perf/util/parse-events-test.c
rename to tools/perf/tests/parse-events.c
index 6ef213b..32ee478 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/tests/parse-events.c
@@ -3,6 +3,7 @@
 #include "evsel.h"
 #include "evlist.h"
 #include "sysfs.h"
+#include "tests.h"
 #include <linux/hw_breakpoint.h>
 
 #define TEST_ASSERT_VAL(text, cond) \
@@ -443,6 +444,23 @@
 	return 0;
 }
 
+static int test__checkevent_pmu_events(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong exclude_user",
+			!evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel",
+			evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	return 0;
+}
+
 static int test__checkterms_simple(struct list_head *terms)
 {
 	struct parse_events__term *term;
@@ -503,7 +521,7 @@
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* cycles:upp */
 	evsel = perf_evsel__next(evsel);
@@ -539,7 +557,7 @@
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* cache-references + :u modifier */
 	evsel = perf_evsel__next(evsel);
@@ -565,7 +583,7 @@
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	return 0;
 }
@@ -588,7 +606,7 @@
 	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 	TEST_ASSERT_VAL("wrong group name",
 		!strcmp(leader->group_name, "group1"));
 
@@ -618,7 +636,7 @@
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 	TEST_ASSERT_VAL("wrong group name",
 		!strcmp(leader->group_name, "group2"));
 
@@ -645,7 +663,7 @@
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	return 0;
 }
@@ -669,7 +687,7 @@
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* instructions:kp + p */
 	evsel = perf_evsel__next(evsel);
@@ -706,7 +724,7 @@
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* instructions + G */
 	evsel = perf_evsel__next(evsel);
@@ -733,7 +751,7 @@
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* instructions:G */
 	evsel = perf_evsel__next(evsel);
@@ -759,7 +777,7 @@
 	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	return 0;
 }
@@ -1024,7 +1042,52 @@
 	return !ret;
 }
 
-int parse_events__test(void)
+static int test_pmu_events(void)
+{
+	struct stat st;
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	int ret;
+
+	snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/events/",
+		 sysfs_find_mountpoint());
+
+	ret = stat(path, &st);
+	if (ret) {
+		pr_debug("ommiting PMU cpu events tests\n");
+		return 0;
+	}
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_debug("can't open pmu event dir");
+		return -1;
+	}
+
+	while (!ret && (ent = readdir(dir))) {
+#define MAX_NAME 100
+		struct test__event_st e;
+		char name[MAX_NAME];
+
+		if (!strcmp(ent->d_name, ".") ||
+		    !strcmp(ent->d_name, ".."))
+			continue;
+
+		snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name);
+
+		e.name  = name;
+		e.check = test__checkevent_pmu_events;
+
+		ret = test_event(&e);
+#undef MAX_NAME
+	}
+
+	closedir(dir);
+	return ret;
+}
+
+int test__parse_events(void)
 {
 	int ret1, ret2 = 0;
 
@@ -1040,6 +1103,12 @@
 	if (test_pmu())
 		TEST_EVENTS(test__events_pmu);
 
+	if (test_pmu()) {
+		int ret = test_pmu_events();
+		if (ret)
+			return ret;
+	}
+
 	ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms));
 	if (!ret2)
 		ret2 = ret1;
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
new file mode 100644
index 0000000..70e0d44
--- /dev/null
+++ b/tools/perf/tests/perf-record.c
@@ -0,0 +1,312 @@
+#include <sched.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "perf.h"
+#include "debug.h"
+#include "tests.h"
+
+static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
+{
+	int i, cpu = -1, nrcpus = 1024;
+realloc:
+	CPU_ZERO(maskp);
+
+	if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
+		if (errno == EINVAL && nrcpus < (1024 << 8)) {
+			nrcpus = nrcpus << 2;
+			goto realloc;
+		}
+		perror("sched_getaffinity");
+			return -1;
+	}
+
+	for (i = 0; i < nrcpus; i++) {
+		if (CPU_ISSET(i, maskp)) {
+			if (cpu == -1)
+				cpu = i;
+			else
+				CPU_CLR(i, maskp);
+		}
+	}
+
+	return cpu;
+}
+
+int test__PERF_RECORD(void)
+{
+	struct perf_record_opts opts = {
+		.target = {
+			.uid = UINT_MAX,
+			.uses_mmap = true,
+		},
+		.no_delay   = true,
+		.freq	    = 10,
+		.mmap_pages = 256,
+	};
+	cpu_set_t cpu_mask;
+	size_t cpu_mask_size = sizeof(cpu_mask);
+	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+	struct perf_evsel *evsel;
+	struct perf_sample sample;
+	const char *cmd = "sleep";
+	const char *argv[] = { cmd, "1", NULL, };
+	char *bname;
+	u64 prev_time = 0;
+	bool found_cmd_mmap = false,
+	     found_libc_mmap = false,
+	     found_vdso_mmap = false,
+	     found_ld_mmap = false;
+	int err = -1, errs = 0, i, wakeups = 0;
+	u32 cpu;
+	int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
+
+	if (evlist == NULL || argv == NULL) {
+		pr_debug("Not enough memory to create evlist\n");
+		goto out;
+	}
+
+	/*
+	 * We need at least one evsel in the evlist, use the default
+	 * one: "cycles".
+	 */
+	err = perf_evlist__add_default(evlist);
+	if (err < 0) {
+		pr_debug("Not enough memory to create evsel\n");
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Create maps of threads and cpus to monitor. In this case
+	 * we start with all threads and cpus (-1, -1) but then in
+	 * perf_evlist__prepare_workload we'll fill in the only thread
+	 * we're monitoring, the one forked there.
+	 */
+	err = perf_evlist__create_maps(evlist, &opts.target);
+	if (err < 0) {
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Prepare the workload in argv[] to run, it'll fork it, and then wait
+	 * for perf_evlist__start_workload() to exec it. This is done this way
+	 * so that we have time to open the evlist (calling sys_perf_event_open
+	 * on all the fds) and then mmap them.
+	 */
+	err = perf_evlist__prepare_workload(evlist, &opts, argv);
+	if (err < 0) {
+		pr_debug("Couldn't run the workload!\n");
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Config the evsels, setting attr->comm on the first one, etc.
+	 */
+	evsel = perf_evlist__first(evlist);
+	evsel->attr.sample_type |= PERF_SAMPLE_CPU;
+	evsel->attr.sample_type |= PERF_SAMPLE_TID;
+	evsel->attr.sample_type |= PERF_SAMPLE_TIME;
+	perf_evlist__config_attrs(evlist, &opts);
+
+	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
+	if (err < 0) {
+		pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	cpu = err;
+
+	/*
+	 * So that we can check perf_sample.cpu on all the samples.
+	 */
+	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
+		pr_debug("sched_setaffinity: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Call sys_perf_event_open on all the fds on all the evsels,
+	 * grouping them if asked to.
+	 */
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * mmap the first fd on a given CPU and ask for events for the other
+	 * fds in the same CPU to be injected in the same mmap ring buffer
+	 * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
+	 */
+	err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
+	if (err < 0) {
+		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Now that all is properly set up, enable the events, they will
+	 * count just on workload.pid, which will start...
+	 */
+	perf_evlist__enable(evlist);
+
+	/*
+	 * Now!
+	 */
+	perf_evlist__start_workload(evlist);
+
+	while (1) {
+		int before = total_events;
+
+		for (i = 0; i < evlist->nr_mmaps; i++) {
+			union perf_event *event;
+
+			while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+				const u32 type = event->header.type;
+				const char *name = perf_event__name(type);
+
+				++total_events;
+				if (type < PERF_RECORD_MAX)
+					nr_events[type]++;
+
+				err = perf_evlist__parse_sample(evlist, event, &sample);
+				if (err < 0) {
+					if (verbose)
+						perf_event__fprintf(event, stderr);
+					pr_debug("Couldn't parse sample\n");
+					goto out_err;
+				}
+
+				if (verbose) {
+					pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
+					perf_event__fprintf(event, stderr);
+				}
+
+				if (prev_time > sample.time) {
+					pr_debug("%s going backwards in time, prev=%" PRIu64 ", curr=%" PRIu64 "\n",
+						 name, prev_time, sample.time);
+					++errs;
+				}
+
+				prev_time = sample.time;
+
+				if (sample.cpu != cpu) {
+					pr_debug("%s with unexpected cpu, expected %d, got %d\n",
+						 name, cpu, sample.cpu);
+					++errs;
+				}
+
+				if ((pid_t)sample.pid != evlist->workload.pid) {
+					pr_debug("%s with unexpected pid, expected %d, got %d\n",
+						 name, evlist->workload.pid, sample.pid);
+					++errs;
+				}
+
+				if ((pid_t)sample.tid != evlist->workload.pid) {
+					pr_debug("%s with unexpected tid, expected %d, got %d\n",
+						 name, evlist->workload.pid, sample.tid);
+					++errs;
+				}
+
+				if ((type == PERF_RECORD_COMM ||
+				     type == PERF_RECORD_MMAP ||
+				     type == PERF_RECORD_FORK ||
+				     type == PERF_RECORD_EXIT) &&
+				     (pid_t)event->comm.pid != evlist->workload.pid) {
+					pr_debug("%s with unexpected pid/tid\n", name);
+					++errs;
+				}
+
+				if ((type == PERF_RECORD_COMM ||
+				     type == PERF_RECORD_MMAP) &&
+				     event->comm.pid != event->comm.tid) {
+					pr_debug("%s with different pid/tid!\n", name);
+					++errs;
+				}
+
+				switch (type) {
+				case PERF_RECORD_COMM:
+					if (strcmp(event->comm.comm, cmd)) {
+						pr_debug("%s with unexpected comm!\n", name);
+						++errs;
+					}
+					break;
+				case PERF_RECORD_EXIT:
+					goto found_exit;
+				case PERF_RECORD_MMAP:
+					bname = strrchr(event->mmap.filename, '/');
+					if (bname != NULL) {
+						if (!found_cmd_mmap)
+							found_cmd_mmap = !strcmp(bname + 1, cmd);
+						if (!found_libc_mmap)
+							found_libc_mmap = !strncmp(bname + 1, "libc", 4);
+						if (!found_ld_mmap)
+							found_ld_mmap = !strncmp(bname + 1, "ld", 2);
+					} else if (!found_vdso_mmap)
+						found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]");
+					break;
+
+				case PERF_RECORD_SAMPLE:
+					/* Just ignore samples for now */
+					break;
+				default:
+					pr_debug("Unexpected perf_event->header.type %d!\n",
+						 type);
+					++errs;
+				}
+			}
+		}
+
+		/*
+		 * We don't use poll here because at least at 3.1 times the
+		 * PERF_RECORD_{!SAMPLE} events don't honour
+		 * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
+		 */
+		if (total_events == before && false)
+			poll(evlist->pollfd, evlist->nr_fds, -1);
+
+		sleep(1);
+		if (++wakeups > 5) {
+			pr_debug("No PERF_RECORD_EXIT event!\n");
+			break;
+		}
+	}
+
+found_exit:
+	if (nr_events[PERF_RECORD_COMM] > 1) {
+		pr_debug("Excessive number of PERF_RECORD_COMM events!\n");
+		++errs;
+	}
+
+	if (nr_events[PERF_RECORD_COMM] == 0) {
+		pr_debug("Missing PERF_RECORD_COMM for %s!\n", cmd);
+		++errs;
+	}
+
+	if (!found_cmd_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd);
+		++errs;
+	}
+
+	if (!found_libc_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "libc");
+		++errs;
+	}
+
+	if (!found_ld_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "ld");
+		++errs;
+	}
+
+	if (!found_vdso_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]");
+		++errs;
+	}
+out_err:
+	perf_evlist__munmap(evlist);
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+out:
+	return (err < 0 || errs > 0) ? -1 : 0;
+}
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
new file mode 100644
index 0000000..a5f3798
--- /dev/null
+++ b/tools/perf/tests/pmu.c
@@ -0,0 +1,178 @@
+#include "parse-events.h"
+#include "pmu.h"
+#include "util.h"
+#include "tests.h"
+
+/* Simulated format definitions. */
+static struct test_format {
+	const char *name;
+	const char *value;
+} test_formats[] = {
+	{ "krava01", "config:0-1,62-63\n", },
+	{ "krava02", "config:10-17\n", },
+	{ "krava03", "config:5\n", },
+	{ "krava11", "config1:0,2,4,6,8,20-28\n", },
+	{ "krava12", "config1:63\n", },
+	{ "krava13", "config1:45-47\n", },
+	{ "krava21", "config2:0-3,10-13,20-23,30-33,40-43,50-53,60-63\n", },
+	{ "krava22", "config2:8,18,48,58\n", },
+	{ "krava23", "config2:28-29,38\n", },
+};
+
+#define TEST_FORMATS_CNT (sizeof(test_formats) / sizeof(struct test_format))
+
+/* Simulated users input. */
+static struct parse_events__term test_terms[] = {
+	{
+		.config    = (char *) "krava01",
+		.val.num   = 15,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava02",
+		.val.num   = 170,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava03",
+		.val.num   = 1,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava11",
+		.val.num   = 27,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava12",
+		.val.num   = 1,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava13",
+		.val.num   = 2,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava21",
+		.val.num   = 119,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava22",
+		.val.num   = 11,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava23",
+		.val.num   = 2,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+};
+#define TERMS_CNT (sizeof(test_terms) / sizeof(struct parse_events__term))
+
+/*
+ * Prepare format directory data, exported by kernel
+ * at /sys/bus/event_source/devices/<dev>/format.
+ */
+static char *test_format_dir_get(void)
+{
+	static char dir[PATH_MAX];
+	unsigned int i;
+
+	snprintf(dir, PATH_MAX, "/tmp/perf-pmu-test-format-XXXXXX");
+	if (!mkdtemp(dir))
+		return NULL;
+
+	for (i = 0; i < TEST_FORMATS_CNT; i++) {
+		static char name[PATH_MAX];
+		struct test_format *format = &test_formats[i];
+		FILE *file;
+
+		snprintf(name, PATH_MAX, "%s/%s", dir, format->name);
+
+		file = fopen(name, "w");
+		if (!file)
+			return NULL;
+
+		if (1 != fwrite(format->value, strlen(format->value), 1, file))
+			break;
+
+		fclose(file);
+	}
+
+	return dir;
+}
+
+/* Cleanup format directory. */
+static int test_format_dir_put(char *dir)
+{
+	char buf[PATH_MAX];
+	snprintf(buf, PATH_MAX, "rm -f %s/*\n", dir);
+	if (system(buf))
+		return -1;
+
+	snprintf(buf, PATH_MAX, "rmdir %s\n", dir);
+	return system(buf);
+}
+
+static struct list_head *test_terms_list(void)
+{
+	static LIST_HEAD(terms);
+	unsigned int i;
+
+	for (i = 0; i < TERMS_CNT; i++)
+		list_add_tail(&test_terms[i].list, &terms);
+
+	return &terms;
+}
+
+#undef TERMS_CNT
+
+int test__pmu(void)
+{
+	char *format = test_format_dir_get();
+	LIST_HEAD(formats);
+	struct list_head *terms = test_terms_list();
+	int ret;
+
+	if (!format)
+		return -EINVAL;
+
+	do {
+		struct perf_event_attr attr;
+
+		memset(&attr, 0, sizeof(attr));
+
+		ret = perf_pmu__format_parse(format, &formats);
+		if (ret)
+			break;
+
+		ret = perf_pmu__config_terms(&formats, &attr, terms);
+		if (ret)
+			break;
+
+		ret = -EINVAL;
+
+		if (attr.config  != 0xc00000000002a823)
+			break;
+		if (attr.config1 != 0x8000400000000145)
+			break;
+		if (attr.config2 != 0x0400000020041d07)
+			break;
+
+		ret = 0;
+	} while (0);
+
+	test_format_dir_put(format);
+	return ret;
+}
diff --git a/tools/perf/tests/rdpmc.c b/tools/perf/tests/rdpmc.c
new file mode 100644
index 0000000..ff94886
--- /dev/null
+++ b/tools/perf/tests/rdpmc.c
@@ -0,0 +1,175 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include "types.h"
+#include "perf.h"
+#include "debug.h"
+#include "tests.h"
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#define barrier() asm volatile("" ::: "memory")
+
+static u64 rdpmc(unsigned int counter)
+{
+	unsigned int low, high;
+
+	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+	return low | ((u64)high) << 32;
+}
+
+static u64 rdtsc(void)
+{
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+	return low | ((u64)high) << 32;
+}
+
+static u64 mmap_read_self(void *addr)
+{
+	struct perf_event_mmap_page *pc = addr;
+	u32 seq, idx, time_mult = 0, time_shift = 0;
+	u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+	do {
+		seq = pc->lock;
+		barrier();
+
+		enabled = pc->time_enabled;
+		running = pc->time_running;
+
+		if (enabled != running) {
+			cyc = rdtsc();
+			time_mult = pc->time_mult;
+			time_shift = pc->time_shift;
+			time_offset = pc->time_offset;
+		}
+
+		idx = pc->index;
+		count = pc->offset;
+		if (idx)
+			count += rdpmc(idx - 1);
+
+		barrier();
+	} while (pc->lock != seq);
+
+	if (enabled != running) {
+		u64 quot, rem;
+
+		quot = (cyc >> time_shift);
+		rem = cyc & ((1 << time_shift) - 1);
+		delta = time_offset + quot * time_mult +
+			((rem * time_mult) >> time_shift);
+
+		enabled += delta;
+		if (idx)
+			running += delta;
+
+		quot = count / running;
+		rem = count % running;
+		count = quot * enabled + (rem * enabled) / running;
+	}
+
+	return count;
+}
+
+/*
+ * If the RDPMC instruction faults then signal this back to the test parent task:
+ */
+static void segfault_handler(int sig __maybe_unused,
+			     siginfo_t *info __maybe_unused,
+			     void *uc __maybe_unused)
+{
+	exit(-1);
+}
+
+static int __test__rdpmc(void)
+{
+	volatile int tmp = 0;
+	u64 i, loops = 1000;
+	int n;
+	int fd;
+	void *addr;
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_INSTRUCTIONS,
+		.exclude_kernel = 1,
+	};
+	u64 delta_sum = 0;
+        struct sigaction sa;
+
+	sigfillset(&sa.sa_mask);
+	sa.sa_sigaction = segfault_handler;
+	sigaction(SIGSEGV, &sa, NULL);
+
+	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (fd < 0) {
+		pr_err("Error: sys_perf_event_open() syscall returned "
+		       "with %d (%s)\n", fd, strerror(errno));
+		return -1;
+	}
+
+	addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (addr == (void *)(-1)) {
+		pr_err("Error: mmap() syscall returned with (%s)\n",
+		       strerror(errno));
+		goto out_close;
+	}
+
+	for (n = 0; n < 6; n++) {
+		u64 stamp, now, delta;
+
+		stamp = mmap_read_self(addr);
+
+		for (i = 0; i < loops; i++)
+			tmp++;
+
+		now = mmap_read_self(addr);
+		loops *= 10;
+
+		delta = now - stamp;
+		pr_debug("%14d: %14Lu\n", n, (long long)delta);
+
+		delta_sum += delta;
+	}
+
+	munmap(addr, page_size);
+	pr_debug("   ");
+out_close:
+	close(fd);
+
+	if (!delta_sum)
+		return -1;
+
+	return 0;
+}
+
+int test__rdpmc(void)
+{
+	int status = 0;
+	int wret = 0;
+	int ret;
+	int pid;
+
+	pid = fork();
+	if (pid < 0)
+		return -1;
+
+	if (!pid) {
+		ret = __test__rdpmc();
+
+		exit(ret);
+	}
+
+	wret = waitpid(pid, &status, 0);
+	if (wret < 0 || status)
+		return -1;
+
+	return 0;
+}
+
+#endif
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
new file mode 100644
index 0000000..fc121ed
--- /dev/null
+++ b/tools/perf/tests/tests.h
@@ -0,0 +1,22 @@
+#ifndef TESTS_H
+#define TESTS_H
+
+/* Tests */
+int test__vmlinux_matches_kallsyms(void);
+int test__open_syscall_event(void);
+int test__open_syscall_event_on_all_cpus(void);
+int test__basic_mmap(void);
+int test__PERF_RECORD(void);
+int test__rdpmc(void);
+int test__perf_evsel__roundtrip_name_test(void);
+int test__perf_evsel__tp_sched_test(void);
+int test__syscall_open_tp_fields(void);
+int test__pmu(void);
+int test__attr(void);
+int test__dso_data(void);
+int test__parse_events(void);
+
+/* Util */
+int trace_event__id(const char *evname);
+
+#endif /* TESTS_H */
diff --git a/tools/perf/tests/util.c b/tools/perf/tests/util.c
new file mode 100644
index 0000000..748f2e8
--- /dev/null
+++ b/tools/perf/tests/util.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "tests.h"
+#include "debugfs.h"
+
+int trace_event__id(const char *evname)
+{
+	char *filename;
+	int err = -1, fd;
+
+	if (asprintf(&filename,
+		     "%s/syscalls/%s/id",
+		     tracing_events_path, evname) < 0)
+		return -1;
+
+	fd = open(filename, O_RDONLY);
+	if (fd >= 0) {
+		char id[16];
+		if (read(fd, id, sizeof(id)) > 0)
+			err = atoi(id);
+		close(fd);
+	}
+
+	free(filename);
+	return err;
+}
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
new file mode 100644
index 0000000..0d1cdbe
--- /dev/null
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -0,0 +1,230 @@
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <string.h>
+#include "map.h"
+#include "symbol.h"
+#include "util.h"
+#include "tests.h"
+#include "debug.h"
+#include "machine.h"
+
+static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
+					   struct symbol *sym)
+{
+	bool *visited = symbol__priv(sym);
+	*visited = true;
+	return 0;
+}
+
+int test__vmlinux_matches_kallsyms(void)
+{
+	int err = -1;
+	struct rb_node *nd;
+	struct symbol *sym;
+	struct map *kallsyms_map, *vmlinux_map;
+	struct machine kallsyms, vmlinux;
+	enum map_type type = MAP__FUNCTION;
+	struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", };
+
+	/*
+	 * Step 1:
+	 *
+	 * Init the machines that will hold kernel, modules obtained from
+	 * both vmlinux + .ko files and from /proc/kallsyms split by modules.
+	 */
+	machine__init(&kallsyms, "", HOST_KERNEL_ID);
+	machine__init(&vmlinux, "", HOST_KERNEL_ID);
+
+	/*
+	 * Step 2:
+	 *
+	 * Create the kernel maps for kallsyms and the DSO where we will then
+	 * load /proc/kallsyms. Also create the modules maps from /proc/modules
+	 * and find the .ko files that match them in /lib/modules/`uname -r`/.
+	 */
+	if (machine__create_kernel_maps(&kallsyms) < 0) {
+		pr_debug("machine__create_kernel_maps ");
+		return -1;
+	}
+
+	/*
+	 * Step 3:
+	 *
+	 * Load and split /proc/kallsyms into multiple maps, one per module.
+	 */
+	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, NULL) <= 0) {
+		pr_debug("dso__load_kallsyms ");
+		goto out;
+	}
+
+	/*
+	 * Step 4:
+	 *
+	 * kallsyms will be internally on demand sorted by name so that we can
+	 * find the reference relocation * symbol, i.e. the symbol we will use
+	 * to see if the running kernel was relocated by checking if it has the
+	 * same value in the vmlinux file we load.
+	 */
+	kallsyms_map = machine__kernel_map(&kallsyms, type);
+
+	sym = map__find_symbol_by_name(kallsyms_map, ref_reloc_sym.name, NULL);
+	if (sym == NULL) {
+		pr_debug("dso__find_symbol_by_name ");
+		goto out;
+	}
+
+	ref_reloc_sym.addr = sym->start;
+
+	/*
+	 * Step 5:
+	 *
+	 * Now repeat step 2, this time for the vmlinux file we'll auto-locate.
+	 */
+	if (machine__create_kernel_maps(&vmlinux) < 0) {
+		pr_debug("machine__create_kernel_maps ");
+		goto out;
+	}
+
+	vmlinux_map = machine__kernel_map(&vmlinux, type);
+	map__kmap(vmlinux_map)->ref_reloc_sym = &ref_reloc_sym;
+
+	/*
+	 * Step 6:
+	 *
+	 * Locate a vmlinux file in the vmlinux path that has a buildid that
+	 * matches the one of the running kernel.
+	 *
+	 * While doing that look if we find the ref reloc symbol, if we find it
+	 * we'll have its ref_reloc_symbol.unrelocated_addr and then
+	 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
+	 * to fixup the symbols.
+	 */
+	if (machine__load_vmlinux_path(&vmlinux, type,
+				       vmlinux_matches_kallsyms_filter) <= 0) {
+		pr_debug("machine__load_vmlinux_path ");
+		goto out;
+	}
+
+	err = 0;
+	/*
+	 * Step 7:
+	 *
+	 * Now look at the symbols in the vmlinux DSO and check if we find all of them
+	 * in the kallsyms dso. For the ones that are in both, check its names and
+	 * end addresses too.
+	 */
+	for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
+		struct symbol *pair, *first_pair;
+		bool backwards = true;
+
+		sym  = rb_entry(nd, struct symbol, rb_node);
+
+		if (sym->start == sym->end)
+			continue;
+
+		first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+		pair = first_pair;
+
+		if (pair && pair->start == sym->start) {
+next_pair:
+			if (strcmp(sym->name, pair->name) == 0) {
+				/*
+				 * kallsyms don't have the symbol end, so we
+				 * set that by using the next symbol start - 1,
+				 * in some cases we get this up to a page
+				 * wrong, trace_kmalloc when I was developing
+				 * this code was one such example, 2106 bytes
+				 * off the real size. More than that and we
+				 * _really_ have a problem.
+				 */
+				s64 skew = sym->end - pair->end;
+				if (llabs(skew) < page_size)
+					continue;
+
+				pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
+					 sym->start, sym->name, sym->end, pair->end);
+			} else {
+				struct rb_node *nnd;
+detour:
+				nnd = backwards ? rb_prev(&pair->rb_node) :
+						  rb_next(&pair->rb_node);
+				if (nnd) {
+					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
+
+					if (next->start == sym->start) {
+						pair = next;
+						goto next_pair;
+					}
+				}
+
+				if (backwards) {
+					backwards = false;
+					pair = first_pair;
+					goto detour;
+				}
+
+				pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+					 sym->start, sym->name, pair->name);
+			}
+		} else
+			pr_debug("%#" PRIx64 ": %s not on kallsyms\n", sym->start, sym->name);
+
+		err = -1;
+	}
+
+	if (!verbose)
+		goto out;
+
+	pr_info("Maps only in vmlinux:\n");
+
+	for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
+		struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
+		/*
+		 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
+		 * the kernel will have the path for the vmlinux file being used,
+		 * so use the short name, less descriptive but the same ("[kernel]" in
+		 * both cases.
+		 */
+		pair = map_groups__find_by_name(&kallsyms.kmaps, type,
+						(pos->dso->kernel ?
+							pos->dso->short_name :
+							pos->dso->name));
+		if (pair)
+			pair->priv = 1;
+		else
+			map__fprintf(pos, stderr);
+	}
+
+	pr_info("Maps in vmlinux with a different name in kallsyms:\n");
+
+	for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
+		struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
+
+		pair = map_groups__find(&kallsyms.kmaps, type, pos->start);
+		if (pair == NULL || pair->priv)
+			continue;
+
+		if (pair->start == pos->start) {
+			pair->priv = 1;
+			pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
+				pos->start, pos->end, pos->pgoff, pos->dso->name);
+			if (pos->pgoff != pair->pgoff || pos->end != pair->end)
+				pr_info(": \n*%" PRIx64 "-%" PRIx64 " %" PRIx64 "",
+					pair->start, pair->end, pair->pgoff);
+			pr_info(" %s\n", pair->dso->name);
+			pair->priv = 1;
+		}
+	}
+
+	pr_info("Maps only in kallsyms:\n");
+
+	for (nd = rb_first(&kallsyms.kmaps.maps[type]);
+	     nd; nd = rb_next(nd)) {
+		struct map *pos = rb_entry(nd, struct map, rb_node);
+
+		if (!pos->priv)
+			map__fprintf(pos, stderr);
+	}
+out:
+	return err;
+}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 8f8cd2d..5dab3ca 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -188,6 +188,12 @@
 	struct disasm_line *cursor = ab->selection, *target;
 	struct browser_disasm_line *btarget, *bcursor;
 	unsigned int from, to;
+	struct map_symbol *ms = ab->b.priv;
+	struct symbol *sym = ms->sym;
+
+	/* PLT symbols contain external offsets */
+	if (strstr(sym->name, "@plt"))
+		return;
 
 	if (!cursor || !cursor->ins || !ins__is_jump(cursor->ins) ||
 	    !disasm_line__has_offset(cursor))
@@ -386,9 +392,8 @@
 	browser->b.nr_entries = browser->nr_asm_entries;
 }
 
-static bool annotate_browser__callq(struct annotate_browser *browser,
-				    int evidx, void (*timer)(void *arg),
-				    void *arg, int delay_secs)
+static bool annotate_browser__callq(struct annotate_browser *browser, int evidx,
+				    struct hist_browser_timer *hbt)
 {
 	struct map_symbol *ms = browser->b.priv;
 	struct disasm_line *dl = browser->selection;
@@ -418,7 +423,7 @@
 	}
 
 	pthread_mutex_unlock(&notes->lock);
-	symbol__tui_annotate(target, ms->map, evidx, timer, arg, delay_secs);
+	symbol__tui_annotate(target, ms->map, evidx, hbt);
 	ui_browser__show_title(&browser->b, sym->name);
 	return true;
 }
@@ -602,13 +607,13 @@
 }
 
 static int annotate_browser__run(struct annotate_browser *browser, int evidx,
-				 void(*timer)(void *arg),
-				 void *arg, int delay_secs)
+				 struct hist_browser_timer *hbt)
 {
 	struct rb_node *nd = NULL;
 	struct map_symbol *ms = browser->b.priv;
 	struct symbol *sym = ms->sym;
 	const char *help = "Press 'h' for help on key bindings";
+	int delay_secs = hbt ? hbt->refresh : 0;
 	int key;
 
 	if (ui_browser__show(&browser->b, sym->name, help) < 0)
@@ -639,8 +644,8 @@
 
 		switch (key) {
 		case K_TIMER:
-			if (timer != NULL)
-				timer(arg);
+			if (hbt)
+				hbt->timer(hbt->arg);
 
 			if (delay_secs != 0)
 				symbol__annotate_decay_histogram(sym, evidx);
@@ -676,8 +681,14 @@
 		"o             Toggle disassembler output/simplified view\n"
 		"s             Toggle source code view\n"
 		"/             Search string\n"
+		"r             Run available scripts\n"
 		"?             Search previous string\n");
 			continue;
+		case 'r':
+			{
+				script_browse(NULL);
+				continue;
+			}
 		case 'H':
 			nd = browser->curr_hot;
 			break;
@@ -734,7 +745,7 @@
 					goto show_sup_ins;
 				goto out;
 			} else if (!(annotate_browser__jump(browser) ||
-				     annotate_browser__callq(browser, evidx, timer, arg, delay_secs))) {
+				     annotate_browser__callq(browser, evidx, hbt))) {
 show_sup_ins:
 				ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions.");
 			}
@@ -757,16 +768,21 @@
 }
 
 int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
-			     void(*timer)(void *arg), void *arg, int delay_secs)
+			     struct hist_browser_timer *hbt)
 {
-	return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx,
-				    timer, arg, delay_secs);
+	return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, hbt);
 }
 
 static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
 						size_t size)
 {
 	u64 offset;
+	struct map_symbol *ms = browser->b.priv;
+	struct symbol *sym = ms->sym;
+
+	/* PLT symbols contain external offsets */
+	if (strstr(sym->name, "@plt"))
+		return;
 
 	for (offset = 0; offset < size; ++offset) {
 		struct disasm_line *dl = browser->offsets[offset], *dlt;
@@ -810,8 +826,7 @@
 }
 
 int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
-			 void(*timer)(void *arg), void *arg,
-			 int delay_secs)
+			 struct hist_browser_timer *hbt)
 {
 	struct disasm_line *pos, *n;
 	struct annotation *notes;
@@ -893,7 +908,7 @@
 
 	annotate_browser__update_addr_width(&browser);
 
-	ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs);
+	ret = annotate_browser__run(&browser, evidx, hbt);
 	list_for_each_entry_safe(pos, n, &notes->src->source, node) {
 		list_del(&pos->node);
 		disasm_line__free(pos);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index ef2f93c..ccc4bd1 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -11,6 +11,7 @@
 #include "../../util/pstack.h"
 #include "../../util/sort.h"
 #include "../../util/util.h"
+#include "../../arch/common.h"
 
 #include "../browser.h"
 #include "../helpline.h"
@@ -310,10 +311,11 @@
 }
 
 static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
-			     void(*timer)(void *arg), void *arg, int delay_secs)
+			     struct hist_browser_timer *hbt)
 {
 	int key;
 	char title[160];
+	int delay_secs = hbt ? hbt->refresh : 0;
 
 	browser->b.entries = &browser->hists->entries;
 	browser->b.nr_entries = browser->hists->nr_entries;
@@ -330,7 +332,7 @@
 
 		switch (key) {
 		case K_TIMER:
-			timer(arg);
+			hbt->timer(hbt->arg);
 			ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
 
 			if (browser->hists->stats.nr_lost_warned !=
@@ -1127,11 +1129,17 @@
 	}
 }
 
+/* Check whether the browser is for 'top' or 'report' */
+static inline bool is_report_browser(void *timer)
+{
+	return timer == NULL;
+}
+
 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    const char *helpline, const char *ev_name,
 				    bool left_exits,
-				    void(*timer)(void *arg), void *arg,
-				    int delay_secs)
+				    struct hist_browser_timer *hbt,
+				    struct perf_session_env *env)
 {
 	struct hists *hists = &evsel->hists;
 	struct hist_browser *browser = hist_browser__new(hists);
@@ -1141,6 +1149,8 @@
 	int nr_options = 0;
 	int key = -1;
 	char buf[64];
+	char script_opt[64];
+	int delay_secs = hbt ? hbt->refresh : 0;
 
 	if (browser == NULL)
 		return -1;
@@ -1159,10 +1169,11 @@
 		int choice = 0,
 		    annotate = -2, zoom_dso = -2, zoom_thread = -2,
 		    annotate_f = -2, annotate_t = -2, browse_map = -2;
+		int scripts_comm = -2, scripts_symbol = -2, scripts_all = -2;
 
 		nr_options = 0;
 
-		key = hist_browser__run(browser, ev_name, timer, arg, delay_secs);
+		key = hist_browser__run(browser, ev_name, hbt);
 
 		if (browser->he_selection != NULL) {
 			thread = hist_browser__selected_thread(browser);
@@ -1211,6 +1222,10 @@
 				hist_browser__reset(browser);
 			}
 			continue;
+		case 'r':
+			if (is_report_browser(hbt))
+				goto do_scripts;
+			continue;
 		case K_F1:
 		case 'h':
 		case '?':
@@ -1229,6 +1244,7 @@
 					"E             Expand all callchains\n"
 					"d             Zoom into current DSO\n"
 					"t             Zoom into current Thread\n"
+					"r             Run available scripts('perf report' only)\n"
 					"P             Print histograms to perf.hist.N\n"
 					"V             Verbose (DSO names in callchains, etc)\n"
 					"/             Filter symbol by name");
@@ -1317,6 +1333,25 @@
 		    browser->selection->map != NULL &&
 		    asprintf(&options[nr_options], "Browse map details") > 0)
 			browse_map = nr_options++;
+
+		/* perf script support */
+		if (browser->he_selection) {
+			struct symbol *sym;
+
+			if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]",
+				browser->he_selection->thread->comm) > 0)
+				scripts_comm = nr_options++;
+
+			sym = browser->he_selection->ms.sym;
+			if (sym && sym->namelen &&
+				asprintf(&options[nr_options], "Run scripts for samples of symbol [%s]",
+						sym->name) > 0)
+				scripts_symbol = nr_options++;
+		}
+
+		if (asprintf(&options[nr_options], "Run scripts for all samples") > 0)
+			scripts_all = nr_options++;
+
 add_exit_option:
 		options[nr_options++] = (char *)"Exit";
 retry_popup_menu:
@@ -1334,6 +1369,9 @@
 			struct hist_entry *he;
 			int err;
 do_annotate:
+			if (!objdump_path && perf_session_env__lookup_objdump(env))
+				continue;
+
 			he = hist_browser__selected_entry(browser);
 			if (he == NULL)
 				continue;
@@ -1356,8 +1394,7 @@
 			 * Don't let this be freed, say, by hists__decay_entry.
 			 */
 			he->used = true;
-			err = hist_entry__tui_annotate(he, evsel->idx,
-						       timer, arg, delay_secs);
+			err = hist_entry__tui_annotate(he, evsel->idx, hbt);
 			he->used = false;
 			/*
 			 * offer option to annotate the other branch source or target
@@ -1411,6 +1448,20 @@
 			hists__filter_by_thread(hists);
 			hist_browser__reset(browser);
 		}
+		/* perf scripts support */
+		else if (choice == scripts_all || choice == scripts_comm ||
+				choice == scripts_symbol) {
+do_scripts:
+			memset(script_opt, 0, 64);
+
+			if (choice == scripts_comm)
+				sprintf(script_opt, " -c %s ", browser->he_selection->thread->comm);
+
+			if (choice == scripts_symbol)
+				sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name);
+
+			script_browse(script_opt);
+		}
 	}
 out_free_stack:
 	pstack__delete(fstack);
@@ -1424,6 +1475,7 @@
 	struct ui_browser b;
 	struct perf_evsel *selection;
 	bool lost_events, lost_events_warned;
+	struct perf_session_env *env;
 };
 
 static void perf_evsel_menu__write(struct ui_browser *browser,
@@ -1466,11 +1518,12 @@
 
 static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 				int nr_events, const char *help,
-				void(*timer)(void *arg), void *arg, int delay_secs)
+				struct hist_browser_timer *hbt)
 {
 	struct perf_evlist *evlist = menu->b.priv;
 	struct perf_evsel *pos;
 	const char *ev_name, *title = "Available samples";
+	int delay_secs = hbt ? hbt->refresh : 0;
 	int key;
 
 	if (ui_browser__show(&menu->b, title,
@@ -1482,7 +1535,7 @@
 
 		switch (key) {
 		case K_TIMER:
-			timer(arg);
+			hbt->timer(hbt->arg);
 
 			if (!menu->lost_events_warned && menu->lost_events) {
 				ui_browser__warn_lost_events(&menu->b);
@@ -1500,12 +1553,12 @@
 			 * Give the calling tool a chance to populate the non
 			 * default evsel resorted hists tree.
 			 */
-			if (timer)
-				timer(arg);
+			if (hbt)
+				hbt->timer(hbt->arg);
 			ev_name = perf_evsel__name(pos);
 			key = perf_evsel__hists_browse(pos, nr_events, help,
-						       ev_name, true, timer,
-						       arg, delay_secs);
+						       ev_name, true, hbt,
+						       menu->env);
 			ui_browser__show_title(&menu->b, title);
 			switch (key) {
 			case K_TAB:
@@ -1553,8 +1606,8 @@
 
 static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 					   const char *help,
-					   void(*timer)(void *arg), void *arg,
-					   int delay_secs)
+					   struct hist_browser_timer *hbt,
+					   struct perf_session_env *env)
 {
 	struct perf_evsel *pos;
 	struct perf_evsel_menu menu = {
@@ -1566,6 +1619,7 @@
 			.nr_entries = evlist->nr_entries,
 			.priv	    = evlist,
 		},
+		.env = env,
 	};
 
 	ui_helpline__push("Press ESC to exit");
@@ -1578,23 +1632,20 @@
 			menu.b.width = line_len;
 	}
 
-	return perf_evsel_menu__run(&menu, evlist->nr_entries, help, timer,
-				    arg, delay_secs);
+	return perf_evsel_menu__run(&menu, evlist->nr_entries, help, hbt);
 }
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
-				  void(*timer)(void *arg), void *arg,
-				  int delay_secs)
+				  struct hist_browser_timer *hbt,
+				  struct perf_session_env *env)
 {
 	if (evlist->nr_entries == 1) {
 		struct perf_evsel *first = list_entry(evlist->entries.next,
 						      struct perf_evsel, node);
 		const char *ev_name = perf_evsel__name(first);
 		return perf_evsel__hists_browse(first, evlist->nr_entries, help,
-						ev_name, false, timer, arg,
-						delay_secs);
+						ev_name, false, hbt, env);
 	}
 
-	return __perf_evlist__tui_browse_hists(evlist, help,
-					       timer, arg, delay_secs);
+	return __perf_evlist__tui_browse_hists(evlist, help, hbt, env);
 }
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
new file mode 100644
index 0000000..cbbd44b
--- /dev/null
+++ b/tools/perf/ui/browsers/scripts.c
@@ -0,0 +1,189 @@
+#include <elf.h>
+#include <newt.h>
+#include <inttypes.h>
+#include <sys/ttydefaults.h>
+#include <string.h>
+#include "../../util/sort.h"
+#include "../../util/util.h"
+#include "../../util/hist.h"
+#include "../../util/debug.h"
+#include "../../util/symbol.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../libslang.h"
+
+/* 2048 lines should be enough for a script output */
+#define MAX_LINES		2048
+
+/* 160 bytes for one output line */
+#define AVERAGE_LINE_LEN	160
+
+struct script_line {
+	struct list_head node;
+	char line[AVERAGE_LINE_LEN];
+};
+
+struct perf_script_browser {
+	struct ui_browser b;
+	struct list_head entries;
+	const char *script_name;
+	int nr_lines;
+};
+
+#define SCRIPT_NAMELEN	128
+#define SCRIPT_MAX_NO	64
+/*
+ * Usually the full path for a script is:
+ *	/home/username/libexec/perf-core/scripts/python/xxx.py
+ *	/home/username/libexec/perf-core/scripts/perl/xxx.pl
+ * So 256 should be long enough to contain the full path.
+ */
+#define SCRIPT_FULLPATH_LEN	256
+
+/*
+ * When success, will copy the full path of the selected script
+ * into  the buffer pointed by script_name, and return 0.
+ * Return -1 on failure.
+ */
+static int list_scripts(char *script_name)
+{
+	char *buf, *names[SCRIPT_MAX_NO], *paths[SCRIPT_MAX_NO];
+	int i, num, choice, ret = -1;
+
+	/* Preset the script name to SCRIPT_NAMELEN */
+	buf = malloc(SCRIPT_MAX_NO * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN));
+	if (!buf)
+		return ret;
+
+	for (i = 0; i < SCRIPT_MAX_NO; i++) {
+		names[i] = buf + i * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN);
+		paths[i] = names[i] + SCRIPT_NAMELEN;
+	}
+
+	num = find_scripts(names, paths);
+	if (num > 0) {
+		choice = ui__popup_menu(num, names);
+		if (choice < num && choice >= 0) {
+			strcpy(script_name, paths[choice]);
+			ret = 0;
+		}
+	}
+
+	free(buf);
+	return ret;
+}
+
+static void script_browser__write(struct ui_browser *browser,
+				   void *entry, int row)
+{
+	struct script_line *sline = list_entry(entry, struct script_line, node);
+	bool current_entry = ui_browser__is_current_entry(browser, row);
+
+	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+						       HE_COLORSET_NORMAL);
+
+	slsmg_write_nstring(sline->line, browser->width);
+}
+
+static int script_browser__run(struct perf_script_browser *self)
+{
+	int key;
+
+	if (ui_browser__show(&self->b, self->script_name,
+			     "Press <- or ESC to exit") < 0)
+		return -1;
+
+	while (1) {
+		key = ui_browser__run(&self->b, 0);
+
+		/* We can add some special key handling here if needed */
+		break;
+	}
+
+	ui_browser__hide(&self->b);
+	return key;
+}
+
+
+int script_browse(const char *script_opt)
+{
+	char cmd[SCRIPT_FULLPATH_LEN*2], script_name[SCRIPT_FULLPATH_LEN];
+	char *line = NULL;
+	size_t len = 0;
+	ssize_t retlen;
+	int ret = -1, nr_entries = 0;
+	FILE *fp;
+	void *buf;
+	struct script_line *sline;
+
+	struct perf_script_browser script = {
+		.b = {
+			.refresh    = ui_browser__list_head_refresh,
+			.seek	    = ui_browser__list_head_seek,
+			.write	    = script_browser__write,
+		},
+		.script_name = script_name,
+	};
+
+	INIT_LIST_HEAD(&script.entries);
+
+	/* Save each line of the output in one struct script_line object. */
+	buf = zalloc((sizeof(*sline)) * MAX_LINES);
+	if (!buf)
+		return -1;
+	sline = buf;
+
+	memset(script_name, 0, SCRIPT_FULLPATH_LEN);
+	if (list_scripts(script_name))
+		goto exit;
+
+	sprintf(cmd, "perf script -s %s ", script_name);
+
+	if (script_opt)
+		strcat(cmd, script_opt);
+
+	if (input_name) {
+		strcat(cmd, " -i ");
+		strcat(cmd, input_name);
+	}
+
+	strcat(cmd, " 2>&1");
+
+	fp = popen(cmd, "r");
+	if (!fp)
+		goto exit;
+
+	while ((retlen = getline(&line, &len, fp)) != -1) {
+		strncpy(sline->line, line, AVERAGE_LINE_LEN);
+
+		/* If one output line is very large, just cut it short */
+		if (retlen >= AVERAGE_LINE_LEN) {
+			sline->line[AVERAGE_LINE_LEN - 1] = '\0';
+			sline->line[AVERAGE_LINE_LEN - 2] = '\n';
+		}
+		list_add_tail(&sline->node, &script.entries);
+
+		if (script.b.width < retlen)
+			script.b.width = retlen;
+
+		if (nr_entries++ >= MAX_LINES - 1)
+			break;
+		sline++;
+	}
+
+	if (script.b.width > AVERAGE_LINE_LEN)
+		script.b.width = AVERAGE_LINE_LEN;
+
+	if (line)
+		free(line);
+	pclose(fp);
+
+	script.nr_lines = nr_entries;
+	script.b.nr_entries = nr_entries;
+	script.b.entries = &script.entries;
+
+	ret = script_browser__run(&script);
+exit:
+	free(buf);
+	return ret;
+}
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index 4125c62..253b621 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -237,9 +237,7 @@
 
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 				  const char *help,
-				  void (*timer) (void *arg)__maybe_unused,
-				  void *arg __maybe_unused,
-				  int delay_secs __maybe_unused)
+				  struct hist_browser_timer *hbt __maybe_unused)
 {
 	struct perf_evsel *pos;
 	GtkWidget *vbox;
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 687af0b..856320e 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -30,6 +30,7 @@
 int perf_gtk__deactivate_context(struct perf_gtk_context **ctx);
 
 void perf_gtk__init_helpline(void);
+void perf_gtk__init_progress(void);
 void perf_gtk__init_hpp(void);
 
 #ifndef HAVE_GTK_INFO_BAR
diff --git a/tools/perf/ui/gtk/progress.c b/tools/perf/ui/gtk/progress.c
new file mode 100644
index 0000000..482bcf3
--- /dev/null
+++ b/tools/perf/ui/gtk/progress.c
@@ -0,0 +1,59 @@
+#include <inttypes.h>
+
+#include "gtk.h"
+#include "../progress.h"
+#include "util.h"
+
+static GtkWidget *dialog;
+static GtkWidget *progress;
+
+static void gtk_progress_update(u64 curr, u64 total, const char *title)
+{
+	double fraction = total ? 1.0 * curr / total : 0.0;
+	char buf[1024];
+
+	if (dialog == NULL) {
+		GtkWidget *vbox = gtk_vbox_new(TRUE, 5);
+		GtkWidget *label = gtk_label_new(title);
+
+		dialog = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+		progress = gtk_progress_bar_new();
+
+		gtk_box_pack_start(GTK_BOX(vbox), label, TRUE, FALSE, 3);
+		gtk_box_pack_start(GTK_BOX(vbox), progress, TRUE, TRUE, 3);
+
+		gtk_container_add(GTK_CONTAINER(dialog), vbox);
+
+		gtk_window_set_title(GTK_WINDOW(dialog), "perf");
+		gtk_window_resize(GTK_WINDOW(dialog), 300, 80);
+		gtk_window_set_position(GTK_WINDOW(dialog), GTK_WIN_POS_CENTER);
+
+		gtk_widget_show_all(dialog);
+	}
+
+	gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(progress), fraction);
+	snprintf(buf, sizeof(buf), "%"PRIu64" / %"PRIu64, curr, total);
+	gtk_progress_bar_set_text(GTK_PROGRESS_BAR(progress), buf);
+
+	/* we didn't call gtk_main yet, so do it manually */
+	while (gtk_events_pending())
+		gtk_main_iteration();
+}
+
+static void gtk_progress_finish(void)
+{
+	/* this will also destroy all of its children */
+	gtk_widget_destroy(dialog);
+
+	dialog = NULL;
+}
+
+static struct ui_progress gtk_progress_fns = {
+	.update		= gtk_progress_update,
+	.finish		= gtk_progress_finish,
+};
+
+void perf_gtk__init_progress(void)
+{
+	progress_fns = &gtk_progress_fns;
+}
diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c
index 3c4c6ef..6c2dd2e 100644
--- a/tools/perf/ui/gtk/setup.c
+++ b/tools/perf/ui/gtk/setup.c
@@ -8,7 +8,9 @@
 {
 	perf_error__register(&perf_gtk_eops);
 	perf_gtk__init_helpline();
+	perf_gtk__init_progress();
 	perf_gtk__init_hpp();
+
 	return gtk_init_check(NULL, NULL) ? 0 : -1;
 }
 
diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c
index ccb046a..c06942a 100644
--- a/tools/perf/ui/gtk/util.c
+++ b/tools/perf/ui/gtk/util.c
@@ -111,14 +111,3 @@
 	.warning	= perf_gtk__warning_statusbar,
 #endif
 };
-
-/*
- * FIXME: Functions below should be implemented properly.
- *        For now, just add stubs for NO_NEWT=1 build.
- */
-#ifndef NEWT_SUPPORT
-void ui_progress__update(u64 curr __maybe_unused, u64 total __maybe_unused,
-			 const char *title __maybe_unused)
-{
-}
-#endif
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index f5a1e4f..aa84130 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -161,7 +161,7 @@
 
 static double baseline_percent(struct hist_entry *he)
 {
-	struct hist_entry *pair = he->pair;
+	struct hist_entry *pair = hist_entry__next_pair(he);
 	struct hists *pair_hists = pair ? pair->hists : NULL;
 	double percent = 0.0;
 
@@ -179,7 +179,10 @@
 {
 	double percent = baseline_percent(he);
 
-	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
+	if (hist_entry__has_pairs(he))
+		return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
+	else
+		return scnprintf(hpp->buf, hpp->size, "        ");
 }
 
 static int hpp__entry_baseline(struct perf_hpp *hpp, struct hist_entry *he)
@@ -187,7 +190,10 @@
 	double percent = baseline_percent(he);
 	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
 
-	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+	if (hist_entry__has_pairs(he) || symbol_conf.field_sep)
+		return scnprintf(hpp->buf, hpp->size, fmt, percent);
+	else
+		return scnprintf(hpp->buf, hpp->size, "            ");
 }
 
 static int hpp__header_samples(struct perf_hpp *hpp)
@@ -228,6 +234,26 @@
 	return scnprintf(hpp->buf, hpp->size, fmt, he->stat.period);
 }
 
+static int hpp__header_period_baseline(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Period Base");
+}
+
+static int hpp__width_period_baseline(struct perf_hpp *hpp __maybe_unused)
+{
+	return 12;
+}
+
+static int hpp__entry_period_baseline(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+	u64 period = pair ? pair->stat.period : 0;
+	const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64;
+
+	return scnprintf(hpp->buf, hpp->size, fmt, period);
+}
 static int hpp__header_delta(struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
@@ -242,30 +268,79 @@
 
 static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he)
 {
-	struct hist_entry *pair = he->pair;
-	struct hists *pair_hists = pair ? pair->hists : NULL;
-	struct hists *hists = he->hists;
-	u64 old_total, new_total;
-	double old_percent = 0, new_percent = 0;
-	double diff;
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s";
 	char buf[32] = " ";
+	double diff;
 
-	old_total = pair_hists ? pair_hists->stats.total_period : 0;
-	if (old_total > 0 && pair)
-		old_percent = 100.0 * pair->stat.period / old_total;
+	if (he->diff.computed)
+		diff = he->diff.period_ratio_delta;
+	else
+		diff = perf_diff__compute_delta(he);
 
-	new_total = hists->stats.total_period;
-	if (new_total > 0)
-		new_percent = 100.0 * he->stat.period / new_total;
-
-	diff = new_percent - old_percent;
 	if (fabs(diff) >= 0.01)
 		scnprintf(buf, sizeof(buf), "%+4.2F%%", diff);
 
 	return scnprintf(hpp->buf, hpp->size, fmt, buf);
 }
 
+static int hpp__header_ratio(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Ratio");
+}
+
+static int hpp__width_ratio(struct perf_hpp *hpp __maybe_unused)
+{
+	return 14;
+}
+
+static int hpp__entry_ratio(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
+	char buf[32] = " ";
+	double ratio;
+
+	if (he->diff.computed)
+		ratio = he->diff.period_ratio;
+	else
+		ratio = perf_diff__compute_ratio(he);
+
+	if (ratio > 0.0)
+		scnprintf(buf, sizeof(buf), "%+14.6F", ratio);
+
+	return scnprintf(hpp->buf, hpp->size, fmt, buf);
+}
+
+static int hpp__header_wdiff(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Weighted diff");
+}
+
+static int hpp__width_wdiff(struct perf_hpp *hpp __maybe_unused)
+{
+	return 14;
+}
+
+static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
+	char buf[32] = " ";
+	s64 wdiff;
+
+	if (he->diff.computed)
+		wdiff = he->diff.wdiff;
+	else
+		wdiff = perf_diff__compute_wdiff(he);
+
+	if (wdiff != 0)
+		scnprintf(buf, sizeof(buf), "%14ld", wdiff);
+
+	return scnprintf(hpp->buf, hpp->size, fmt, buf);
+}
+
 static int hpp__header_displ(struct perf_hpp *hpp)
 {
 	return scnprintf(hpp->buf, hpp->size, "Displ.");
@@ -279,7 +354,7 @@
 static int hpp__entry_displ(struct perf_hpp *hpp,
 			    struct hist_entry *he)
 {
-	struct hist_entry *pair = he->pair;
+	struct hist_entry *pair = hist_entry__next_pair(he);
 	long displacement = pair ? pair->position - he->position : 0;
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%6.6s";
 	char buf[32] = " ";
@@ -290,6 +365,27 @@
 	return scnprintf(hpp->buf, hpp->size, fmt, buf);
 }
 
+static int hpp__header_formula(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%70s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Formula");
+}
+
+static int hpp__width_formula(struct perf_hpp *hpp __maybe_unused)
+{
+	return 70;
+}
+
+static int hpp__entry_formula(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%-70s";
+	char buf[96] = " ";
+
+	perf_diff__formula(buf, sizeof(buf), he);
+	return scnprintf(hpp->buf, hpp->size, fmt, buf);
+}
+
 #define HPP__COLOR_PRINT_FNS(_name)		\
 	.header	= hpp__header_ ## _name,		\
 	.width	= hpp__width_ ## _name,		\
@@ -310,8 +406,12 @@
 	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_us) },
 	{ .cond = false, HPP__PRINT_FNS(samples) },
 	{ .cond = false, HPP__PRINT_FNS(period) },
+	{ .cond = false, HPP__PRINT_FNS(period_baseline) },
 	{ .cond = false, HPP__PRINT_FNS(delta) },
-	{ .cond = false, HPP__PRINT_FNS(displ) }
+	{ .cond = false, HPP__PRINT_FNS(ratio) },
+	{ .cond = false, HPP__PRINT_FNS(wdiff) },
+	{ .cond = false, HPP__PRINT_FNS(displ) },
+	{ .cond = false, HPP__PRINT_FNS(formula) }
 };
 
 #undef HPP__COLOR_PRINT_FNS
diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
index 13aa64e..3ec69560 100644
--- a/tools/perf/ui/progress.c
+++ b/tools/perf/ui/progress.c
@@ -1,32 +1,26 @@
 #include "../cache.h"
 #include "progress.h"
-#include "libslang.h"
-#include "ui.h"
-#include "browser.h"
+
+static void nop_progress_update(u64 curr __maybe_unused,
+				u64 total __maybe_unused,
+				const char *title __maybe_unused)
+{
+}
+
+static struct ui_progress default_progress_fns =
+{
+	.update		= nop_progress_update,
+};
+
+struct ui_progress *progress_fns = &default_progress_fns;
 
 void ui_progress__update(u64 curr, u64 total, const char *title)
 {
-	int bar, y;
-	/*
-	 * FIXME: We should have a per UI backend way of showing progress,
-	 * stdio will just show a percentage as NN%, etc.
-	 */
-	if (use_browser <= 0)
-		return;
+	return progress_fns->update(curr, total, title);
+}
 
-	if (total == 0)
-		return;
-
-	ui__refresh_dimensions(true);
-	pthread_mutex_lock(&ui__lock);
-	y = SLtt_Screen_Rows / 2 - 2;
-	SLsmg_set_color(0);
-	SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols);
-	SLsmg_gotorc(y++, 1);
-	SLsmg_write_string((char *)title);
-	SLsmg_set_color(HE_COLORSET_SELECTED);
-	bar = ((SLtt_Screen_Cols - 2) * curr) / total;
-	SLsmg_fill_region(y, 1, 1, bar, ' ');
-	SLsmg_refresh();
-	pthread_mutex_unlock(&ui__lock);
+void ui_progress__finish(void)
+{
+	if (progress_fns->finish)
+		progress_fns->finish();
 }
diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h
index d9c205b..257cc22 100644
--- a/tools/perf/ui/progress.h
+++ b/tools/perf/ui/progress.h
@@ -3,6 +3,16 @@
 
 #include <../types.h>
 
+struct ui_progress {
+	void (*update)(u64, u64, const char *);
+	void (*finish)(void);
+};
+
+extern struct ui_progress *progress_fns;
+
+void ui_progress__init(void);
+
 void ui_progress__update(u64 curr, u64 total, const char *title);
+void ui_progress__finish(void);
 
 #endif
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index fbd4e32..f0ee204 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -342,7 +342,7 @@
 	const char *sep = symbol_conf.field_sep;
 	const char *col_width = symbol_conf.col_width_list_str;
 	int idx, nr_rows = 0;
-	char bf[64];
+	char bf[96];
 	struct perf_hpp dummy_hpp = {
 		.buf	= bf,
 		.size	= sizeof(bf),
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c
new file mode 100644
index 0000000..6c2184d
--- /dev/null
+++ b/tools/perf/ui/tui/progress.c
@@ -0,0 +1,42 @@
+#include "../cache.h"
+#include "../progress.h"
+#include "../libslang.h"
+#include "../ui.h"
+#include "../browser.h"
+
+static void tui_progress__update(u64 curr, u64 total, const char *title)
+{
+	int bar, y;
+	/*
+	 * FIXME: We should have a per UI backend way of showing progress,
+	 * stdio will just show a percentage as NN%, etc.
+	 */
+	if (use_browser <= 0)
+		return;
+
+	if (total == 0)
+		return;
+
+	ui__refresh_dimensions(true);
+	pthread_mutex_lock(&ui__lock);
+	y = SLtt_Screen_Rows / 2 - 2;
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols);
+	SLsmg_gotorc(y++, 1);
+	SLsmg_write_string((char *)title);
+	SLsmg_set_color(HE_COLORSET_SELECTED);
+	bar = ((SLtt_Screen_Cols - 2) * curr) / total;
+	SLsmg_fill_region(y, 1, 1, bar, ' ');
+	SLsmg_refresh();
+	pthread_mutex_unlock(&ui__lock);
+}
+
+static struct ui_progress tui_progress_fns =
+{
+	.update		= tui_progress__update,
+};
+
+void ui_progress__init(void)
+{
+	progress_fns = &tui_progress_fns;
+}
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index 60debb8..81efa19 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -118,6 +118,7 @@
 	newtSetSuspendCallback(newt_suspend, NULL);
 	ui_helpline__init();
 	ui_browser__init();
+	ui_progress__init();
 
 	signal(SIGSEGV, ui__signal);
 	signal(SIGFPE, ui__signal);
diff --git a/tools/perf/ui/ui.h b/tools/perf/ui/ui.h
index 7b67045..d86359c 100644
--- a/tools/perf/ui/ui.h
+++ b/tools/perf/ui/ui.h
@@ -3,9 +3,37 @@
 
 #include <pthread.h>
 #include <stdbool.h>
+#include <linux/compiler.h>
 
 extern pthread_mutex_t ui__lock;
 
+extern int use_browser;
+
+void setup_browser(bool fallback_to_pager);
+void exit_browser(bool wait_for_ok);
+
+#ifdef NEWT_SUPPORT
+int ui__init(void);
+void ui__exit(bool wait_for_ok);
+#else
+static inline int ui__init(void)
+{
+	return -1;
+}
+static inline void ui__exit(bool wait_for_ok __maybe_unused) {}
+#endif
+
+#ifdef GTK2_SUPPORT
+int perf_gtk__init(void);
+void perf_gtk__exit(bool wait_for_ok);
+#else
+static inline int perf_gtk__init(void)
+{
+	return -1;
+}
+static inline void perf_gtk__exit(bool wait_for_ok __maybe_unused) {}
+#endif
+
 void ui__refresh_dimensions(bool force);
 
 #endif /* _PERF_UI_H_ */
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 95264f3..6aa34e5 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -9,18 +9,14 @@
 LF='
 '
 
+#
 # First check if there is a .git to get the version from git describe
-# otherwise try to get the version from the kernel makefile
+# otherwise try to get the version from the kernel Makefile
+#
 if test -d ../../.git -o -f ../../.git &&
-	VN=$(git describe --match 'v[0-9].[0-9]*' --abbrev=4 HEAD 2>/dev/null) &&
-	case "$VN" in
-	*$LF*) (exit 1) ;;
-	v[0-9]*)
-		git update-index -q --refresh
-		test -z "$(git diff-index --name-only HEAD --)" ||
-		VN="$VN-dirty" ;;
-	esac
+	VN=$(git tag 2>/dev/null | tail -1 | grep -E "v[0-9].[0-9]*")
 then
+	VN=$(echo $VN"-g"$(git log -1 --abbrev=4 --pretty=format:"%h" HEAD))
 	VN=$(echo "$VN" | sed -e 's/-/./g');
 else
 	VN=$(MAKEFLAGS= make -sC ../.. kernelversion)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f0a9103..07aaeea 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -15,6 +15,7 @@
 #include "debug.h"
 #include "annotate.h"
 #include <pthread.h>
+#include <linux/bitops.h>
 
 const char 	*disassembler_style;
 const char	*objdump_path;
@@ -170,15 +171,15 @@
 	if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
 		goto out_free_ops;
 
-        ops->locked.ins = ins__find(name);
-        if (ops->locked.ins == NULL)
-                goto out_free_ops;
+	ops->locked.ins = ins__find(name);
+	if (ops->locked.ins == NULL)
+		goto out_free_ops;
 
-        if (!ops->locked.ins->ops)
-                return 0;
+	if (!ops->locked.ins->ops)
+		return 0;
 
-        if (ops->locked.ins->ops->parse)
-                ops->locked.ins->ops->parse(ops->locked.ops);
+	if (ops->locked.ins->ops->parse)
+		ops->locked.ins->ops->parse(ops->locked.ops);
 
 	return 0;
 
@@ -400,6 +401,8 @@
 	{ .name = "testb", .ops  = &mov_ops, },
 	{ .name = "testl", .ops  = &mov_ops, },
 	{ .name = "xadd",  .ops  = &mov_ops, },
+	{ .name = "xbeginl", .ops  = &jump_ops, },
+	{ .name = "xbeginq", .ops  = &jump_ops, },
 };
 
 static int ins__cmp(const void *name, const void *insp)
@@ -855,12 +858,41 @@
 	struct source_line *iter;
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
+	int ret;
 
 	while (*p != NULL) {
 		parent = *p;
 		iter = rb_entry(parent, struct source_line, node);
 
-		if (src_line->percent > iter->percent)
+		ret = strcmp(iter->path, src_line->path);
+		if (ret == 0) {
+			iter->percent_sum += src_line->percent;
+			return;
+		}
+
+		if (ret < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	src_line->percent_sum = src_line->percent;
+
+	rb_link_node(&src_line->node, parent, p);
+	rb_insert_color(&src_line->node, root);
+}
+
+static void __resort_source_line(struct rb_root *root, struct source_line *src_line)
+{
+	struct source_line *iter;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct source_line, node);
+
+		if (src_line->percent_sum > iter->percent_sum)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -870,6 +902,24 @@
 	rb_insert_color(&src_line->node, root);
 }
 
+static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root)
+{
+	struct source_line *src_line;
+	struct rb_node *node;
+
+	node = rb_first(src_root);
+	while (node) {
+		struct rb_node *next;
+
+		src_line = rb_entry(node, struct source_line, node);
+		next = rb_next(node);
+		rb_erase(node, src_root);
+
+		__resort_source_line(dest_root, src_line);
+		node = next;
+	}
+}
+
 static void symbol__free_source_line(struct symbol *sym, int len)
 {
 	struct annotation *notes = symbol__annotation(sym);
@@ -894,6 +944,7 @@
 	struct source_line *src_line;
 	struct annotation *notes = symbol__annotation(sym);
 	struct sym_hist *h = annotation__histogram(notes, evidx);
+	struct rb_root tmp_root = RB_ROOT;
 
 	if (!h->sum)
 		return 0;
@@ -928,12 +979,13 @@
 			goto next;
 
 		strcpy(src_line[i].path, path);
-		insert_source_line(root, &src_line[i]);
+		insert_source_line(&tmp_root, &src_line[i]);
 
 	next:
 		pclose(fp);
 	}
 
+	resort_source_line(root, &tmp_root);
 	return 0;
 }
 
@@ -957,7 +1009,7 @@
 		char *path;
 
 		src_line = rb_entry(node, struct source_line, node);
-		percent = src_line->percent;
+		percent = src_line->percent_sum;
 		color = get_percent_color(percent);
 		path = src_line->path;
 
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 39242dc..8eec943 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -5,6 +5,7 @@
 #include <stdint.h>
 #include "types.h"
 #include "symbol.h"
+#include "hist.h"
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <pthread.h>
@@ -75,6 +76,7 @@
 struct source_line {
 	struct rb_node	node;
 	double		percent;
+	double		percent_sum;
 	char		*path;
 };
 
@@ -140,20 +142,18 @@
 
 #ifdef NEWT_SUPPORT
 int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
-			 void(*timer)(void *arg), void *arg, int delay_secs);
+			 struct hist_browser_timer *hbt);
 #else
 static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
 				       struct map *map __maybe_unused,
 				       int evidx __maybe_unused,
-				       void(*timer)(void *arg) __maybe_unused,
-				       void *arg __maybe_unused,
-				       int delay_secs __maybe_unused)
+				       struct hist_browser_timer *hbt
+				       __maybe_unused)
 {
 	return 0;
 }
 #endif
 
 extern const char	*disassembler_style;
-extern const char	*objdump_path;
 
 #endif	/* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 8e3a740..5295625 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -16,11 +16,11 @@
 #include "session.h"
 #include "tool.h"
 
-static int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
-				  union perf_event *event,
-				  struct perf_sample *sample __maybe_unused,
-				  struct perf_evsel *evsel __maybe_unused,
-				  struct machine *machine)
+int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
+			   union perf_event *event,
+			   struct perf_sample *sample __maybe_unused,
+			   struct perf_evsel *evsel __maybe_unused,
+			   struct machine *machine)
 {
 	struct addr_location al;
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -64,12 +64,27 @@
 struct perf_tool build_id__mark_dso_hit_ops = {
 	.sample	= build_id__mark_dso_hit,
 	.mmap	= perf_event__process_mmap,
-	.fork	= perf_event__process_task,
+	.fork	= perf_event__process_fork,
 	.exit	= perf_event__exit_del_thread,
 	.attr		 = perf_event__process_attr,
 	.build_id	 = perf_event__process_build_id,
 };
 
+int build_id__sprintf(const u8 *build_id, int len, char *bf)
+{
+	char *bid = bf;
+	const u8 *raw = build_id;
+	int i;
+
+	for (i = 0; i < len; ++i) {
+		sprintf(bid, "%02x", *raw);
+		++raw;
+		bid += 2;
+	}
+
+	return raw - build_id;
+}
+
 char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
 {
 	char build_id_hex[BUILD_ID_SIZE * 2 + 1];
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index a993ba8..a811f5c 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -1,10 +1,19 @@
 #ifndef PERF_BUILD_ID_H_
 #define PERF_BUILD_ID_H_ 1
 
-#include "session.h"
+#define BUILD_ID_SIZE 20
+
+#include "tool.h"
+#include "types.h"
 
 extern struct perf_tool build_id__mark_dso_hit_ops;
+struct dso;
 
+int build_id__sprintf(const u8 *build_id, int len, char *bf);
 char *dso__build_id_filename(struct dso *self, char *bf, size_t size);
 
+int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
+			   struct perf_sample *sample, struct perf_evsel *evsel,
+			   struct machine *machine);
+
 #endif
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 2bd5137..26e3672 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -5,6 +5,7 @@
 #include "util.h"
 #include "strbuf.h"
 #include "../perf.h"
+#include "../ui/ui.h"
 
 #define CMD_EXEC_PATH "--exec-path"
 #define CMD_PERF_DIR "--perf-dir="
@@ -31,44 +32,6 @@
 extern int pager_in_use(void);
 extern int pager_use_color;
 
-extern int use_browser;
-
-#if defined(NEWT_SUPPORT) || defined(GTK2_SUPPORT)
-void setup_browser(bool fallback_to_pager);
-void exit_browser(bool wait_for_ok);
-
-#ifdef NEWT_SUPPORT
-int ui__init(void);
-void ui__exit(bool wait_for_ok);
-#else
-static inline int ui__init(void)
-{
-	return -1;
-}
-static inline void ui__exit(bool wait_for_ok __maybe_unused) {}
-#endif
-
-#ifdef GTK2_SUPPORT
-int perf_gtk__init(void);
-void perf_gtk__exit(bool wait_for_ok);
-#else
-static inline int perf_gtk__init(void)
-{
-	return -1;
-}
-static inline void perf_gtk__exit(bool wait_for_ok __maybe_unused) {}
-#endif
-
-#else /* NEWT_SUPPORT || GTK2_SUPPORT */
-
-static inline void setup_browser(bool fallback_to_pager)
-{
-	if (fallback_to_pager)
-		setup_pager();
-}
-static inline void exit_browser(bool wait_for_ok __maybe_unused) {}
-#endif /* NEWT_SUPPORT || GTK2_SUPPORT */
-
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
 
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index dec9875..83e8d23 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -26,6 +26,7 @@
 static inline void ui_progress__update(u64 curr __maybe_unused,
 				       u64 total __maybe_unused,
 				       const char *title __maybe_unused) {}
+static inline void ui_progress__finish(void) {}
 
 #define ui__error(format, arg...) ui__warning(format, ##arg)
 
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
new file mode 100644
index 0000000..d6d9a46
--- /dev/null
+++ b/tools/perf/util/dso.c
@@ -0,0 +1,595 @@
+#include "symbol.h"
+#include "dso.h"
+#include "machine.h"
+#include "util.h"
+#include "debug.h"
+
+char dso__symtab_origin(const struct dso *dso)
+{
+	static const char origin[] = {
+		[DSO_BINARY_TYPE__KALLSYMS]		= 'k',
+		[DSO_BINARY_TYPE__VMLINUX]		= 'v',
+		[DSO_BINARY_TYPE__JAVA_JIT]		= 'j',
+		[DSO_BINARY_TYPE__DEBUGLINK]		= 'l',
+		[DSO_BINARY_TYPE__BUILD_ID_CACHE]	= 'B',
+		[DSO_BINARY_TYPE__FEDORA_DEBUGINFO]	= 'f',
+		[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO]	= 'u',
+		[DSO_BINARY_TYPE__BUILDID_DEBUGINFO]	= 'b',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_DSO]	= 'd',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE]	= 'K',
+		[DSO_BINARY_TYPE__GUEST_KALLSYMS]	= 'g',
+		[DSO_BINARY_TYPE__GUEST_KMODULE]	= 'G',
+		[DSO_BINARY_TYPE__GUEST_VMLINUX]	= 'V',
+	};
+
+	if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND)
+		return '!';
+	return origin[dso->symtab_type];
+}
+
+int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
+			  char *root_dir, char *file, size_t size)
+{
+	char build_id_hex[BUILD_ID_SIZE * 2 + 1];
+	int ret = 0;
+
+	switch (type) {
+	case DSO_BINARY_TYPE__DEBUGLINK: {
+		char *debuglink;
+
+		strncpy(file, dso->long_name, size);
+		debuglink = file + dso->long_name_len;
+		while (debuglink != file && *debuglink != '/')
+			debuglink--;
+		if (*debuglink == '/')
+			debuglink++;
+		filename__read_debuglink(dso->long_name, debuglink,
+					 size - (debuglink - file));
+		}
+		break;
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+		/* skip the locally configured cache if a symfs is given */
+		if (symbol_conf.symfs[0] ||
+		    (dso__build_id_filename(dso, file, size) == NULL))
+			ret = -1;
+		break;
+
+	case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+		snprintf(file, size, "%s/usr/lib/debug%s.debug",
+			 symbol_conf.symfs, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+		snprintf(file, size, "%s/usr/lib/debug%s",
+			 symbol_conf.symfs, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+		if (!dso->has_build_id) {
+			ret = -1;
+			break;
+		}
+
+		build_id__sprintf(dso->build_id,
+				  sizeof(dso->build_id),
+				  build_id_hex);
+		snprintf(file, size,
+			 "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
+			 symbol_conf.symfs, build_id_hex, build_id_hex + 2);
+		break;
+
+	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+		snprintf(file, size, "%s%s",
+			 symbol_conf.symfs, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__GUEST_KMODULE:
+		snprintf(file, size, "%s%s%s", symbol_conf.symfs,
+			 root_dir, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+		snprintf(file, size, "%s%s", symbol_conf.symfs,
+			 dso->long_name);
+		break;
+
+	default:
+	case DSO_BINARY_TYPE__KALLSYMS:
+	case DSO_BINARY_TYPE__VMLINUX:
+	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+	case DSO_BINARY_TYPE__GUEST_VMLINUX:
+	case DSO_BINARY_TYPE__JAVA_JIT:
+	case DSO_BINARY_TYPE__NOT_FOUND:
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
+static int open_dso(struct dso *dso, struct machine *machine)
+{
+	char *root_dir = (char *) "";
+	char *name;
+	int fd;
+
+	name = malloc(PATH_MAX);
+	if (!name)
+		return -ENOMEM;
+
+	if (machine)
+		root_dir = machine->root_dir;
+
+	if (dso__binary_type_file(dso, dso->data_type,
+				  root_dir, name, PATH_MAX)) {
+		free(name);
+		return -EINVAL;
+	}
+
+	fd = open(name, O_RDONLY);
+	free(name);
+	return fd;
+}
+
+int dso__data_fd(struct dso *dso, struct machine *machine)
+{
+	static enum dso_binary_type binary_type_data[] = {
+		DSO_BINARY_TYPE__BUILD_ID_CACHE,
+		DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+		DSO_BINARY_TYPE__NOT_FOUND,
+	};
+	int i = 0;
+
+	if (dso->data_type != DSO_BINARY_TYPE__NOT_FOUND)
+		return open_dso(dso, machine);
+
+	do {
+		int fd;
+
+		dso->data_type = binary_type_data[i++];
+
+		fd = open_dso(dso, machine);
+		if (fd >= 0)
+			return fd;
+
+	} while (dso->data_type != DSO_BINARY_TYPE__NOT_FOUND);
+
+	return -EINVAL;
+}
+
+static void
+dso_cache__free(struct rb_root *root)
+{
+	struct rb_node *next = rb_first(root);
+
+	while (next) {
+		struct dso_cache *cache;
+
+		cache = rb_entry(next, struct dso_cache, rb_node);
+		next = rb_next(&cache->rb_node);
+		rb_erase(&cache->rb_node, root);
+		free(cache);
+	}
+}
+
+static struct dso_cache*
+dso_cache__find(struct rb_root *root, u64 offset)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct dso_cache *cache;
+
+	while (*p != NULL) {
+		u64 end;
+
+		parent = *p;
+		cache = rb_entry(parent, struct dso_cache, rb_node);
+		end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+		if (offset < cache->offset)
+			p = &(*p)->rb_left;
+		else if (offset >= end)
+			p = &(*p)->rb_right;
+		else
+			return cache;
+	}
+	return NULL;
+}
+
+static void
+dso_cache__insert(struct rb_root *root, struct dso_cache *new)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct dso_cache *cache;
+	u64 offset = new->offset;
+
+	while (*p != NULL) {
+		u64 end;
+
+		parent = *p;
+		cache = rb_entry(parent, struct dso_cache, rb_node);
+		end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+		if (offset < cache->offset)
+			p = &(*p)->rb_left;
+		else if (offset >= end)
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+}
+
+static ssize_t
+dso_cache__memcpy(struct dso_cache *cache, u64 offset,
+		  u8 *data, u64 size)
+{
+	u64 cache_offset = offset - cache->offset;
+	u64 cache_size   = min(cache->size - cache_offset, size);
+
+	memcpy(data, cache->data + cache_offset, cache_size);
+	return cache_size;
+}
+
+static ssize_t
+dso_cache__read(struct dso *dso, struct machine *machine,
+		 u64 offset, u8 *data, ssize_t size)
+{
+	struct dso_cache *cache;
+	ssize_t ret;
+	int fd;
+
+	fd = dso__data_fd(dso, machine);
+	if (fd < 0)
+		return -1;
+
+	do {
+		u64 cache_offset;
+
+		ret = -ENOMEM;
+
+		cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
+		if (!cache)
+			break;
+
+		cache_offset = offset & DSO__DATA_CACHE_MASK;
+		ret = -EINVAL;
+
+		if (-1 == lseek(fd, cache_offset, SEEK_SET))
+			break;
+
+		ret = read(fd, cache->data, DSO__DATA_CACHE_SIZE);
+		if (ret <= 0)
+			break;
+
+		cache->offset = cache_offset;
+		cache->size   = ret;
+		dso_cache__insert(&dso->cache, cache);
+
+		ret = dso_cache__memcpy(cache, offset, data, size);
+
+	} while (0);
+
+	if (ret <= 0)
+		free(cache);
+
+	close(fd);
+	return ret;
+}
+
+static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size)
+{
+	struct dso_cache *cache;
+
+	cache = dso_cache__find(&dso->cache, offset);
+	if (cache)
+		return dso_cache__memcpy(cache, offset, data, size);
+	else
+		return dso_cache__read(dso, machine, offset, data, size);
+}
+
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size)
+{
+	ssize_t r = 0;
+	u8 *p = data;
+
+	do {
+		ssize_t ret;
+
+		ret = dso_cache_read(dso, machine, offset, p, size);
+		if (ret < 0)
+			return ret;
+
+		/* Reached EOF, return what we have. */
+		if (!ret)
+			break;
+
+		BUG_ON(ret > size);
+
+		r      += ret;
+		p      += ret;
+		offset += ret;
+		size   -= ret;
+
+	} while (size);
+
+	return r;
+}
+
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+			    struct machine *machine, u64 addr,
+			    u8 *data, ssize_t size)
+{
+	u64 offset = map->map_ip(map, addr);
+	return dso__data_read_offset(dso, machine, offset, data, size);
+}
+
+struct map *dso__new_map(const char *name)
+{
+	struct map *map = NULL;
+	struct dso *dso = dso__new(name);
+
+	if (dso)
+		map = map__new2(0, dso, MAP__FUNCTION);
+
+	return map;
+}
+
+struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
+		    const char *short_name, int dso_type)
+{
+	/*
+	 * The kernel dso could be created by build_id processing.
+	 */
+	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name);
+
+	/*
+	 * We need to run this in all cases, since during the build_id
+	 * processing we had no idea this was the kernel dso.
+	 */
+	if (dso != NULL) {
+		dso__set_short_name(dso, short_name);
+		dso->kernel = dso_type;
+	}
+
+	return dso;
+}
+
+void dso__set_long_name(struct dso *dso, char *name)
+{
+	if (name == NULL)
+		return;
+	dso->long_name = name;
+	dso->long_name_len = strlen(name);
+}
+
+void dso__set_short_name(struct dso *dso, const char *name)
+{
+	if (name == NULL)
+		return;
+	dso->short_name = name;
+	dso->short_name_len = strlen(name);
+}
+
+static void dso__set_basename(struct dso *dso)
+{
+	dso__set_short_name(dso, basename(dso->long_name));
+}
+
+int dso__name_len(const struct dso *dso)
+{
+	if (!dso)
+		return strlen("[unknown]");
+	if (verbose)
+		return dso->long_name_len;
+
+	return dso->short_name_len;
+}
+
+bool dso__loaded(const struct dso *dso, enum map_type type)
+{
+	return dso->loaded & (1 << type);
+}
+
+bool dso__sorted_by_name(const struct dso *dso, enum map_type type)
+{
+	return dso->sorted_by_name & (1 << type);
+}
+
+void dso__set_sorted_by_name(struct dso *dso, enum map_type type)
+{
+	dso->sorted_by_name |= (1 << type);
+}
+
+struct dso *dso__new(const char *name)
+{
+	struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
+
+	if (dso != NULL) {
+		int i;
+		strcpy(dso->name, name);
+		dso__set_long_name(dso, dso->name);
+		dso__set_short_name(dso, dso->name);
+		for (i = 0; i < MAP__NR_TYPES; ++i)
+			dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
+		dso->cache = RB_ROOT;
+		dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
+		dso->data_type   = DSO_BINARY_TYPE__NOT_FOUND;
+		dso->loaded = 0;
+		dso->sorted_by_name = 0;
+		dso->has_build_id = 0;
+		dso->kernel = DSO_TYPE_USER;
+		dso->needs_swap = DSO_SWAP__UNSET;
+		INIT_LIST_HEAD(&dso->node);
+	}
+
+	return dso;
+}
+
+void dso__delete(struct dso *dso)
+{
+	int i;
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		symbols__delete(&dso->symbols[i]);
+	if (dso->sname_alloc)
+		free((char *)dso->short_name);
+	if (dso->lname_alloc)
+		free(dso->long_name);
+	dso_cache__free(&dso->cache);
+	free(dso);
+}
+
+void dso__set_build_id(struct dso *dso, void *build_id)
+{
+	memcpy(dso->build_id, build_id, sizeof(dso->build_id));
+	dso->has_build_id = 1;
+}
+
+bool dso__build_id_equal(const struct dso *dso, u8 *build_id)
+{
+	return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0;
+}
+
+void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
+{
+	char path[PATH_MAX];
+
+	if (machine__is_default_guest(machine))
+		return;
+	sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
+	if (sysfs__read_build_id(path, dso->build_id,
+				 sizeof(dso->build_id)) == 0)
+		dso->has_build_id = true;
+}
+
+int dso__kernel_module_get_build_id(struct dso *dso,
+				    const char *root_dir)
+{
+	char filename[PATH_MAX];
+	/*
+	 * kernel module short names are of the form "[module]" and
+	 * we need just "module" here.
+	 */
+	const char *name = dso->short_name + 1;
+
+	snprintf(filename, sizeof(filename),
+		 "%s/sys/module/%.*s/notes/.note.gnu.build-id",
+		 root_dir, (int)strlen(name) - 1, name);
+
+	if (sysfs__read_build_id(filename, dso->build_id,
+				 sizeof(dso->build_id)) == 0)
+		dso->has_build_id = true;
+
+	return 0;
+}
+
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
+{
+	bool have_build_id = false;
+	struct dso *pos;
+
+	list_for_each_entry(pos, head, node) {
+		if (with_hits && !pos->hit)
+			continue;
+		if (pos->has_build_id) {
+			have_build_id = true;
+			continue;
+		}
+		if (filename__read_build_id(pos->long_name, pos->build_id,
+					    sizeof(pos->build_id)) > 0) {
+			have_build_id	  = true;
+			pos->has_build_id = true;
+		}
+	}
+
+	return have_build_id;
+}
+
+void dsos__add(struct list_head *head, struct dso *dso)
+{
+	list_add_tail(&dso->node, head);
+}
+
+struct dso *dsos__find(struct list_head *head, const char *name)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, head, node)
+		if (strcmp(pos->long_name, name) == 0)
+			return pos;
+	return NULL;
+}
+
+struct dso *__dsos__findnew(struct list_head *head, const char *name)
+{
+	struct dso *dso = dsos__find(head, name);
+
+	if (!dso) {
+		dso = dso__new(name);
+		if (dso != NULL) {
+			dsos__add(head, dso);
+			dso__set_basename(dso);
+		}
+	}
+
+	return dso;
+}
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+			       bool with_hits)
+{
+	struct dso *pos;
+	size_t ret = 0;
+
+	list_for_each_entry(pos, head, node) {
+		if (with_hits && !pos->hit)
+			continue;
+		ret += dso__fprintf_buildid(pos, fp);
+		ret += fprintf(fp, " %s\n", pos->long_name);
+	}
+	return ret;
+}
+
+size_t __dsos__fprintf(struct list_head *head, FILE *fp)
+{
+	struct dso *pos;
+	size_t ret = 0;
+
+	list_for_each_entry(pos, head, node) {
+		int i;
+		for (i = 0; i < MAP__NR_TYPES; ++i)
+			ret += dso__fprintf(pos, i, fp);
+	}
+
+	return ret;
+}
+
+size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
+{
+	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+	return fprintf(fp, "%s", sbuild_id);
+}
+
+size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
+{
+	struct rb_node *nd;
+	size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
+
+	if (dso->short_name != dso->long_name)
+		ret += fprintf(fp, "%s, ", dso->long_name);
+	ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
+		       dso->loaded ? "" : "NOT ");
+	ret += dso__fprintf_buildid(dso, fp);
+	ret += fprintf(fp, ")\n");
+	for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+		ret += symbol__fprintf(pos, fp);
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
new file mode 100644
index 0000000..e032769
--- /dev/null
+++ b/tools/perf/util/dso.h
@@ -0,0 +1,148 @@
+#ifndef __PERF_DSO
+#define __PERF_DSO
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include "types.h"
+#include "map.h"
+
+enum dso_binary_type {
+	DSO_BINARY_TYPE__KALLSYMS = 0,
+	DSO_BINARY_TYPE__GUEST_KALLSYMS,
+	DSO_BINARY_TYPE__VMLINUX,
+	DSO_BINARY_TYPE__GUEST_VMLINUX,
+	DSO_BINARY_TYPE__JAVA_JIT,
+	DSO_BINARY_TYPE__DEBUGLINK,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE,
+	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+	DSO_BINARY_TYPE__GUEST_KMODULE,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+	DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+enum dso_kernel_type {
+	DSO_TYPE_USER = 0,
+	DSO_TYPE_KERNEL,
+	DSO_TYPE_GUEST_KERNEL
+};
+
+enum dso_swap_type {
+	DSO_SWAP__UNSET,
+	DSO_SWAP__NO,
+	DSO_SWAP__YES,
+};
+
+#define DSO__SWAP(dso, type, val)			\
+({							\
+	type ____r = val;				\
+	BUG_ON(dso->needs_swap == DSO_SWAP__UNSET);	\
+	if (dso->needs_swap == DSO_SWAP__YES) {		\
+		switch (sizeof(____r)) {		\
+		case 2:					\
+			____r = bswap_16(val);		\
+			break;				\
+		case 4:					\
+			____r = bswap_32(val);		\
+			break;				\
+		case 8:					\
+			____r = bswap_64(val);		\
+			break;				\
+		default:				\
+			BUG_ON(1);			\
+		}					\
+	}						\
+	____r;						\
+})
+
+#define DSO__DATA_CACHE_SIZE 4096
+#define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1)
+
+struct dso_cache {
+	struct rb_node	rb_node;
+	u64 offset;
+	u64 size;
+	char data[0];
+};
+
+struct dso {
+	struct list_head node;
+	struct rb_root	 symbols[MAP__NR_TYPES];
+	struct rb_root	 symbol_names[MAP__NR_TYPES];
+	struct rb_root	 cache;
+	enum dso_kernel_type	kernel;
+	enum dso_swap_type	needs_swap;
+	enum dso_binary_type	symtab_type;
+	enum dso_binary_type	data_type;
+	u8		 adjust_symbols:1;
+	u8		 has_build_id:1;
+	u8		 hit:1;
+	u8		 annotate_warned:1;
+	u8		 sname_alloc:1;
+	u8		 lname_alloc:1;
+	u8		 sorted_by_name;
+	u8		 loaded;
+	u8		 build_id[BUILD_ID_SIZE];
+	const char	 *short_name;
+	char		 *long_name;
+	u16		 long_name_len;
+	u16		 short_name_len;
+	char		 name[0];
+};
+
+static inline void dso__set_loaded(struct dso *dso, enum map_type type)
+{
+	dso->loaded |= (1 << type);
+}
+
+struct dso *dso__new(const char *name);
+void dso__delete(struct dso *dso);
+
+void dso__set_short_name(struct dso *dso, const char *name);
+void dso__set_long_name(struct dso *dso, char *name);
+
+int dso__name_len(const struct dso *dso);
+
+bool dso__loaded(const struct dso *dso, enum map_type type);
+
+bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
+void dso__set_sorted_by_name(struct dso *dso, enum map_type type);
+void dso__sort_by_name(struct dso *dso, enum map_type type);
+
+void dso__set_build_id(struct dso *dso, void *build_id);
+bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
+void dso__read_running_kernel_build_id(struct dso *dso,
+				       struct machine *machine);
+int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
+
+char dso__symtab_origin(const struct dso *dso);
+int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
+			  char *root_dir, char *file, size_t size);
+
+int dso__data_fd(struct dso *dso, struct machine *machine);
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size);
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+			    struct machine *machine, u64 addr,
+			    u8 *data, ssize_t size);
+
+struct map *dso__new_map(const char *name);
+struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
+				const char *short_name, int dso_type);
+
+void dsos__add(struct list_head *head, struct dso *dso);
+struct dso *dsos__find(struct list_head *head, const char *name);
+struct dso *__dsos__findnew(struct list_head *head, const char *name);
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+			       bool with_hits);
+size_t __dsos__fprintf(struct list_head *head, FILE *fp);
+
+size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+				    enum map_type type, FILE *fp);
+size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
+#endif /* __PERF_DSO */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6715b19..3cf2c3e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,6 +1,7 @@
 #include <linux/types.h>
 #include "event.h"
 #include "debug.h"
+#include "machine.h"
 #include "sort.h"
 #include "string.h"
 #include "strlist.h"
@@ -192,55 +193,43 @@
 	event->header.misc = PERF_RECORD_MISC_USER;
 
 	while (1) {
-		char bf[BUFSIZ], *pbf = bf;
-		int n;
+		char bf[BUFSIZ];
+		char prot[5];
+		char execname[PATH_MAX];
+		char anonstr[] = "//anon";
 		size_t size;
+
 		if (fgets(bf, sizeof(bf), fp) == NULL)
 			break;
 
+		/* ensure null termination since stack will be reused. */
+		strcpy(execname, "");
+
 		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = hex2u64(pbf, &event->mmap.start);
-		if (n < 0)
+		sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n",
+		       &event->mmap.start, &event->mmap.len, prot,
+		       &event->mmap.pgoff, execname);
+
+		if (prot[2] != 'x')
 			continue;
-		pbf += n + 1;
-		n = hex2u64(pbf, &event->mmap.len);
-		if (n < 0)
-			continue;
-		pbf += n + 3;
-		if (*pbf == 'x') { /* vm_exec */
-			char anonstr[] = "//anon\n";
-			char *execname = strchr(bf, '/');
 
-			/* Catch VDSO */
-			if (execname == NULL)
-				execname = strstr(bf, "[vdso]");
+		if (!strcmp(execname, ""))
+			strcpy(execname, anonstr);
 
-			/* Catch anonymous mmaps */
-			if ((execname == NULL) && !strstr(bf, "["))
-				execname = anonstr;
+		size = strlen(execname) + 1;
+		memcpy(event->mmap.filename, execname, size);
+		size = PERF_ALIGN(size, sizeof(u64));
+		event->mmap.len -= event->mmap.start;
+		event->mmap.header.size = (sizeof(event->mmap) -
+					   (sizeof(event->mmap.filename) - size));
+		memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+		event->mmap.header.size += machine->id_hdr_size;
+		event->mmap.pid = tgid;
+		event->mmap.tid = pid;
 
-			if (execname == NULL)
-				continue;
-
-			pbf += 3;
-			n = hex2u64(pbf, &event->mmap.pgoff);
-
-			size = strlen(execname);
-			execname[size - 1] = '\0'; /* Remove \n */
-			memcpy(event->mmap.filename, execname, size);
-			size = PERF_ALIGN(size, sizeof(u64));
-			event->mmap.len -= event->mmap.start;
-			event->mmap.header.size = (sizeof(event->mmap) -
-					        (sizeof(event->mmap.filename) - size));
-			memset(event->mmap.filename + size, 0, machine->id_hdr_size);
-			event->mmap.header.size += machine->id_hdr_size;
-			event->mmap.pid = tgid;
-			event->mmap.tid = pid;
-
-			if (process(tool, event, &synth_sample, machine) != 0) {
-				rc = -1;
-				break;
-			}
+		if (process(tool, event, &synth_sample, machine) != 0) {
+			rc = -1;
+			break;
 		}
 	}
 
@@ -404,16 +393,15 @@
 
 		if (*end) /* only interested in proper numerical dirents */
 			continue;
-
-		if (__event__synthesize_thread(comm_event, mmap_event, pid, 1,
-					   process, tool, machine) != 0) {
-			err = -1;
-			goto out_closedir;
-		}
+		/*
+ 		 * We may race with exiting thread, so don't stop just because
+ 		 * one thread couldn't be synthesized.
+ 		 */
+		__event__synthesize_thread(comm_event, mmap_event, pid, 1,
+					   process, tool, machine);
 	}
 
 	err = 0;
-out_closedir:
 	closedir(proc);
 out_free_mmap:
 	free(mmap_event);
@@ -519,134 +507,15 @@
 			     struct perf_sample *sample __maybe_unused,
 			     struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, event->comm.tid);
-
-	if (dump_trace)
-		perf_event__fprintf_comm(event, stdout);
-
-	if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
-		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
-		return -1;
-	}
-
-	return 0;
+	return machine__process_comm_event(machine, event);
 }
 
 int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
 			     struct perf_sample *sample __maybe_unused,
-			     struct machine *machine __maybe_unused)
+			     struct machine *machine)
 {
-	dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
-		    event->lost.id, event->lost.lost);
-	return 0;
-}
-
-static void perf_event__set_kernel_mmap_len(union perf_event *event,
-					    struct map **maps)
-{
-	maps[MAP__FUNCTION]->start = event->mmap.start;
-	maps[MAP__FUNCTION]->end   = event->mmap.start + event->mmap.len;
-	/*
-	 * Be a bit paranoid here, some perf.data file came with
-	 * a zero sized synthesized MMAP event for the kernel.
-	 */
-	if (maps[MAP__FUNCTION]->end == 0)
-		maps[MAP__FUNCTION]->end = ~0ULL;
-}
-
-static int perf_event__process_kernel_mmap(struct perf_tool *tool
-					   __maybe_unused,
-					   union perf_event *event,
-					   struct machine *machine)
-{
-	struct map *map;
-	char kmmap_prefix[PATH_MAX];
-	enum dso_kernel_type kernel_type;
-	bool is_kernel_mmap;
-
-	machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
-	if (machine__is_host(machine))
-		kernel_type = DSO_TYPE_KERNEL;
-	else
-		kernel_type = DSO_TYPE_GUEST_KERNEL;
-
-	is_kernel_mmap = memcmp(event->mmap.filename,
-				kmmap_prefix,
-				strlen(kmmap_prefix) - 1) == 0;
-	if (event->mmap.filename[0] == '/' ||
-	    (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
-
-		char short_module_name[1024];
-		char *name, *dot;
-
-		if (event->mmap.filename[0] == '/') {
-			name = strrchr(event->mmap.filename, '/');
-			if (name == NULL)
-				goto out_problem;
-
-			++name; /* skip / */
-			dot = strrchr(name, '.');
-			if (dot == NULL)
-				goto out_problem;
-			snprintf(short_module_name, sizeof(short_module_name),
-					"[%.*s]", (int)(dot - name), name);
-			strxfrchar(short_module_name, '-', '_');
-		} else
-			strcpy(short_module_name, event->mmap.filename);
-
-		map = machine__new_module(machine, event->mmap.start,
-					  event->mmap.filename);
-		if (map == NULL)
-			goto out_problem;
-
-		name = strdup(short_module_name);
-		if (name == NULL)
-			goto out_problem;
-
-		map->dso->short_name = name;
-		map->dso->sname_alloc = 1;
-		map->end = map->start + event->mmap.len;
-	} else if (is_kernel_mmap) {
-		const char *symbol_name = (event->mmap.filename +
-				strlen(kmmap_prefix));
-		/*
-		 * Should be there already, from the build-id table in
-		 * the header.
-		 */
-		struct dso *kernel = __dsos__findnew(&machine->kernel_dsos,
-						     kmmap_prefix);
-		if (kernel == NULL)
-			goto out_problem;
-
-		kernel->kernel = kernel_type;
-		if (__machine__create_kernel_maps(machine, kernel) < 0)
-			goto out_problem;
-
-		perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps);
-
-		/*
-		 * Avoid using a zero address (kptr_restrict) for the ref reloc
-		 * symbol. Effectively having zero here means that at record
-		 * time /proc/sys/kernel/kptr_restrict was non zero.
-		 */
-		if (event->mmap.pgoff != 0) {
-			maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
-							 symbol_name,
-							 event->mmap.pgoff);
-		}
-
-		if (machine__is_default_guest(machine)) {
-			/*
-			 * preload dso of guest kernel and modules
-			 */
-			dso__load(kernel, machine->vmlinux_maps[MAP__FUNCTION],
-				  NULL);
-		}
-	}
-	return 0;
-out_problem:
-	return -1;
+	return machine__process_lost_event(machine, event);
 }
 
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
@@ -656,43 +525,12 @@
 		       event->mmap.len, event->mmap.pgoff, event->mmap.filename);
 }
 
-int perf_event__process_mmap(struct perf_tool *tool,
+int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
 			     struct perf_sample *sample __maybe_unused,
 			     struct machine *machine)
 {
-	struct thread *thread;
-	struct map *map;
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-	int ret = 0;
-
-	if (dump_trace)
-		perf_event__fprintf_mmap(event, stdout);
-
-	if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
-	    cpumode == PERF_RECORD_MISC_KERNEL) {
-		ret = perf_event__process_kernel_mmap(tool, event, machine);
-		if (ret < 0)
-			goto out_problem;
-		return 0;
-	}
-
-	thread = machine__findnew_thread(machine, event->mmap.pid);
-	if (thread == NULL)
-		goto out_problem;
-	map = map__new(&machine->user_dsos, event->mmap.start,
-			event->mmap.len, event->mmap.pgoff,
-			event->mmap.pid, event->mmap.filename,
-			MAP__FUNCTION);
-	if (map == NULL)
-		goto out_problem;
-
-	thread__insert_map(thread, map);
-	return 0;
-
-out_problem:
-	dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
-	return 0;
+	return machine__process_mmap_event(machine, event);
 }
 
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
@@ -702,29 +540,20 @@
 		       event->fork.ppid, event->fork.ptid);
 }
 
-int perf_event__process_task(struct perf_tool *tool __maybe_unused,
+int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
 			     struct perf_sample *sample __maybe_unused,
-			      struct machine *machine)
+			     struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
-	struct thread *parent = machine__findnew_thread(machine, event->fork.ptid);
+	return machine__process_fork_event(machine, event);
+}
 
-	if (dump_trace)
-		perf_event__fprintf_task(event, stdout);
-
-	if (event->header.type == PERF_RECORD_EXIT) {
-		machine__remove_thread(machine, thread);
-		return 0;
-	}
-
-	if (thread == NULL || parent == NULL ||
-	    thread__fork(thread, parent) < 0) {
-		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
-		return -1;
-	}
-
-	return 0;
+int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample __maybe_unused,
+			     struct machine *machine)
+{
+	return machine__process_exit_event(machine, event);
 }
 
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
@@ -750,27 +579,12 @@
 	return ret;
 }
 
-int perf_event__process(struct perf_tool *tool, union perf_event *event,
-			struct perf_sample *sample, struct machine *machine)
+int perf_event__process(struct perf_tool *tool __maybe_unused,
+			union perf_event *event,
+			struct perf_sample *sample __maybe_unused,
+			struct machine *machine)
 {
-	switch (event->header.type) {
-	case PERF_RECORD_COMM:
-		perf_event__process_comm(tool, event, sample, machine);
-		break;
-	case PERF_RECORD_MMAP:
-		perf_event__process_mmap(tool, event, sample, machine);
-		break;
-	case PERF_RECORD_FORK:
-	case PERF_RECORD_EXIT:
-		perf_event__process_task(tool, event, sample, machine);
-		break;
-	case PERF_RECORD_LOST:
-		perf_event__process_lost(tool, event, sample, machine);
-	default:
-		break;
-	}
-
-	return 0;
+	return machine__process_event(machine, event);
 }
 
 void thread__find_addr_map(struct thread *self,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 21b99e7..0d573ff 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -6,6 +6,7 @@
 
 #include "../perf.h"
 #include "map.h"
+#include "build-id.h"
 
 /*
  * PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
@@ -96,8 +97,6 @@
 	struct stack_dump user_stack;
 };
 
-#define BUILD_ID_SIZE 20
-
 struct build_id_event {
 	struct perf_event_header header;
 	pid_t			 pid;
@@ -191,7 +190,11 @@
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct machine *machine);
-int perf_event__process_task(struct perf_tool *tool,
+int perf_event__process_fork(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_exit(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct machine *machine);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 186b877..7052934 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -52,15 +52,13 @@
 void perf_evlist__config_attrs(struct perf_evlist *evlist,
 			       struct perf_record_opts *opts)
 {
-	struct perf_evsel *evsel, *first;
+	struct perf_evsel *evsel;
 
 	if (evlist->cpus->map[0] < 0)
 		opts->no_inherit = true;
 
-	first = perf_evlist__first(evlist);
-
 	list_for_each_entry(evsel, &evlist->entries, node) {
-		perf_evsel__config(evsel, opts, first);
+		perf_evsel__config(evsel, opts);
 
 		if (evlist->nr_entries > 1)
 			evsel->attr.sample_type |= PERF_SAMPLE_ID;
@@ -224,6 +222,8 @@
 
 	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
 		list_for_each_entry(pos, &evlist->entries, node) {
+			if (perf_evsel__is_group_member(pos))
+				continue;
 			for (thread = 0; thread < evlist->threads->nr; thread++)
 				ioctl(FD(pos, cpu, thread),
 				      PERF_EVENT_IOC_DISABLE, 0);
@@ -238,6 +238,8 @@
 
 	for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) {
 		list_for_each_entry(pos, &evlist->entries, node) {
+			if (perf_evsel__is_group_member(pos))
+				continue;
 			for (thread = 0; thread < evlist->threads->nr; thread++)
 				ioctl(FD(pos, cpu, thread),
 				      PERF_EVENT_IOC_ENABLE, 0);
@@ -325,8 +327,6 @@
 
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
-	/* XXX Move this to perf.c, making it generally available */
-	unsigned int page_size = sysconf(_SC_PAGE_SIZE);
 	struct perf_mmap *md = &evlist->mmap[idx];
 	unsigned int head = perf_mmap__read_head(md);
 	unsigned int old = md->prev;
@@ -528,7 +528,6 @@
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 		      bool overwrite)
 {
-	unsigned int page_size = sysconf(_SC_PAGE_SIZE);
 	struct perf_evsel *evsel;
 	const struct cpu_map *cpus = evlist->cpus;
 	const struct thread_map *threads = evlist->threads;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d144d46..1b16dd1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -404,13 +404,40 @@
 	return evsel->name ?: "unknown";
 }
 
-void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts,
-			struct perf_evsel *first)
+/*
+ * The enable_on_exec/disabled value strategy:
+ *
+ *  1) For any type of traced program:
+ *    - all independent events and group leaders are disabled
+ *    - all group members are enabled
+ *
+ *     Group members are ruled by group leaders. They need to
+ *     be enabled, because the group scheduling relies on that.
+ *
+ *  2) For traced programs executed by perf:
+ *     - all independent events and group leaders have
+ *       enable_on_exec set
+ *     - we don't specifically enable or disable any event during
+ *       the record command
+ *
+ *     Independent events and group leaders are initially disabled
+ *     and get enabled by exec. Group members are ruled by group
+ *     leaders as stated in 1).
+ *
+ *  3) For traced programs attached by perf (pid/tid):
+ *     - we specifically enable or disable all events during
+ *       the record command
+ *
+ *     When attaching events to already running traced we
+ *     enable/disable events specifically, as there's no
+ *     initial traced exec call.
+ */
+void perf_evsel__config(struct perf_evsel *evsel,
+			struct perf_record_opts *opts)
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	int track = !evsel->idx; /* only the first counter needs these */
 
-	attr->disabled = 1;
 	attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
 	attr->inherit	    = !opts->no_inherit;
 	attr->read_format   = PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -486,10 +513,21 @@
 	attr->mmap = track;
 	attr->comm = track;
 
-	if (perf_target__none(&opts->target) &&
-	    (!opts->group || evsel == first)) {
+	/*
+	 * XXX see the function comment above
+	 *
+	 * Disabling only independent events or group leaders,
+	 * keeping group members enabled.
+	 */
+	if (!perf_evsel__is_group_member(evsel))
+		attr->disabled = 1;
+
+	/*
+	 * Setting enable_on_exec for independent events and
+	 * group leaders for traced executed by perf.
+	 */
+	if (perf_target__none(&opts->target) && !perf_evsel__is_group_member(evsel))
 		attr->enable_on_exec = 1;
-	}
 }
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -669,7 +707,7 @@
 	struct perf_evsel *leader = evsel->leader;
 	int fd;
 
-	if (!leader)
+	if (!perf_evsel__is_group_member(evsel))
 		return -1;
 
 	/*
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d99b476..3d2b801 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -93,8 +93,7 @@
 void perf_evsel__delete(struct perf_evsel *evsel);
 
 void perf_evsel__config(struct perf_evsel *evsel,
-			struct perf_record_opts *opts,
-			struct perf_evsel *first);
+			struct perf_record_opts *opts);
 
 bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
 
@@ -226,4 +225,9 @@
 {
 	return list_entry(evsel->node.next, struct perf_evsel, node);
 }
+
+static inline bool perf_evsel__is_group_member(const struct perf_evsel *evsel)
+{
+	return evsel->leader != NULL;
+}
 #endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 566b84c..b7da463 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -23,6 +23,7 @@
 #include "pmu.h"
 #include "vdso.h"
 #include "strbuf.h"
+#include "build-id.h"
 
 static bool no_buildid_cache = false;
 
@@ -2342,6 +2343,16 @@
 	return -1;
 }
 
+bool is_perf_magic(u64 magic)
+{
+	if (!memcmp(&magic, __perf_magic1, sizeof(magic))
+		|| magic == __perf_magic2
+		|| magic == __perf_magic2_sw)
+		return true;
+
+	return false;
+}
+
 static int check_magic_endian(u64 magic, uint64_t hdr_sz,
 			      bool is_pipe, struct perf_header *ph)
 {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 9bc0078..20f0344 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -154,6 +154,7 @@
 int perf_event__process_build_id(struct perf_tool *tool,
 				 union perf_event *event,
 				 struct perf_session *session);
+bool is_perf_magic(u64 magic);
 
 /*
  * arch specific callback
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 277947a..cb17e2a 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -244,6 +244,8 @@
 			he->ms.map->referenced = true;
 		if (symbol_conf.use_callchain)
 			callchain_init(he->callchain);
+
+		INIT_LIST_HEAD(&he->pairs.node);
 	}
 
 	return he;
@@ -410,6 +412,7 @@
 
 void hist_entry__free(struct hist_entry *he)
 {
+	free(he->branch_info);
 	free(he);
 }
 
@@ -713,3 +716,99 @@
 	++hists->stats.nr_events[0];
 	++hists->stats.nr_events[type];
 }
+
+static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
+						 struct hist_entry *pair)
+{
+	struct rb_node **p = &hists->entries.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__cmp(pair, he);
+
+		if (!cmp)
+			goto out;
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	he = hist_entry__new(pair);
+	if (he) {
+		memset(&he->stat, 0, sizeof(he->stat));
+		he->hists = hists;
+		rb_link_node(&he->rb_node, parent, p);
+		rb_insert_color(&he->rb_node, &hists->entries);
+		hists__inc_nr_entries(hists, he);
+	}
+out:
+	return he;
+}
+
+static struct hist_entry *hists__find_entry(struct hists *hists,
+					    struct hist_entry *he)
+{
+	struct rb_node *n = hists->entries.rb_node;
+
+	while (n) {
+		struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
+		int64_t cmp = hist_entry__cmp(he, iter);
+
+		if (cmp < 0)
+			n = n->rb_left;
+		else if (cmp > 0)
+			n = n->rb_right;
+		else
+			return iter;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look for pairs to link to the leader buckets (hist_entries):
+ */
+void hists__match(struct hists *leader, struct hists *other)
+{
+	struct rb_node *nd;
+	struct hist_entry *pos, *pair;
+
+	for (nd = rb_first(&leader->entries); nd; nd = rb_next(nd)) {
+		pos  = rb_entry(nd, struct hist_entry, rb_node);
+		pair = hists__find_entry(other, pos);
+
+		if (pair)
+			hist__entry_add_pair(pos, pair);
+	}
+}
+
+/*
+ * Look for entries in the other hists that are not present in the leader, if
+ * we find them, just add a dummy entry on the leader hists, with period=0,
+ * nr_events=0, to serve as the list header.
+ */
+int hists__link(struct hists *leader, struct hists *other)
+{
+	struct rb_node *nd;
+	struct hist_entry *pos, *pair;
+
+	for (nd = rb_first(&other->entries); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (!hist_entry__has_pairs(pos)) {
+			pair = hists__add_dummy_entry(leader, pos);
+			if (pair == NULL)
+				return -1;
+			hist__entry_add_pair(pair, pos);
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 66cb31f..8b091a5 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <pthread.h>
 #include "callchain.h"
+#include "header.h"
 
 extern struct callchain_param callchain_param;
 
@@ -114,6 +115,9 @@
 void hists__reset_col_len(struct hists *hists);
 void hists__calc_col_len(struct hists *hists, struct hist_entry *he);
 
+void hists__match(struct hists *leader, struct hists *other);
+int hists__link(struct hists *leader, struct hists *other);
+
 struct perf_hpp {
 	char *buf;
 	size_t size;
@@ -140,8 +144,12 @@
 	PERF_HPP__OVERHEAD_GUEST_US,
 	PERF_HPP__SAMPLES,
 	PERF_HPP__PERIOD,
+	PERF_HPP__PERIOD_BASELINE,
 	PERF_HPP__DELTA,
+	PERF_HPP__RATIO,
+	PERF_HPP__WEIGHTED_DIFF,
 	PERF_HPP__DISPL,
+	PERF_HPP__FORMULA,
 
 	PERF_HPP__MAX_INDEX
 };
@@ -153,21 +161,27 @@
 
 struct perf_evlist;
 
+struct hist_browser_timer {
+	void (*timer)(void *arg);
+	void *arg;
+	int refresh;
+};
+
 #ifdef NEWT_SUPPORT
 #include "../ui/keysyms.h"
 int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
-			     void(*timer)(void *arg), void *arg, int delay_secs);
+			     struct hist_browser_timer *hbt);
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
-				  void(*timer)(void *arg), void *arg,
-				  int refresh);
+				  struct hist_browser_timer *hbt,
+				  struct perf_session_env *env);
+int script_browse(const char *script_opt);
 #else
 static inline
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
 				  const char *help __maybe_unused,
-				  void(*timer)(void *arg) __maybe_unused,
-				  void *arg __maybe_unused,
-				  int refresh __maybe_unused)
+				  struct hist_browser_timer *hbt __maybe_unused,
+				  struct perf_session_env *env __maybe_unused)
 {
 	return 0;
 }
@@ -175,28 +189,29 @@
 static inline int hist_entry__tui_annotate(struct hist_entry *self
 					   __maybe_unused,
 					   int evidx __maybe_unused,
-					   void(*timer)(void *arg)
-					   __maybe_unused,
-					   void *arg __maybe_unused,
-					   int delay_secs __maybe_unused)
+					   struct hist_browser_timer *hbt
+					   __maybe_unused)
 {
 	return 0;
 }
+
+static inline int script_browse(const char *script_opt __maybe_unused)
+{
+	return 0;
+}
+
 #define K_LEFT -1
 #define K_RIGHT -2
 #endif
 
 #ifdef GTK2_SUPPORT
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help,
-				  void(*timer)(void *arg), void *arg,
-				  int refresh);
+				  struct hist_browser_timer *hbt __maybe_unused);
 #else
 static inline
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused,
 				  const char *help __maybe_unused,
-				  void(*timer)(void *arg) __maybe_unused,
-				  void *arg __maybe_unused,
-				  int refresh __maybe_unused)
+				  struct hist_browser_timer *hbt __maybe_unused)
 {
 	return 0;
 }
@@ -204,4 +219,8 @@
 
 unsigned int hists__sort_list_width(struct hists *self);
 
+double perf_diff__compute_delta(struct hist_entry *he);
+double perf_diff__compute_ratio(struct hist_entry *he);
+s64 perf_diff__compute_wdiff(struct hist_entry *he);
+int perf_diff__formula(char *buf, size_t size, struct hist_entry *he);
 #endif	/* __PERF_HIST_H */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
new file mode 100644
index 0000000..1f09d05
--- /dev/null
+++ b/tools/perf/util/machine.c
@@ -0,0 +1,464 @@
+#include "debug.h"
+#include "event.h"
+#include "machine.h"
+#include "map.h"
+#include "strlist.h"
+#include "thread.h"
+#include <stdbool.h>
+
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
+{
+	map_groups__init(&machine->kmaps);
+	RB_CLEAR_NODE(&machine->rb_node);
+	INIT_LIST_HEAD(&machine->user_dsos);
+	INIT_LIST_HEAD(&machine->kernel_dsos);
+
+	machine->threads = RB_ROOT;
+	INIT_LIST_HEAD(&machine->dead_threads);
+	machine->last_match = NULL;
+
+	machine->kmaps.machine = machine;
+	machine->pid = pid;
+
+	machine->root_dir = strdup(root_dir);
+	if (machine->root_dir == NULL)
+		return -ENOMEM;
+
+	if (pid != HOST_KERNEL_ID) {
+		struct thread *thread = machine__findnew_thread(machine, pid);
+		char comm[64];
+
+		if (thread == NULL)
+			return -ENOMEM;
+
+		snprintf(comm, sizeof(comm), "[guest/%d]", pid);
+		thread__set_comm(thread, comm);
+	}
+
+	return 0;
+}
+
+static void dsos__delete(struct list_head *dsos)
+{
+	struct dso *pos, *n;
+
+	list_for_each_entry_safe(pos, n, dsos, node) {
+		list_del(&pos->node);
+		dso__delete(pos);
+	}
+}
+
+void machine__exit(struct machine *machine)
+{
+	map_groups__exit(&machine->kmaps);
+	dsos__delete(&machine->user_dsos);
+	dsos__delete(&machine->kernel_dsos);
+	free(machine->root_dir);
+	machine->root_dir = NULL;
+}
+
+void machine__delete(struct machine *machine)
+{
+	machine__exit(machine);
+	free(machine);
+}
+
+struct machine *machines__add(struct rb_root *machines, pid_t pid,
+			      const char *root_dir)
+{
+	struct rb_node **p = &machines->rb_node;
+	struct rb_node *parent = NULL;
+	struct machine *pos, *machine = malloc(sizeof(*machine));
+
+	if (machine == NULL)
+		return NULL;
+
+	if (machine__init(machine, root_dir, pid) != 0) {
+		free(machine);
+		return NULL;
+	}
+
+	while (*p != NULL) {
+		parent = *p;
+		pos = rb_entry(parent, struct machine, rb_node);
+		if (pid < pos->pid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&machine->rb_node, parent, p);
+	rb_insert_color(&machine->rb_node, machines);
+
+	return machine;
+}
+
+struct machine *machines__find(struct rb_root *machines, pid_t pid)
+{
+	struct rb_node **p = &machines->rb_node;
+	struct rb_node *parent = NULL;
+	struct machine *machine;
+	struct machine *default_machine = NULL;
+
+	while (*p != NULL) {
+		parent = *p;
+		machine = rb_entry(parent, struct machine, rb_node);
+		if (pid < machine->pid)
+			p = &(*p)->rb_left;
+		else if (pid > machine->pid)
+			p = &(*p)->rb_right;
+		else
+			return machine;
+		if (!machine->pid)
+			default_machine = machine;
+	}
+
+	return default_machine;
+}
+
+struct machine *machines__findnew(struct rb_root *machines, pid_t pid)
+{
+	char path[PATH_MAX];
+	const char *root_dir = "";
+	struct machine *machine = machines__find(machines, pid);
+
+	if (machine && (machine->pid == pid))
+		goto out;
+
+	if ((pid != HOST_KERNEL_ID) &&
+	    (pid != DEFAULT_GUEST_KERNEL_ID) &&
+	    (symbol_conf.guestmount)) {
+		sprintf(path, "%s/%d", symbol_conf.guestmount, pid);
+		if (access(path, R_OK)) {
+			static struct strlist *seen;
+
+			if (!seen)
+				seen = strlist__new(true, NULL);
+
+			if (!strlist__has_entry(seen, path)) {
+				pr_err("Can't access file %s\n", path);
+				strlist__add(seen, path);
+			}
+			machine = NULL;
+			goto out;
+		}
+		root_dir = path;
+	}
+
+	machine = machines__add(machines, pid, root_dir);
+out:
+	return machine;
+}
+
+void machines__process(struct rb_root *machines,
+		       machine__process_t process, void *data)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+		process(pos, data);
+	}
+}
+
+char *machine__mmap_name(struct machine *machine, char *bf, size_t size)
+{
+	if (machine__is_host(machine))
+		snprintf(bf, size, "[%s]", "kernel.kallsyms");
+	else if (machine__is_default_guest(machine))
+		snprintf(bf, size, "[%s]", "guest.kernel.kallsyms");
+	else {
+		snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms",
+			 machine->pid);
+	}
+
+	return bf;
+}
+
+void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size)
+{
+	struct rb_node *node;
+	struct machine *machine;
+
+	for (node = rb_first(machines); node; node = rb_next(node)) {
+		machine = rb_entry(node, struct machine, rb_node);
+		machine->id_hdr_size = id_hdr_size;
+	}
+
+	return;
+}
+
+static struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid,
+						bool create)
+{
+	struct rb_node **p = &machine->threads.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+
+	/*
+	 * Font-end cache - PID lookups come in blocks,
+	 * so most of the time we dont have to look up
+	 * the full rbtree:
+	 */
+	if (machine->last_match && machine->last_match->pid == pid)
+		return machine->last_match;
+
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
+
+		if (th->pid == pid) {
+			machine->last_match = th;
+			return th;
+		}
+
+		if (pid < th->pid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	if (!create)
+		return NULL;
+
+	th = thread__new(pid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color(&th->rb_node, &machine->threads);
+		machine->last_match = th;
+	}
+
+	return th;
+}
+
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid)
+{
+	return __machine__findnew_thread(machine, pid, true);
+}
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid)
+{
+	return __machine__findnew_thread(machine, pid, false);
+}
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event)
+{
+	struct thread *thread = machine__findnew_thread(machine, event->comm.tid);
+
+	if (dump_trace)
+		perf_event__fprintf_comm(event, stdout);
+
+	if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
+		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int machine__process_lost_event(struct machine *machine __maybe_unused,
+				union perf_event *event)
+{
+	dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
+		    event->lost.id, event->lost.lost);
+	return 0;
+}
+
+static void machine__set_kernel_mmap_len(struct machine *machine,
+					 union perf_event *event)
+{
+	int i;
+
+	for (i = 0; i < MAP__NR_TYPES; i++) {
+		machine->vmlinux_maps[i]->start = event->mmap.start;
+		machine->vmlinux_maps[i]->end   = (event->mmap.start +
+						   event->mmap.len);
+		/*
+		 * Be a bit paranoid here, some perf.data file came with
+		 * a zero sized synthesized MMAP event for the kernel.
+		 */
+		if (machine->vmlinux_maps[i]->end == 0)
+			machine->vmlinux_maps[i]->end = ~0ULL;
+	}
+}
+
+static int machine__process_kernel_mmap_event(struct machine *machine,
+					      union perf_event *event)
+{
+	struct map *map;
+	char kmmap_prefix[PATH_MAX];
+	enum dso_kernel_type kernel_type;
+	bool is_kernel_mmap;
+
+	machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
+	if (machine__is_host(machine))
+		kernel_type = DSO_TYPE_KERNEL;
+	else
+		kernel_type = DSO_TYPE_GUEST_KERNEL;
+
+	is_kernel_mmap = memcmp(event->mmap.filename,
+				kmmap_prefix,
+				strlen(kmmap_prefix) - 1) == 0;
+	if (event->mmap.filename[0] == '/' ||
+	    (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
+
+		char short_module_name[1024];
+		char *name, *dot;
+
+		if (event->mmap.filename[0] == '/') {
+			name = strrchr(event->mmap.filename, '/');
+			if (name == NULL)
+				goto out_problem;
+
+			++name; /* skip / */
+			dot = strrchr(name, '.');
+			if (dot == NULL)
+				goto out_problem;
+			snprintf(short_module_name, sizeof(short_module_name),
+					"[%.*s]", (int)(dot - name), name);
+			strxfrchar(short_module_name, '-', '_');
+		} else
+			strcpy(short_module_name, event->mmap.filename);
+
+		map = machine__new_module(machine, event->mmap.start,
+					  event->mmap.filename);
+		if (map == NULL)
+			goto out_problem;
+
+		name = strdup(short_module_name);
+		if (name == NULL)
+			goto out_problem;
+
+		map->dso->short_name = name;
+		map->dso->sname_alloc = 1;
+		map->end = map->start + event->mmap.len;
+	} else if (is_kernel_mmap) {
+		const char *symbol_name = (event->mmap.filename +
+				strlen(kmmap_prefix));
+		/*
+		 * Should be there already, from the build-id table in
+		 * the header.
+		 */
+		struct dso *kernel = __dsos__findnew(&machine->kernel_dsos,
+						     kmmap_prefix);
+		if (kernel == NULL)
+			goto out_problem;
+
+		kernel->kernel = kernel_type;
+		if (__machine__create_kernel_maps(machine, kernel) < 0)
+			goto out_problem;
+
+		machine__set_kernel_mmap_len(machine, event);
+
+		/*
+		 * Avoid using a zero address (kptr_restrict) for the ref reloc
+		 * symbol. Effectively having zero here means that at record
+		 * time /proc/sys/kernel/kptr_restrict was non zero.
+		 */
+		if (event->mmap.pgoff != 0) {
+			maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
+							 symbol_name,
+							 event->mmap.pgoff);
+		}
+
+		if (machine__is_default_guest(machine)) {
+			/*
+			 * preload dso of guest kernel and modules
+			 */
+			dso__load(kernel, machine->vmlinux_maps[MAP__FUNCTION],
+				  NULL);
+		}
+	}
+	return 0;
+out_problem:
+	return -1;
+}
+
+int machine__process_mmap_event(struct machine *machine, union perf_event *event)
+{
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct thread *thread;
+	struct map *map;
+	int ret = 0;
+
+	if (dump_trace)
+		perf_event__fprintf_mmap(event, stdout);
+
+	if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    cpumode == PERF_RECORD_MISC_KERNEL) {
+		ret = machine__process_kernel_mmap_event(machine, event);
+		if (ret < 0)
+			goto out_problem;
+		return 0;
+	}
+
+	thread = machine__findnew_thread(machine, event->mmap.pid);
+	if (thread == NULL)
+		goto out_problem;
+	map = map__new(&machine->user_dsos, event->mmap.start,
+			event->mmap.len, event->mmap.pgoff,
+			event->mmap.pid, event->mmap.filename,
+			MAP__FUNCTION);
+	if (map == NULL)
+		goto out_problem;
+
+	thread__insert_map(thread, map);
+	return 0;
+
+out_problem:
+	dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
+	return 0;
+}
+
+int machine__process_fork_event(struct machine *machine, union perf_event *event)
+{
+	struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
+	struct thread *parent = machine__findnew_thread(machine, event->fork.ptid);
+
+	if (dump_trace)
+		perf_event__fprintf_task(event, stdout);
+
+	if (thread == NULL || parent == NULL ||
+	    thread__fork(thread, parent) < 0) {
+		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int machine__process_exit_event(struct machine *machine, union perf_event *event)
+{
+	struct thread *thread = machine__find_thread(machine, event->fork.tid);
+
+	if (dump_trace)
+		perf_event__fprintf_task(event, stdout);
+
+	if (thread != NULL)
+		machine__remove_thread(machine, thread);
+
+	return 0;
+}
+
+int machine__process_event(struct machine *machine, union perf_event *event)
+{
+	int ret;
+
+	switch (event->header.type) {
+	case PERF_RECORD_COMM:
+		ret = machine__process_comm_event(machine, event); break;
+	case PERF_RECORD_MMAP:
+		ret = machine__process_mmap_event(machine, event); break;
+	case PERF_RECORD_FORK:
+		ret = machine__process_fork_event(machine, event); break;
+	case PERF_RECORD_EXIT:
+		ret = machine__process_exit_event(machine, event); break;
+	case PERF_RECORD_LOST:
+		ret = machine__process_lost_event(machine, event); break;
+	default:
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
new file mode 100644
index 0000000..b7cde74
--- /dev/null
+++ b/tools/perf/util/machine.h
@@ -0,0 +1,148 @@
+#ifndef __PERF_MACHINE_H
+#define __PERF_MACHINE_H
+
+#include <sys/types.h>
+#include <linux/rbtree.h>
+#include "map.h"
+
+struct branch_stack;
+struct perf_evsel;
+struct perf_sample;
+struct symbol;
+struct thread;
+union perf_event;
+
+/* Native host kernel uses -1 as pid index in machine */
+#define	HOST_KERNEL_ID			(-1)
+#define	DEFAULT_GUEST_KERNEL_ID		(0)
+
+struct machine {
+	struct rb_node	  rb_node;
+	pid_t		  pid;
+	u16		  id_hdr_size;
+	char		  *root_dir;
+	struct rb_root	  threads;
+	struct list_head  dead_threads;
+	struct thread	  *last_match;
+	struct list_head  user_dsos;
+	struct list_head  kernel_dsos;
+	struct map_groups kmaps;
+	struct map	  *vmlinux_maps[MAP__NR_TYPES];
+};
+
+static inline
+struct map *machine__kernel_map(struct machine *machine, enum map_type type)
+{
+	return machine->vmlinux_maps[type];
+}
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid);
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event);
+int machine__process_exit_event(struct machine *machine, union perf_event *event);
+int machine__process_fork_event(struct machine *machine, union perf_event *event);
+int machine__process_lost_event(struct machine *machine, union perf_event *event);
+int machine__process_mmap_event(struct machine *machine, union perf_event *event);
+int machine__process_event(struct machine *machine, union perf_event *event);
+
+typedef void (*machine__process_t)(struct machine *machine, void *data);
+
+void machines__process(struct rb_root *machines,
+		       machine__process_t process, void *data);
+
+struct machine *machines__add(struct rb_root *machines, pid_t pid,
+			      const char *root_dir);
+struct machine *machines__find_host(struct rb_root *machines);
+struct machine *machines__find(struct rb_root *machines, pid_t pid);
+struct machine *machines__findnew(struct rb_root *machines, pid_t pid);
+
+void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size);
+char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
+
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
+void machine__exit(struct machine *machine);
+void machine__delete(struct machine *machine);
+
+
+struct branch_info *machine__resolve_bstack(struct machine *machine,
+					    struct thread *thread,
+					    struct branch_stack *bs);
+int machine__resolve_callchain(struct machine *machine,
+			       struct perf_evsel *evsel,
+			       struct thread *thread,
+			       struct perf_sample *sample,
+			       struct symbol **parent);
+
+/*
+ * Default guest kernel is defined by parameter --guestkallsyms
+ * and --guestmodules
+ */
+static inline bool machine__is_default_guest(struct machine *machine)
+{
+	return machine ? machine->pid == DEFAULT_GUEST_KERNEL_ID : false;
+}
+
+static inline bool machine__is_host(struct machine *machine)
+{
+	return machine ? machine->pid == HOST_KERNEL_ID : false;
+}
+
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid);
+void machine__remove_thread(struct machine *machine, struct thread *th);
+
+size_t machine__fprintf(struct machine *machine, FILE *fp);
+
+static inline
+struct symbol *machine__find_kernel_symbol(struct machine *machine,
+					   enum map_type type, u64 addr,
+					   struct map **mapp,
+					   symbol_filter_t filter)
+{
+	return map_groups__find_symbol(&machine->kmaps, type, addr,
+				       mapp, filter);
+}
+
+static inline
+struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
+					     struct map **mapp,
+					     symbol_filter_t filter)
+{
+	return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr,
+					   mapp, filter);
+}
+
+static inline
+struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
+						     const char *name,
+						     struct map **mapp,
+						     symbol_filter_t filter)
+{
+	return map_groups__find_function_by_name(&machine->kmaps, name, mapp,
+						 filter);
+}
+
+struct map *machine__new_module(struct machine *machine, u64 start,
+				const char *filename);
+
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+			   enum map_type type, symbol_filter_t filter);
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
+			       symbol_filter_t filter);
+
+size_t machine__fprintf_dsos_buildid(struct machine *machine,
+				     FILE *fp, bool with_hits);
+size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp);
+size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
+				      FILE *fp, bool with_hits);
+
+void machine__destroy_kernel_maps(struct machine *machine);
+int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
+int machine__create_kernel_maps(struct machine *machine);
+
+int machines__create_kernel_maps(struct rb_root *machines, pid_t pid);
+int machines__create_guest_kernel_maps(struct rb_root *machines);
+void machines__destroy_guest_kernel_maps(struct rb_root *machines);
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
+
+#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 6109fa4..0328d45 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -10,6 +10,7 @@
 #include "thread.h"
 #include "strlist.h"
 #include "vdso.h"
+#include "build-id.h"
 
 const char *map_type__name[MAP__NR_TYPES] = {
 	[MAP__FUNCTION] = "Functions",
@@ -23,7 +24,7 @@
 
 static inline int is_no_dso_memory(const char *filename)
 {
-	return !strcmp(filename, "[stack]") ||
+	return !strncmp(filename, "[stack", 6) ||
 	       !strcmp(filename, "[heap]");
 }
 
@@ -589,182 +590,3 @@
 
 	return NULL;
 }
-
-int machine__init(struct machine *self, const char *root_dir, pid_t pid)
-{
-	map_groups__init(&self->kmaps);
-	RB_CLEAR_NODE(&self->rb_node);
-	INIT_LIST_HEAD(&self->user_dsos);
-	INIT_LIST_HEAD(&self->kernel_dsos);
-
-	self->threads = RB_ROOT;
-	INIT_LIST_HEAD(&self->dead_threads);
-	self->last_match = NULL;
-
-	self->kmaps.machine = self;
-	self->pid	    = pid;
-	self->root_dir      = strdup(root_dir);
-	if (self->root_dir == NULL)
-		return -ENOMEM;
-
-	if (pid != HOST_KERNEL_ID) {
-		struct thread *thread = machine__findnew_thread(self, pid);
-		char comm[64];
-
-		if (thread == NULL)
-			return -ENOMEM;
-
-		snprintf(comm, sizeof(comm), "[guest/%d]", pid);
-		thread__set_comm(thread, comm);
-	}
-
-	return 0;
-}
-
-static void dsos__delete(struct list_head *self)
-{
-	struct dso *pos, *n;
-
-	list_for_each_entry_safe(pos, n, self, node) {
-		list_del(&pos->node);
-		dso__delete(pos);
-	}
-}
-
-void machine__exit(struct machine *self)
-{
-	map_groups__exit(&self->kmaps);
-	dsos__delete(&self->user_dsos);
-	dsos__delete(&self->kernel_dsos);
-	free(self->root_dir);
-	self->root_dir = NULL;
-}
-
-void machine__delete(struct machine *self)
-{
-	machine__exit(self);
-	free(self);
-}
-
-struct machine *machines__add(struct rb_root *self, pid_t pid,
-			      const char *root_dir)
-{
-	struct rb_node **p = &self->rb_node;
-	struct rb_node *parent = NULL;
-	struct machine *pos, *machine = malloc(sizeof(*machine));
-
-	if (!machine)
-		return NULL;
-
-	if (machine__init(machine, root_dir, pid) != 0) {
-		free(machine);
-		return NULL;
-	}
-
-	while (*p != NULL) {
-		parent = *p;
-		pos = rb_entry(parent, struct machine, rb_node);
-		if (pid < pos->pid)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&machine->rb_node, parent, p);
-	rb_insert_color(&machine->rb_node, self);
-
-	return machine;
-}
-
-struct machine *machines__find(struct rb_root *self, pid_t pid)
-{
-	struct rb_node **p = &self->rb_node;
-	struct rb_node *parent = NULL;
-	struct machine *machine;
-	struct machine *default_machine = NULL;
-
-	while (*p != NULL) {
-		parent = *p;
-		machine = rb_entry(parent, struct machine, rb_node);
-		if (pid < machine->pid)
-			p = &(*p)->rb_left;
-		else if (pid > machine->pid)
-			p = &(*p)->rb_right;
-		else
-			return machine;
-		if (!machine->pid)
-			default_machine = machine;
-	}
-
-	return default_machine;
-}
-
-struct machine *machines__findnew(struct rb_root *self, pid_t pid)
-{
-	char path[PATH_MAX];
-	const char *root_dir = "";
-	struct machine *machine = machines__find(self, pid);
-
-	if (machine && (machine->pid == pid))
-		goto out;
-
-	if ((pid != HOST_KERNEL_ID) &&
-	    (pid != DEFAULT_GUEST_KERNEL_ID) &&
-	    (symbol_conf.guestmount)) {
-		sprintf(path, "%s/%d", symbol_conf.guestmount, pid);
-		if (access(path, R_OK)) {
-			static struct strlist *seen;
-
-			if (!seen)
-				seen = strlist__new(true, NULL);
-
-			if (!strlist__has_entry(seen, path)) {
-				pr_err("Can't access file %s\n", path);
-				strlist__add(seen, path);
-			}
-			machine = NULL;
-			goto out;
-		}
-		root_dir = path;
-	}
-
-	machine = machines__add(self, pid, root_dir);
-
-out:
-	return machine;
-}
-
-void machines__process(struct rb_root *self, machine__process_t process, void *data)
-{
-	struct rb_node *nd;
-
-	for (nd = rb_first(self); nd; nd = rb_next(nd)) {
-		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		process(pos, data);
-	}
-}
-
-char *machine__mmap_name(struct machine *self, char *bf, size_t size)
-{
-	if (machine__is_host(self))
-		snprintf(bf, size, "[%s]", "kernel.kallsyms");
-	else if (machine__is_default_guest(self))
-		snprintf(bf, size, "[%s]", "guest.kernel.kallsyms");
-	else
-		snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms", self->pid);
-
-	return bf;
-}
-
-void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size)
-{
-	struct rb_node *node;
-	struct machine *machine;
-
-	for (node = rb_first(machines); node; node = rb_next(node)) {
-		machine = rb_entry(node, struct machine, rb_node);
-		machine->id_hdr_size = id_hdr_size;
-	}
-
-	return;
-}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index d2250fc..bcb39e2 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -57,30 +57,6 @@
 	struct machine	 *machine;
 };
 
-/* Native host kernel uses -1 as pid index in machine */
-#define	HOST_KERNEL_ID			(-1)
-#define	DEFAULT_GUEST_KERNEL_ID		(0)
-
-struct machine {
-	struct rb_node	  rb_node;
-	pid_t		  pid;
-	u16		  id_hdr_size;
-	char		  *root_dir;
-	struct rb_root	  threads;
-	struct list_head  dead_threads;
-	struct thread	  *last_match;
-	struct list_head  user_dsos;
-	struct list_head  kernel_dsos;
-	struct map_groups kmaps;
-	struct map	  *vmlinux_maps[MAP__NR_TYPES];
-};
-
-static inline
-struct map *machine__kernel_map(struct machine *self, enum map_type type)
-{
-	return self->vmlinux_maps[type];
-}
-
 static inline struct kmap *map__kmap(struct map *self)
 {
 	return (struct kmap *)(self + 1);
@@ -143,44 +119,9 @@
 size_t map_groups__fprintf(struct map_groups *mg, int verbose, FILE *fp);
 size_t map_groups__fprintf_maps(struct map_groups *mg, int verbose, FILE *fp);
 
-typedef void (*machine__process_t)(struct machine *self, void *data);
-
-void machines__process(struct rb_root *self, machine__process_t process, void *data);
-struct machine *machines__add(struct rb_root *self, pid_t pid,
-			      const char *root_dir);
-struct machine *machines__find_host(struct rb_root *self);
-struct machine *machines__find(struct rb_root *self, pid_t pid);
-struct machine *machines__findnew(struct rb_root *self, pid_t pid);
-void machines__set_id_hdr_size(struct rb_root *self, u16 id_hdr_size);
-char *machine__mmap_name(struct machine *self, char *bf, size_t size);
-int machine__init(struct machine *self, const char *root_dir, pid_t pid);
-void machine__exit(struct machine *self);
-void machine__delete(struct machine *self);
-
-struct perf_evsel;
-struct perf_sample;
-int machine__resolve_callchain(struct machine *machine,
-			       struct perf_evsel *evsel,
-			       struct thread *thread,
-			       struct perf_sample *sample,
-			       struct symbol **parent);
 int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
 				     u64 addr);
 
-/*
- * Default guest kernel is defined by parameter --guestkallsyms
- * and --guestmodules
- */
-static inline bool machine__is_default_guest(struct machine *self)
-{
-	return self ? self->pid == DEFAULT_GUEST_KERNEL_ID : false;
-}
-
-static inline bool machine__is_host(struct machine *self)
-{
-	return self ? self->pid == HOST_KERNEL_ID : false;
-}
-
 static inline void map_groups__insert(struct map_groups *mg, struct map *map)
 {
 	maps__insert(&mg->maps[map->type], map);
@@ -209,29 +150,6 @@
 					       struct map **mapp,
 					       symbol_filter_t filter);
 
-
-struct thread *machine__findnew_thread(struct machine *machine, pid_t pid);
-void machine__remove_thread(struct machine *machine, struct thread *th);
-
-size_t machine__fprintf(struct machine *machine, FILE *fp);
-
-static inline
-struct symbol *machine__find_kernel_symbol(struct machine *self,
-					   enum map_type type, u64 addr,
-					   struct map **mapp,
-					   symbol_filter_t filter)
-{
-	return map_groups__find_symbol(&self->kmaps, type, addr, mapp, filter);
-}
-
-static inline
-struct symbol *machine__find_kernel_function(struct machine *self, u64 addr,
-					     struct map **mapp,
-					     symbol_filter_t filter)
-{
-	return machine__find_kernel_symbol(self, MAP__FUNCTION, addr, mapp, filter);
-}
-
 static inline
 struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
 						 const char *name, struct map **mapp,
@@ -240,22 +158,11 @@
 	return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp, filter);
 }
 
-static inline
-struct symbol *machine__find_kernel_function_by_name(struct machine *self,
-						     const char *name,
-						     struct map **mapp,
-						     symbol_filter_t filter)
-{
-	return map_groups__find_function_by_name(&self->kmaps, name, mapp,
-						 filter);
-}
-
 int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 				   int verbose, FILE *fp);
 
 struct map *map_groups__find_by_name(struct map_groups *mg,
 				     enum map_type type, const char *name);
-struct map *machine__new_module(struct machine *self, u64 start, const char *filename);
 
 void map_groups__flush(struct map_groups *mg);
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6b6d03e..2d8d53be 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -722,6 +722,27 @@
 	return 0;
 }
 
+/*
+ * Basic modifier sanity check to validate it contains only one
+ * instance of any modifier (apart from 'p') present.
+ */
+static int check_modifier(char *str)
+{
+	char *p = str;
+
+	/* The sizeof includes 0 byte as well. */
+	if (strlen(str) > (sizeof("ukhGHppp") - 1))
+		return -1;
+
+	while (*p) {
+		if (*p != 'p' && strchr(p + 1, *p))
+			return -1;
+		p++;
+	}
+
+	return 0;
+}
+
 int parse_events__modifier_event(struct list_head *list, char *str, bool add)
 {
 	struct perf_evsel *evsel;
@@ -730,6 +751,9 @@
 	if (str == NULL)
 		return 0;
 
+	if (check_modifier(str))
+		return -EINVAL;
+
 	if (!add && get_event_modifier(&mod, str, NULL))
 		return -EINVAL;
 
@@ -827,8 +851,6 @@
 	 * Both call perf_evlist__delete in case of error, so we dont
 	 * need to bother.
 	 */
-	fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
-	fprintf(stderr, "Run 'perf list' for a list of valid events\n");
 	return ret;
 }
 
@@ -836,7 +858,13 @@
 			int unset __maybe_unused)
 {
 	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
-	return parse_events(evlist, str, unset);
+	int ret = parse_events(evlist, str, unset);
+
+	if (ret) {
+		fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
+		fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+	}
+	return ret;
 }
 
 int parse_filter(const struct option *opt, const char *str,
@@ -1081,7 +1109,7 @@
 		printf("  %-50s [%s]\n",
 		       "cpu/t1=v1[,t2=v2,t3 ...]/modifier",
 		       event_type_descriptors[PERF_TYPE_RAW]);
-		printf("   (see 'perf list --help' on how to encode it)\n");
+		printf("   (see 'man perf-list' on how to encode it)\n");
 		printf("\n");
 
 		printf("  %-50s [%s]\n",
@@ -1142,6 +1170,24 @@
 			config, str, 0);
 }
 
+int parse_events__term_sym_hw(struct parse_events__term **term,
+			      char *config, unsigned idx)
+{
+	struct event_symbol *sym;
+
+	BUG_ON(idx >= PERF_COUNT_HW_MAX);
+	sym = &event_symbols_hw[idx];
+
+	if (config)
+		return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
+				PARSE_EVENTS__TERM_TYPE_USER, config,
+				(char *) sym->symbol, 0);
+	else
+		return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
+				PARSE_EVENTS__TERM_TYPE_USER,
+				(char *) "event", (char *) sym->symbol, 0);
+}
+
 int parse_events__term_clone(struct parse_events__term **new,
 			     struct parse_events__term *term)
 {
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 2820c40..b7af80b 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -76,6 +76,8 @@
 			   int type_term, char *config, u64 num);
 int parse_events__term_str(struct parse_events__term **_term,
 			   int type_term, char *config, char *str);
+int parse_events__term_sym_hw(struct parse_events__term **term,
+			      char *config, unsigned idx);
 int parse_events__term_clone(struct parse_events__term **new,
 			     struct parse_events__term *term);
 void parse_events__free_terms(struct list_head *terms);
@@ -97,7 +99,6 @@
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all);
 void parse_events_error(void *data, void *scanner, char const *msg);
-int parse_events__test(void);
 
 void print_events(const char *event_glob, bool name_only);
 void print_events_type(u8 type);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index c87efc1..e9d1134 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -81,7 +81,8 @@
 num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?]*
-modifier_event	[ukhpGH]{1,8}
+name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?]*
+modifier_event	[ukhpGH]+
 modifier_bp	[rwx]{1,3}
 
 %%
@@ -168,6 +169,7 @@
 branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
 ,			{ return ','; }
 "/"			{ BEGIN(INITIAL); return '/'; }
+{name_minus}		{ return str(yyscanner, PE_NAME); }
 }
 
 mem:			{ BEGIN(mem); return PE_PREFIX_MEM; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index cd88209..0f9914a 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -352,6 +352,15 @@
 	$$ = term;
 }
 |
+PE_NAME '=' PE_VALUE_SYM_HW
+{
+	struct parse_events__term *term;
+	int config = $3 & 255;
+
+	ABORT_ON(parse_events__term_sym_hw(&term, $1, config));
+	$$ = term;
+}
+|
 PE_NAME
 {
 	struct parse_events__term *term;
@@ -361,6 +370,15 @@
 	$$ = term;
 }
 |
+PE_VALUE_SYM_HW
+{
+	struct parse_events__term *term;
+	int config = $1 & 255;
+
+	ABORT_ON(parse_events__term_sym_hw(&term, NULL, config));
+	$$ = term;
+}
+|
 PE_TERM '=' PE_NAME
 {
 	struct parse_events__term *term;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 8a2229d..9bdc60c 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -22,7 +22,7 @@
  * Parse & process all the sysfs attributes located under
  * the directory specified in 'dir' parameter.
  */
-static int pmu_format_parse(char *dir, struct list_head *head)
+int perf_pmu__format_parse(char *dir, struct list_head *head)
 {
 	struct dirent *evt_ent;
 	DIR *format_dir;
@@ -77,7 +77,7 @@
 	if (stat(path, &st) < 0)
 		return 0;	/* no error if format does not exist */
 
-	if (pmu_format_parse(path, format))
+	if (perf_pmu__format_parse(path, format))
 		return -1;
 
 	return 0;
@@ -164,7 +164,7 @@
 		 "%s/bus/event_source/devices/%s/events", sysfs, name);
 
 	if (stat(path, &st) < 0)
-		return -1;
+		return 0;	 /* no error if 'events' does not exist */
 
 	if (pmu_aliases_parse(path, head))
 		return -1;
@@ -296,6 +296,9 @@
 	if (pmu_format(name, &format))
 		return NULL;
 
+	if (pmu_aliases(name, &aliases))
+		return NULL;
+
 	if (pmu_type(name, &type))
 		return NULL;
 
@@ -305,8 +308,6 @@
 
 	pmu->cpus = pmu_cpumask(name);
 
-	pmu_aliases(name, &aliases);
-
 	INIT_LIST_HEAD(&pmu->format);
 	INIT_LIST_HEAD(&pmu->aliases);
 	list_splice(&format, &pmu->format);
@@ -445,8 +446,9 @@
 	return 0;
 }
 
-static int pmu_config(struct list_head *formats, struct perf_event_attr *attr,
-		      struct list_head *head_terms)
+int perf_pmu__config_terms(struct list_head *formats,
+			   struct perf_event_attr *attr,
+			   struct list_head *head_terms)
 {
 	struct parse_events__term *term;
 
@@ -466,7 +468,7 @@
 		     struct list_head *head_terms)
 {
 	attr->type = pmu->type;
-	return pmu_config(&pmu->format, attr, head_terms);
+	return perf_pmu__config_terms(&pmu->format, attr, head_terms);
 }
 
 static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu,
@@ -550,177 +552,3 @@
 	for (b = from; b <= to; b++)
 		set_bit(b, bits);
 }
-
-/* Simulated format definitions. */
-static struct test_format {
-	const char *name;
-	const char *value;
-} test_formats[] = {
-	{ "krava01", "config:0-1,62-63\n", },
-	{ "krava02", "config:10-17\n", },
-	{ "krava03", "config:5\n", },
-	{ "krava11", "config1:0,2,4,6,8,20-28\n", },
-	{ "krava12", "config1:63\n", },
-	{ "krava13", "config1:45-47\n", },
-	{ "krava21", "config2:0-3,10-13,20-23,30-33,40-43,50-53,60-63\n", },
-	{ "krava22", "config2:8,18,48,58\n", },
-	{ "krava23", "config2:28-29,38\n", },
-};
-
-#define TEST_FORMATS_CNT (sizeof(test_formats) / sizeof(struct test_format))
-
-/* Simulated users input. */
-static struct parse_events__term test_terms[] = {
-	{
-		.config    = (char *) "krava01",
-		.val.num   = 15,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-	{
-		.config    = (char *) "krava02",
-		.val.num   = 170,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-	{
-		.config    = (char *) "krava03",
-		.val.num   = 1,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-	{
-		.config    = (char *) "krava11",
-		.val.num   = 27,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-	{
-		.config    = (char *) "krava12",
-		.val.num   = 1,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-	{
-		.config    = (char *) "krava13",
-		.val.num   = 2,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-	{
-		.config    = (char *) "krava21",
-		.val.num   = 119,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-	{
-		.config    = (char *) "krava22",
-		.val.num   = 11,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-	{
-		.config    = (char *) "krava23",
-		.val.num   = 2,
-		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
-		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
-	},
-};
-#define TERMS_CNT (sizeof(test_terms) / sizeof(struct parse_events__term))
-
-/*
- * Prepare format directory data, exported by kernel
- * at /sys/bus/event_source/devices/<dev>/format.
- */
-static char *test_format_dir_get(void)
-{
-	static char dir[PATH_MAX];
-	unsigned int i;
-
-	snprintf(dir, PATH_MAX, "/tmp/perf-pmu-test-format-XXXXXX");
-	if (!mkdtemp(dir))
-		return NULL;
-
-	for (i = 0; i < TEST_FORMATS_CNT; i++) {
-		static char name[PATH_MAX];
-		struct test_format *format = &test_formats[i];
-		FILE *file;
-
-		snprintf(name, PATH_MAX, "%s/%s", dir, format->name);
-
-		file = fopen(name, "w");
-		if (!file)
-			return NULL;
-
-		if (1 != fwrite(format->value, strlen(format->value), 1, file))
-			break;
-
-		fclose(file);
-	}
-
-	return dir;
-}
-
-/* Cleanup format directory. */
-static int test_format_dir_put(char *dir)
-{
-	char buf[PATH_MAX];
-	snprintf(buf, PATH_MAX, "rm -f %s/*\n", dir);
-	if (system(buf))
-		return -1;
-
-	snprintf(buf, PATH_MAX, "rmdir %s\n", dir);
-	return system(buf);
-}
-
-static struct list_head *test_terms_list(void)
-{
-	static LIST_HEAD(terms);
-	unsigned int i;
-
-	for (i = 0; i < TERMS_CNT; i++)
-		list_add_tail(&test_terms[i].list, &terms);
-
-	return &terms;
-}
-
-#undef TERMS_CNT
-
-int perf_pmu__test(void)
-{
-	char *format = test_format_dir_get();
-	LIST_HEAD(formats);
-	struct list_head *terms = test_terms_list();
-	int ret;
-
-	if (!format)
-		return -EINVAL;
-
-	do {
-		struct perf_event_attr attr;
-
-		memset(&attr, 0, sizeof(attr));
-
-		ret = pmu_format_parse(format, &formats);
-		if (ret)
-			break;
-
-		ret = pmu_config(&formats, &attr, terms);
-		if (ret)
-			break;
-
-		ret = -EINVAL;
-
-		if (attr.config  != 0xc00000000002a823)
-			break;
-		if (attr.config1 != 0x8000400000000145)
-			break;
-		if (attr.config2 != 0x0400000020041d07)
-			break;
-
-		ret = 0;
-	} while (0);
-
-	test_format_dir_put(format);
-	return ret;
-}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index fdeb8ac..a313ed7 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -37,6 +37,9 @@
 struct perf_pmu *perf_pmu__find(char *name);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 		     struct list_head *head_terms);
+int perf_pmu__config_terms(struct list_head *formats,
+			   struct perf_event_attr *attr,
+			   struct list_head *head_terms);
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms);
 struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
 				struct list_head *head_terms);
@@ -46,6 +49,7 @@
 int perf_pmu__new_format(struct list_head *list, char *name,
 			 int config, unsigned long *bits);
 void perf_pmu__set_format(unsigned long *bits, long from, long to);
+int perf_pmu__format_parse(char *dir, struct list_head *head);
 
 struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
 
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index 13d36fa..daa17ae 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -17,59 +17,59 @@
 
 struct pstack *pstack__new(unsigned short max_nr_entries)
 {
-	struct pstack *self = zalloc((sizeof(*self) +
-				     max_nr_entries * sizeof(void *)));
-	if (self != NULL)
-		self->max_nr_entries = max_nr_entries;
-	return self;
+	struct pstack *pstack = zalloc((sizeof(*pstack) +
+				       max_nr_entries * sizeof(void *)));
+	if (pstack != NULL)
+		pstack->max_nr_entries = max_nr_entries;
+	return pstack;
 }
 
-void pstack__delete(struct pstack *self)
+void pstack__delete(struct pstack *pstack)
 {
-	free(self);
+	free(pstack);
 }
 
-bool pstack__empty(const struct pstack *self)
+bool pstack__empty(const struct pstack *pstack)
 {
-	return self->top == 0;
+	return pstack->top == 0;
 }
 
-void pstack__remove(struct pstack *self, void *key)
+void pstack__remove(struct pstack *pstack, void *key)
 {
-	unsigned short i = self->top, last_index = self->top - 1;
+	unsigned short i = pstack->top, last_index = pstack->top - 1;
 
 	while (i-- != 0) {
-		if (self->entries[i] == key) {
+		if (pstack->entries[i] == key) {
 			if (i < last_index)
-				memmove(self->entries + i,
-					self->entries + i + 1,
+				memmove(pstack->entries + i,
+					pstack->entries + i + 1,
 					(last_index - i) * sizeof(void *));
-			--self->top;
+			--pstack->top;
 			return;
 		}
 	}
 	pr_err("%s: %p not on the pstack!\n", __func__, key);
 }
 
-void pstack__push(struct pstack *self, void *key)
+void pstack__push(struct pstack *pstack, void *key)
 {
-	if (self->top == self->max_nr_entries) {
-		pr_err("%s: top=%d, overflow!\n", __func__, self->top);
+	if (pstack->top == pstack->max_nr_entries) {
+		pr_err("%s: top=%d, overflow!\n", __func__, pstack->top);
 		return;
 	}
-	self->entries[self->top++] = key;
+	pstack->entries[pstack->top++] = key;
 }
 
-void *pstack__pop(struct pstack *self)
+void *pstack__pop(struct pstack *pstack)
 {
 	void *ret;
 
-	if (self->top == 0) {
+	if (pstack->top == 0) {
 		pr_err("%s: underflow!\n", __func__);
 		return NULL;
 	}
 
-	ret = self->entries[--self->top];
-	self->entries[self->top] = NULL;
+	ret = pstack->entries[--pstack->top];
+	pstack->entries[pstack->top] = NULL;
 	return ret;
 }
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 9181bf2..a2657fd 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -1015,6 +1015,8 @@
 	    pyrf_cpu_map__setup_types() < 0)
 		return;
 
+	page_size = sysconf(_SC_PAGE_SIZE);
+
 	Py_INCREF(&pyrf_evlist__type);
 	PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
 
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
index 0171fb6..a16cdd2 100644
--- a/tools/perf/util/rblist.c
+++ b/tools/perf/util/rblist.c
@@ -44,6 +44,7 @@
 void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
 {
 	rb_erase(rb_node, &rblist->entries);
+	--rblist->nr_entries;
 	rblist->node_delete(rblist, rb_node);
 }
 
@@ -87,8 +88,7 @@
 		while (next) {
 			pos = next;
 			next = rb_next(pos);
-			rb_erase(pos, &rblist->entries);
-			rblist->node_delete(rblist, pos);
+			rblist__remove_node(rblist, pos);
 		}
 		free(rblist);
 	}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 730c663..14683df 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -32,7 +32,6 @@
 #include "../event.h"
 #include "../thread.h"
 #include "../trace-event.h"
-#include "../evsel.h"
 
 PyMODINIT_FUNC initperf_trace_context(void);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8cdd232..ce6f511 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1375,15 +1375,13 @@
 {
 	u64 head, page_offset, file_offset, file_pos, progress_next;
 	int err, mmap_prot, mmap_flags, map_idx = 0;
-	size_t	page_size, mmap_size;
+	size_t	mmap_size;
 	char *buf, *mmaps[8];
 	union perf_event *event;
 	uint32_t size;
 
 	perf_tool__fill_defaults(tool);
 
-	page_size = sysconf(_SC_PAGESIZE);
-
 	page_offset = page_size * (data_offset / page_size);
 	file_offset = page_offset;
 	head = data_offset - page_offset;
@@ -1460,6 +1458,7 @@
 	session->ordered_samples.next_flush = ULLONG_MAX;
 	err = flush_sample_queue(session, tool);
 out_err:
+	ui_progress__finish();
 	perf_session__warn_about_errors(session, tool);
 	perf_session_free_sample_buffers(session);
 	return err;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 0eae00a..cea133a 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -4,6 +4,7 @@
 #include "hist.h"
 #include "event.h"
 #include "header.h"
+#include "machine.h"
 #include "symbol.h"
 #include "thread.h"
 #include <linux/rbtree.h>
@@ -68,10 +69,6 @@
 				    struct ip_callchain *chain,
 				    struct symbol **parent);
 
-struct branch_info *machine__resolve_bstack(struct machine *self,
-					    struct thread *thread,
-					    struct branch_stack *bs);
-
 bool perf_session__has_traces(struct perf_session *self, const char *msg);
 
 void mem_bswap_64(void *src, int byte_size);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 5786f32..b4e8c3b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -52,6 +52,22 @@
 	u32			nr_events;
 };
 
+struct hist_entry_diff {
+	bool	computed;
+
+	/* PERF_HPP__DISPL */
+	int	displacement;
+
+	/* PERF_HPP__DELTA */
+	double	period_ratio_delta;
+
+	/* PERF_HPP__RATIO */
+	double	period_ratio;
+
+	/* HISTC_WEIGHTED_DIFF */
+	s64	wdiff;
+};
+
 /**
  * struct hist_entry - histogram entry
  *
@@ -61,12 +77,18 @@
 struct hist_entry {
 	struct rb_node		rb_node_in;
 	struct rb_node		rb_node;
+	union {
+		struct list_head node;
+		struct list_head head;
+	} pairs;
 	struct he_stat		stat;
 	struct map_symbol	ms;
 	struct thread		*thread;
 	u64			ip;
 	s32			cpu;
 
+	struct hist_entry_diff	diff;
+
 	/* XXX These two should move to some tree widget lib */
 	u16			row_offset;
 	u16			nr_rows;
@@ -78,15 +100,30 @@
 	char			*srcline;
 	struct symbol		*parent;
 	unsigned long		position;
-	union {
-		struct hist_entry *pair;
-		struct rb_root	  sorted_chain;
-	};
+	struct rb_root		sorted_chain;
 	struct branch_info	*branch_info;
 	struct hists		*hists;
 	struct callchain_root	callchain[0];
 };
 
+static inline bool hist_entry__has_pairs(struct hist_entry *he)
+{
+	return !list_empty(&he->pairs.node);
+}
+
+static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
+{
+	if (hist_entry__has_pairs(he))
+		return list_entry(he->pairs.node.next, struct hist_entry, pairs.node);
+	return NULL;
+}
+
+static inline void hist__entry_add_pair(struct hist_entry *he,
+					struct hist_entry *pair)
+{
+	list_add_tail(&he->pairs.head, &pair->pairs.node);
+}
+
 enum sort_type {
 	SORT_PID,
 	SORT_COMM,
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 3217059..346707d 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -314,6 +314,24 @@
 }
 
 /**
+ * strxfrchar - Locate and replace character in @s
+ * @s:    The string to be searched/changed.
+ * @from: Source character to be replaced.
+ * @to:   Destination character.
+ *
+ * Return pointer to the changed string.
+ */
+char *strxfrchar(char *s, char from, char to)
+{
+	char *p = s;
+
+	while ((p = strchr(p, from)) != NULL)
+		*p++ = to;
+
+	return s;
+}
+
+/**
  * rtrim - Removes trailing whitespace from @s.
  * @s: The string to be stripped.
  *
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e2e8c69..295f8d4 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -12,6 +12,7 @@
 #include "build-id.h"
 #include "util.h"
 #include "debug.h"
+#include "machine.h"
 #include "symbol.h"
 #include "strlist.h"
 
@@ -23,7 +24,6 @@
 #define KSYM_NAME_LEN 256
 #endif
 
-static void dso_cache__free(struct rb_root *root);
 static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 				symbol_filter_t filter);
 static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
@@ -56,39 +56,6 @@
 
 #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
 
-static enum dso_binary_type binary_type_data[] = {
-	DSO_BINARY_TYPE__BUILD_ID_CACHE,
-	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
-	DSO_BINARY_TYPE__NOT_FOUND,
-};
-
-#define DSO_BINARY_TYPE__DATA_CNT ARRAY_SIZE(binary_type_data)
-
-int dso__name_len(const struct dso *dso)
-{
-	if (!dso)
-		return strlen("[unknown]");
-	if (verbose)
-		return dso->long_name_len;
-
-	return dso->short_name_len;
-}
-
-bool dso__loaded(const struct dso *dso, enum map_type type)
-{
-	return dso->loaded & (1 << type);
-}
-
-bool dso__sorted_by_name(const struct dso *dso, enum map_type type)
-{
-	return dso->sorted_by_name & (1 << type);
-}
-
-static void dso__set_sorted_by_name(struct dso *dso, enum map_type type)
-{
-	dso->sorted_by_name |= (1 << type);
-}
-
 bool symbol_type__is_a(char symbol_type, enum map_type map_type)
 {
 	symbol_type = toupper(symbol_type);
@@ -270,7 +237,7 @@
 	free(((void *)sym) - symbol_conf.priv_size);
 }
 
-static size_t symbol__fprintf(struct symbol *sym, FILE *fp)
+size_t symbol__fprintf(struct symbol *sym, FILE *fp)
 {
 	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
 		       sym->start, sym->end,
@@ -301,53 +268,7 @@
 	return symbol__fprintf_symname_offs(sym, NULL, fp);
 }
 
-void dso__set_long_name(struct dso *dso, char *name)
-{
-	if (name == NULL)
-		return;
-	dso->long_name = name;
-	dso->long_name_len = strlen(name);
-}
-
-static void dso__set_short_name(struct dso *dso, const char *name)
-{
-	if (name == NULL)
-		return;
-	dso->short_name = name;
-	dso->short_name_len = strlen(name);
-}
-
-static void dso__set_basename(struct dso *dso)
-{
-	dso__set_short_name(dso, basename(dso->long_name));
-}
-
-struct dso *dso__new(const char *name)
-{
-	struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
-
-	if (dso != NULL) {
-		int i;
-		strcpy(dso->name, name);
-		dso__set_long_name(dso, dso->name);
-		dso__set_short_name(dso, dso->name);
-		for (i = 0; i < MAP__NR_TYPES; ++i)
-			dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
-		dso->cache = RB_ROOT;
-		dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
-		dso->data_type   = DSO_BINARY_TYPE__NOT_FOUND;
-		dso->loaded = 0;
-		dso->sorted_by_name = 0;
-		dso->has_build_id = 0;
-		dso->kernel = DSO_TYPE_USER;
-		dso->needs_swap = DSO_SWAP__UNSET;
-		INIT_LIST_HEAD(&dso->node);
-	}
-
-	return dso;
-}
-
-static void symbols__delete(struct rb_root *symbols)
+void symbols__delete(struct rb_root *symbols)
 {
 	struct symbol *pos;
 	struct rb_node *next = rb_first(symbols);
@@ -360,25 +281,6 @@
 	}
 }
 
-void dso__delete(struct dso *dso)
-{
-	int i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		symbols__delete(&dso->symbols[i]);
-	if (dso->sname_alloc)
-		free((char *)dso->short_name);
-	if (dso->lname_alloc)
-		free(dso->long_name);
-	dso_cache__free(&dso->cache);
-	free(dso);
-}
-
-void dso__set_build_id(struct dso *dso, void *build_id)
-{
-	memcpy(dso->build_id, build_id, sizeof(dso->build_id));
-	dso->has_build_id = 1;
-}
-
 void symbols__insert(struct rb_root *symbols, struct symbol *sym)
 {
 	struct rb_node **p = &symbols->rb_node;
@@ -504,29 +406,6 @@
 				     &dso->symbols[type]);
 }
 
-int build_id__sprintf(const u8 *build_id, int len, char *bf)
-{
-	char *bid = bf;
-	const u8 *raw = build_id;
-	int i;
-
-	for (i = 0; i < len; ++i) {
-		sprintf(bid, "%02x", *raw);
-		++raw;
-		bid += 2;
-	}
-
-	return raw - build_id;
-}
-
-size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
-{
-	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
-	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
-	return fprintf(fp, "%s", sbuild_id);
-}
-
 size_t dso__fprintf_symbols_by_name(struct dso *dso,
 				    enum map_type type, FILE *fp)
 {
@@ -542,25 +421,6 @@
 	return ret;
 }
 
-size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
-{
-	struct rb_node *nd;
-	size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
-
-	if (dso->short_name != dso->long_name)
-		ret += fprintf(fp, "%s, ", dso->long_name);
-	ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
-		       dso->loaded ? "" : "NOT ");
-	ret += dso__fprintf_buildid(dso, fp);
-	ret += fprintf(fp, ")\n");
-	for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
-		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
-		ret += symbol__fprintf(pos, fp);
-	}
-
-	return ret;
-}
-
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start))
@@ -892,136 +752,6 @@
 	return -1;
 }
 
-bool dso__build_id_equal(const struct dso *dso, u8 *build_id)
-{
-	return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0;
-}
-
-bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
-{
-	bool have_build_id = false;
-	struct dso *pos;
-
-	list_for_each_entry(pos, head, node) {
-		if (with_hits && !pos->hit)
-			continue;
-		if (pos->has_build_id) {
-			have_build_id = true;
-			continue;
-		}
-		if (filename__read_build_id(pos->long_name, pos->build_id,
-					    sizeof(pos->build_id)) > 0) {
-			have_build_id	  = true;
-			pos->has_build_id = true;
-		}
-	}
-
-	return have_build_id;
-}
-
-char dso__symtab_origin(const struct dso *dso)
-{
-	static const char origin[] = {
-		[DSO_BINARY_TYPE__KALLSYMS]		= 'k',
-		[DSO_BINARY_TYPE__VMLINUX]		= 'v',
-		[DSO_BINARY_TYPE__JAVA_JIT]		= 'j',
-		[DSO_BINARY_TYPE__DEBUGLINK]		= 'l',
-		[DSO_BINARY_TYPE__BUILD_ID_CACHE]	= 'B',
-		[DSO_BINARY_TYPE__FEDORA_DEBUGINFO]	= 'f',
-		[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO]	= 'u',
-		[DSO_BINARY_TYPE__BUILDID_DEBUGINFO]	= 'b',
-		[DSO_BINARY_TYPE__SYSTEM_PATH_DSO]	= 'd',
-		[DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE]	= 'K',
-		[DSO_BINARY_TYPE__GUEST_KALLSYMS]	= 'g',
-		[DSO_BINARY_TYPE__GUEST_KMODULE]	= 'G',
-		[DSO_BINARY_TYPE__GUEST_VMLINUX]	= 'V',
-	};
-
-	if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND)
-		return '!';
-	return origin[dso->symtab_type];
-}
-
-int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
-			  char *root_dir, char *file, size_t size)
-{
-	char build_id_hex[BUILD_ID_SIZE * 2 + 1];
-	int ret = 0;
-
-	switch (type) {
-	case DSO_BINARY_TYPE__DEBUGLINK: {
-		char *debuglink;
-
-		strncpy(file, dso->long_name, size);
-		debuglink = file + dso->long_name_len;
-		while (debuglink != file && *debuglink != '/')
-			debuglink--;
-		if (*debuglink == '/')
-			debuglink++;
-		filename__read_debuglink(dso->long_name, debuglink,
-					 size - (debuglink - file));
-		}
-		break;
-	case DSO_BINARY_TYPE__BUILD_ID_CACHE:
-		/* skip the locally configured cache if a symfs is given */
-		if (symbol_conf.symfs[0] ||
-		    (dso__build_id_filename(dso, file, size) == NULL))
-			ret = -1;
-		break;
-
-	case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
-		snprintf(file, size, "%s/usr/lib/debug%s.debug",
-			 symbol_conf.symfs, dso->long_name);
-		break;
-
-	case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
-		snprintf(file, size, "%s/usr/lib/debug%s",
-			 symbol_conf.symfs, dso->long_name);
-		break;
-
-	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
-		if (!dso->has_build_id) {
-			ret = -1;
-			break;
-		}
-
-		build_id__sprintf(dso->build_id,
-				  sizeof(dso->build_id),
-				  build_id_hex);
-		snprintf(file, size,
-			 "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
-			 symbol_conf.symfs, build_id_hex, build_id_hex + 2);
-		break;
-
-	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
-		snprintf(file, size, "%s%s",
-			 symbol_conf.symfs, dso->long_name);
-		break;
-
-	case DSO_BINARY_TYPE__GUEST_KMODULE:
-		snprintf(file, size, "%s%s%s", symbol_conf.symfs,
-			 root_dir, dso->long_name);
-		break;
-
-	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
-		snprintf(file, size, "%s%s", symbol_conf.symfs,
-			 dso->long_name);
-		break;
-
-	default:
-	case DSO_BINARY_TYPE__KALLSYMS:
-	case DSO_BINARY_TYPE__VMLINUX:
-	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
-	case DSO_BINARY_TYPE__GUEST_VMLINUX:
-	case DSO_BINARY_TYPE__JAVA_JIT:
-	case DSO_BINARY_TYPE__NOT_FOUND:
-		ret = -1;
-		break;
-	}
-
-	return ret;
-}
-
 int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 {
 	char *name;
@@ -1157,27 +887,6 @@
 	return NULL;
 }
 
-static int dso__kernel_module_get_build_id(struct dso *dso,
-					   const char *root_dir)
-{
-	char filename[PATH_MAX];
-	/*
-	 * kernel module short names are of the form "[module]" and
-	 * we need just "module" here.
-	 */
-	const char *name = dso->short_name + 1;
-
-	snprintf(filename, sizeof(filename),
-		 "%s/sys/module/%.*s/notes/.note.gnu.build-id",
-		 root_dir, (int)strlen(name) - 1, name);
-
-	if (sysfs__read_build_id(filename, dso->build_id,
-				 sizeof(dso->build_id)) == 0)
-		dso->has_build_id = true;
-
-	return 0;
-}
-
 static int map_groups__set_modules_path_dir(struct map_groups *mg,
 				const char *dir_name)
 {
@@ -1591,50 +1300,6 @@
 	return err;
 }
 
-void dsos__add(struct list_head *head, struct dso *dso)
-{
-	list_add_tail(&dso->node, head);
-}
-
-struct dso *dsos__find(struct list_head *head, const char *name)
-{
-	struct dso *pos;
-
-	list_for_each_entry(pos, head, node)
-		if (strcmp(pos->long_name, name) == 0)
-			return pos;
-	return NULL;
-}
-
-struct dso *__dsos__findnew(struct list_head *head, const char *name)
-{
-	struct dso *dso = dsos__find(head, name);
-
-	if (!dso) {
-		dso = dso__new(name);
-		if (dso != NULL) {
-			dsos__add(head, dso);
-			dso__set_basename(dso);
-		}
-	}
-
-	return dso;
-}
-
-size_t __dsos__fprintf(struct list_head *head, FILE *fp)
-{
-	struct dso *pos;
-	size_t ret = 0;
-
-	list_for_each_entry(pos, head, node) {
-		int i;
-		for (i = 0; i < MAP__NR_TYPES; ++i)
-			ret += dso__fprintf(pos, i, fp);
-	}
-
-	return ret;
-}
-
 size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp)
 {
 	struct rb_node *nd;
@@ -1649,21 +1314,6 @@
 	return ret;
 }
 
-static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
-				      bool with_hits)
-{
-	struct dso *pos;
-	size_t ret = 0;
-
-	list_for_each_entry(pos, head, node) {
-		if (with_hits && !pos->hit)
-			continue;
-		ret += dso__fprintf_buildid(pos, fp);
-		ret += fprintf(fp, " %s\n", pos->long_name);
-	}
-	return ret;
-}
-
 size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
 				     bool with_hits)
 {
@@ -1684,39 +1334,6 @@
 	return ret;
 }
 
-static struct dso*
-dso__kernel_findnew(struct machine *machine, const char *name,
-		    const char *short_name, int dso_type)
-{
-	/*
-	 * The kernel dso could be created by build_id processing.
-	 */
-	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name);
-
-	/*
-	 * We need to run this in all cases, since during the build_id
-	 * processing we had no idea this was the kernel dso.
-	 */
-	if (dso != NULL) {
-		dso__set_short_name(dso, short_name);
-		dso->kernel = dso_type;
-	}
-
-	return dso;
-}
-
-void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
-{
-	char path[PATH_MAX];
-
-	if (machine__is_default_guest(machine))
-		return;
-	sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
-	if (sysfs__read_build_id(path, dso->build_id,
-				 sizeof(dso->build_id)) == 0)
-		dso->has_build_id = true;
-}
-
 static struct dso *machine__get_kernel(struct machine *machine)
 {
 	const char *vmlinux_name = NULL;
@@ -2065,49 +1682,6 @@
 	return machine__create_kernel_maps(machine);
 }
 
-static int hex(char ch)
-{
-	if ((ch >= '0') && (ch <= '9'))
-		return ch - '0';
-	if ((ch >= 'a') && (ch <= 'f'))
-		return ch - 'a' + 10;
-	if ((ch >= 'A') && (ch <= 'F'))
-		return ch - 'A' + 10;
-	return -1;
-}
-
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-int hex2u64(const char *ptr, u64 *long_val)
-{
-	const char *p = ptr;
-	*long_val = 0;
-
-	while (*p) {
-		const int hex_val = hex(*p);
-
-		if (hex_val < 0)
-			break;
-
-		*long_val = (*long_val << 4) | hex_val;
-		p++;
-	}
-
-	return p - ptr;
-}
-
-char *strxfrchar(char *s, char from, char to)
-{
-	char *p = s;
-
-	while ((p = strchr(p, from)) != NULL)
-		*p++ = to;
-
-	return s;
-}
-
 int machines__create_guest_kernel_maps(struct rb_root *machines)
 {
 	int ret = 0;
@@ -2202,229 +1776,3 @@
 
 	return ret;
 }
-
-struct map *dso__new_map(const char *name)
-{
-	struct map *map = NULL;
-	struct dso *dso = dso__new(name);
-
-	if (dso)
-		map = map__new2(0, dso, MAP__FUNCTION);
-
-	return map;
-}
-
-static int open_dso(struct dso *dso, struct machine *machine)
-{
-	char *root_dir = (char *) "";
-	char *name;
-	int fd;
-
-	name = malloc(PATH_MAX);
-	if (!name)
-		return -ENOMEM;
-
-	if (machine)
-		root_dir = machine->root_dir;
-
-	if (dso__binary_type_file(dso, dso->data_type,
-				  root_dir, name, PATH_MAX)) {
-		free(name);
-		return -EINVAL;
-	}
-
-	fd = open(name, O_RDONLY);
-	free(name);
-	return fd;
-}
-
-int dso__data_fd(struct dso *dso, struct machine *machine)
-{
-	int i = 0;
-
-	if (dso->data_type != DSO_BINARY_TYPE__NOT_FOUND)
-		return open_dso(dso, machine);
-
-	do {
-		int fd;
-
-		dso->data_type = binary_type_data[i++];
-
-		fd = open_dso(dso, machine);
-		if (fd >= 0)
-			return fd;
-
-	} while (dso->data_type != DSO_BINARY_TYPE__NOT_FOUND);
-
-	return -EINVAL;
-}
-
-static void
-dso_cache__free(struct rb_root *root)
-{
-	struct rb_node *next = rb_first(root);
-
-	while (next) {
-		struct dso_cache *cache;
-
-		cache = rb_entry(next, struct dso_cache, rb_node);
-		next = rb_next(&cache->rb_node);
-		rb_erase(&cache->rb_node, root);
-		free(cache);
-	}
-}
-
-static struct dso_cache*
-dso_cache__find(struct rb_root *root, u64 offset)
-{
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct dso_cache *cache;
-
-	while (*p != NULL) {
-		u64 end;
-
-		parent = *p;
-		cache = rb_entry(parent, struct dso_cache, rb_node);
-		end = cache->offset + DSO__DATA_CACHE_SIZE;
-
-		if (offset < cache->offset)
-			p = &(*p)->rb_left;
-		else if (offset >= end)
-			p = &(*p)->rb_right;
-		else
-			return cache;
-	}
-	return NULL;
-}
-
-static void
-dso_cache__insert(struct rb_root *root, struct dso_cache *new)
-{
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct dso_cache *cache;
-	u64 offset = new->offset;
-
-	while (*p != NULL) {
-		u64 end;
-
-		parent = *p;
-		cache = rb_entry(parent, struct dso_cache, rb_node);
-		end = cache->offset + DSO__DATA_CACHE_SIZE;
-
-		if (offset < cache->offset)
-			p = &(*p)->rb_left;
-		else if (offset >= end)
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&new->rb_node, parent, p);
-	rb_insert_color(&new->rb_node, root);
-}
-
-static ssize_t
-dso_cache__memcpy(struct dso_cache *cache, u64 offset,
-		  u8 *data, u64 size)
-{
-	u64 cache_offset = offset - cache->offset;
-	u64 cache_size   = min(cache->size - cache_offset, size);
-
-	memcpy(data, cache->data + cache_offset, cache_size);
-	return cache_size;
-}
-
-static ssize_t
-dso_cache__read(struct dso *dso, struct machine *machine,
-		 u64 offset, u8 *data, ssize_t size)
-{
-	struct dso_cache *cache;
-	ssize_t ret;
-	int fd;
-
-	fd = dso__data_fd(dso, machine);
-	if (fd < 0)
-		return -1;
-
-	do {
-		u64 cache_offset;
-
-		ret = -ENOMEM;
-
-		cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
-		if (!cache)
-			break;
-
-		cache_offset = offset & DSO__DATA_CACHE_MASK;
-		ret = -EINVAL;
-
-		if (-1 == lseek(fd, cache_offset, SEEK_SET))
-			break;
-
-		ret = read(fd, cache->data, DSO__DATA_CACHE_SIZE);
-		if (ret <= 0)
-			break;
-
-		cache->offset = cache_offset;
-		cache->size   = ret;
-		dso_cache__insert(&dso->cache, cache);
-
-		ret = dso_cache__memcpy(cache, offset, data, size);
-
-	} while (0);
-
-	if (ret <= 0)
-		free(cache);
-
-	close(fd);
-	return ret;
-}
-
-static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
-			      u64 offset, u8 *data, ssize_t size)
-{
-	struct dso_cache *cache;
-
-	cache = dso_cache__find(&dso->cache, offset);
-	if (cache)
-		return dso_cache__memcpy(cache, offset, data, size);
-	else
-		return dso_cache__read(dso, machine, offset, data, size);
-}
-
-ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
-			      u64 offset, u8 *data, ssize_t size)
-{
-	ssize_t r = 0;
-	u8 *p = data;
-
-	do {
-		ssize_t ret;
-
-		ret = dso_cache_read(dso, machine, offset, p, size);
-		if (ret < 0)
-			return ret;
-
-		/* Reached EOF, return what we have. */
-		if (!ret)
-			break;
-
-		BUG_ON(ret > size);
-
-		r      += ret;
-		p      += ret;
-		offset += ret;
-		size   -= ret;
-
-	} while (size);
-
-	return r;
-}
-
-ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
-			    struct machine *machine, u64 addr,
-			    u8 *data, ssize_t size)
-{
-	u64 offset = map->map_ip(map, addr);
-	return dso__data_read_offset(dso, machine, offset, data, size);
-}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 8b6ef7f..de68f98 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -11,6 +11,7 @@
 #include <stdio.h>
 #include <byteswap.h>
 #include <libgen.h>
+#include "build-id.h"
 
 #ifdef LIBELF_SUPPORT
 #include <libelf.h>
@@ -18,6 +19,8 @@
 #include <elf.h>
 #endif
 
+#include "dso.h"
+
 #ifdef HAVE_CPLUS_DEMANGLE
 extern char *cplus_demangle(const char *, int);
 
@@ -39,9 +42,6 @@
 #endif
 #endif
 
-int hex2u64(const char *ptr, u64 *val);
-char *strxfrchar(char *s, char from, char to);
-
 /*
  * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
  * for newer versions we can use mmap to reduce memory usage:
@@ -57,8 +57,6 @@
 #define DMGL_ANSI        (1 << 1)       /* Include const, volatile, etc */
 #endif
 
-#define BUILD_ID_SIZE 20
-
 /** struct symbol - symtab entry
  *
  * @ignore - resolvable but tools ignore it (e.g. idle routines)
@@ -74,6 +72,7 @@
 };
 
 void symbol__delete(struct symbol *sym);
+void symbols__delete(struct rb_root *symbols);
 
 static inline size_t symbol__size(const struct symbol *sym)
 {
@@ -164,70 +163,6 @@
 	s32	      cpu;
 };
 
-enum dso_binary_type {
-	DSO_BINARY_TYPE__KALLSYMS = 0,
-	DSO_BINARY_TYPE__GUEST_KALLSYMS,
-	DSO_BINARY_TYPE__VMLINUX,
-	DSO_BINARY_TYPE__GUEST_VMLINUX,
-	DSO_BINARY_TYPE__JAVA_JIT,
-	DSO_BINARY_TYPE__DEBUGLINK,
-	DSO_BINARY_TYPE__BUILD_ID_CACHE,
-	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
-	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
-	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
-	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
-	DSO_BINARY_TYPE__GUEST_KMODULE,
-	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
-	DSO_BINARY_TYPE__NOT_FOUND,
-};
-
-enum dso_kernel_type {
-	DSO_TYPE_USER = 0,
-	DSO_TYPE_KERNEL,
-	DSO_TYPE_GUEST_KERNEL
-};
-
-enum dso_swap_type {
-	DSO_SWAP__UNSET,
-	DSO_SWAP__NO,
-	DSO_SWAP__YES,
-};
-
-#define DSO__DATA_CACHE_SIZE 4096
-#define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1)
-
-struct dso_cache {
-	struct rb_node	rb_node;
-	u64 offset;
-	u64 size;
-	char data[0];
-};
-
-struct dso {
-	struct list_head node;
-	struct rb_root	 symbols[MAP__NR_TYPES];
-	struct rb_root	 symbol_names[MAP__NR_TYPES];
-	struct rb_root	 cache;
-	enum dso_kernel_type	kernel;
-	enum dso_swap_type	needs_swap;
-	enum dso_binary_type	symtab_type;
-	enum dso_binary_type	data_type;
-	u8		 adjust_symbols:1;
-	u8		 has_build_id:1;
-	u8		 hit:1;
-	u8		 annotate_warned:1;
-	u8		 sname_alloc:1;
-	u8		 lname_alloc:1;
-	u8		 sorted_by_name;
-	u8		 loaded;
-	u8		 build_id[BUILD_ID_SIZE];
-	const char	 *short_name;
-	char	 	 *long_name;
-	u16		 long_name_len;
-	u16		 short_name_len;
-	char		 name[0];
-};
-
 struct symsrc {
 	char *name;
 	int fd;
@@ -258,47 +193,6 @@
 bool symsrc__has_symtab(struct symsrc *ss);
 bool symsrc__possibly_runtime(struct symsrc *ss);
 
-#define DSO__SWAP(dso, type, val)			\
-({							\
-	type ____r = val;				\
-	BUG_ON(dso->needs_swap == DSO_SWAP__UNSET);	\
-	if (dso->needs_swap == DSO_SWAP__YES) {		\
-		switch (sizeof(____r)) {		\
-		case 2:					\
-			____r = bswap_16(val);		\
-			break;				\
-		case 4:					\
-			____r = bswap_32(val);		\
-			break;				\
-		case 8:					\
-			____r = bswap_64(val);		\
-			break;				\
-		default:				\
-			BUG_ON(1);			\
-		}					\
-	}						\
-	____r;						\
-})
-
-struct dso *dso__new(const char *name);
-void dso__delete(struct dso *dso);
-
-int dso__name_len(const struct dso *dso);
-
-bool dso__loaded(const struct dso *dso, enum map_type type);
-bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
-
-static inline void dso__set_loaded(struct dso *dso, enum map_type type)
-{
-	dso->loaded |= (1 << type);
-}
-
-void dso__sort_by_name(struct dso *dso, enum map_type type);
-
-void dsos__add(struct list_head *head, struct dso *dso);
-struct dso *dsos__find(struct list_head *head, const char *name);
-struct dso *__dsos__findnew(struct list_head *head, const char *name);
-
 int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter);
 int dso__load_vmlinux(struct dso *dso, struct map *map,
 		      const char *vmlinux, symbol_filter_t filter);
@@ -306,30 +200,7 @@
 			   symbol_filter_t filter);
 int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
 		       symbol_filter_t filter);
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-			   enum map_type type, symbol_filter_t filter);
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
-			       symbol_filter_t filter);
 
-size_t __dsos__fprintf(struct list_head *head, FILE *fp);
-
-size_t machine__fprintf_dsos_buildid(struct machine *machine,
-				     FILE *fp, bool with_hits);
-size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp);
-size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
-				      FILE *fp, bool with_hits);
-size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
-size_t dso__fprintf_symbols_by_name(struct dso *dso,
-				    enum map_type type, FILE *fp);
-size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
-
-char dso__symtab_origin(const struct dso *dso);
-void dso__set_long_name(struct dso *dso, char *name);
-void dso__set_build_id(struct dso *dso, void *build_id);
-bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
-void dso__read_running_kernel_build_id(struct dso *dso,
-				       struct machine *machine);
-struct map *dso__new_map(const char *name);
 struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
 				u64 addr);
 struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
@@ -337,22 +208,12 @@
 
 int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
-bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
-int build_id__sprintf(const u8 *build_id, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start));
 int filename__read_debuglink(const char *filename, char *debuglink,
 			     size_t size);
 
-void machine__destroy_kernel_maps(struct machine *machine);
-int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
-int machine__create_kernel_maps(struct machine *machine);
-
-int machines__create_kernel_maps(struct rb_root *machines, pid_t pid);
-int machines__create_guest_kernel_maps(struct rb_root *machines);
-void machines__destroy_guest_kernel_maps(struct rb_root *machines);
-
 int symbol__init(void);
 void symbol__exit(void);
 void symbol__elf_init(void);
@@ -360,20 +221,9 @@
 size_t symbol__fprintf_symname_offs(const struct symbol *sym,
 				    const struct addr_location *al, FILE *fp);
 size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
+size_t symbol__fprintf(struct symbol *sym, FILE *fp);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
-
-int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
-			  char *root_dir, char *file, size_t size);
-
-int dso__data_fd(struct dso *dso, struct machine *machine);
-ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
-			      u64 offset, u8 *data, ssize_t size);
-ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
-			    struct machine *machine, u64 addr,
-			    u8 *data, ssize_t size);
-int dso__test_data(void);
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, symbol_filter_t filter,
 		  int kmodule);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 8b3e593..df59623 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,7 +7,7 @@
 #include "util.h"
 #include "debug.h"
 
-static struct thread *thread__new(pid_t pid)
+struct thread *thread__new(pid_t pid)
 {
 	struct thread *self = zalloc(sizeof(*self));
 
@@ -60,45 +60,6 @@
 	       map_groups__fprintf(&self->mg, verbose, fp);
 }
 
-struct thread *machine__findnew_thread(struct machine *self, pid_t pid)
-{
-	struct rb_node **p = &self->threads.rb_node;
-	struct rb_node *parent = NULL;
-	struct thread *th;
-
-	/*
-	 * Font-end cache - PID lookups come in blocks,
-	 * so most of the time we dont have to look up
-	 * the full rbtree:
-	 */
-	if (self->last_match && self->last_match->pid == pid)
-		return self->last_match;
-
-	while (*p != NULL) {
-		parent = *p;
-		th = rb_entry(parent, struct thread, rb_node);
-
-		if (th->pid == pid) {
-			self->last_match = th;
-			return th;
-		}
-
-		if (pid < th->pid)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	th = thread__new(pid);
-	if (th != NULL) {
-		rb_link_node(&th->rb_node, parent, p);
-		rb_insert_color(&th->rb_node, &self->threads);
-		self->last_match = th;
-	}
-
-	return th;
-}
-
 void thread__insert_map(struct thread *self, struct map *map)
 {
 	map_groups__fixup_overlappings(&self->mg, map, verbose, stderr);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index f66610b..f2fa17c 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -3,6 +3,7 @@
 
 #include <linux/rbtree.h>
 #include <unistd.h>
+#include <sys/types.h>
 #include "symbol.h"
 
 struct thread {
@@ -22,6 +23,7 @@
 
 struct machine;
 
+struct thread *thread__new(pid_t pid);
 void thread__delete(struct thread *self);
 
 int thread__set_comm(struct thread *self, const char *comm);
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 719ed74..3741572 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -47,8 +47,6 @@
 int host_bigendian;
 static int long_size;
 
-static unsigned long	page_size;
-
 static ssize_t calc_data_size;
 static bool repipe;
 
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 9966459..5906e84 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -10,6 +10,8 @@
 /*
  * XXX We need to find a better place for these things...
  */
+unsigned int page_size;
+
 bool perf_host  = true;
 bool perf_guest = false;
 
@@ -164,6 +166,39 @@
 	return n;
 }
 
+static int hex(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	if ((ch >= 'A') && (ch <= 'F'))
+		return ch - 'A' + 10;
+	return -1;
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+int hex2u64(const char *ptr, u64 *long_val)
+{
+	const char *p = ptr;
+	*long_val = 0;
+
+	while (*p) {
+		const int hex_val = hex(*p);
+
+		if (hex_val < 0)
+			break;
+
+		*long_val = (*long_val << 4) | hex_val;
+		p++;
+	}
+
+	return p - ptr;
+}
+
 /* Obtain a backtrace and print it to stdout. */
 #ifdef BACKTRACE_SUPPORT
 void dump_stack(void)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 70fa70b..c233091 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -198,6 +198,10 @@
 #undef tolower
 #undef toupper
 
+#ifndef NSEC_PER_MSEC
+#define NSEC_PER_MSEC	1000000L
+#endif
+
 extern unsigned char sane_ctype[256];
 #define GIT_SPACE		0x01
 #define GIT_DIGIT		0x02
@@ -236,6 +240,7 @@
 bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
 int strtailcmp(const char *s1, const char *s2);
+char *strxfrchar(char *s, char from, char to);
 unsigned long convert_unit(unsigned long value, char *unit);
 int readn(int fd, void *buf, size_t size);
 
@@ -258,9 +263,12 @@
 }
 
 size_t hex_width(u64 v);
+int hex2u64(const char *ptr, u64 *val);
 
 char *rtrim(char *s);
 
 void dump_stack(void);
 
+extern unsigned int page_size;
+
 #endif