Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (470 commits)
  x86: Fix comments of register/stack access functions
  perf tools: Replace %m with %a in sscanf
  hw-breakpoints: Keep track of user disabled breakpoints
  tracing/syscalls: Make syscall events print callbacks static
  tracing: Add DEFINE_EVENT(), DEFINE_SINGLE_EVENT() support to docbook
  perf: Don't free perf_mmap_data until work has been done
  perf_event: Fix compile error
  perf tools: Fix _GNU_SOURCE macro related strndup() build error
  trace_syscalls: Remove unused syscall_name_to_nr()
  trace_syscalls: Simplify syscall profile
  trace_syscalls: Remove duplicate init_enter_##sname()
  trace_syscalls: Add syscall_nr field to struct syscall_metadata
  trace_syscalls: Remove enter_id exit_id
  trace_syscalls: Set event_enter_##sname->data to its metadata
  trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit
  perf_event: Initialize data.period in perf_swevent_hrtimer()
  perf probe: Simplify event naming
  perf probe: Add --list option for listing current probe events
  perf probe: Add argv_split() from lib/argv_split.c
  perf probe: Move probe event utility functions to probe-event.c
  ...
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
index b0756d0..8bca1d5 100644
--- a/Documentation/DocBook/tracepoint.tmpl
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -86,4 +86,9 @@
 !Iinclude/trace/events/irq.h
   </chapter>
 
+  <chapter id="signal">
+   <title>SIGNAL</title>
+!Iinclude/trace/events/signal.h
+  </chapter>
+
 </book>
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
new file mode 100644
index 0000000..47aabee
--- /dev/null
+++ b/Documentation/trace/kprobetrace.txt
@@ -0,0 +1,149 @@
+                        Kprobe-based Event Tracing
+                        ==========================
+
+                 Documentation is written by Masami Hiramatsu
+
+
+Overview
+--------
+These events are similar to tracepoint based events. Instead of Tracepoint,
+this is based on kprobes (kprobe and kretprobe). So it can probe wherever
+kprobes can probe (this means, all functions body except for __kprobes
+functions). Unlike the Tracepoint based event, this can be added and removed
+dynamically, on the fly.
+
+To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y.
+
+Similar to the events tracer, this doesn't need to be activated via
+current_tracer. Instead of that, add probe points via
+/sys/kernel/debug/tracing/kprobe_events, and enable it via
+/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enabled.
+
+
+Synopsis of kprobe_events
+-------------------------
+  p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS]	: Set a probe
+  r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS]		: Set a return probe
+
+ GRP		: Group name. If omitted, use "kprobes" for it.
+ EVENT		: Event name. If omitted, the event name is generated
+		  based on SYMBOL+offs or MEMADDR.
+ SYMBOL[+offs]	: Symbol+offset where the probe is inserted.
+ MEMADDR	: Address where the probe is inserted.
+
+ FETCHARGS	: Arguments. Each probe can have up to 128 args.
+  %REG		: Fetch register REG
+  @ADDR		: Fetch memory at ADDR (ADDR should be in kernel)
+  @SYM[+|-offs]	: Fetch memory at SYM +|- offs (SYM should be a data symbol)
+  $stackN	: Fetch Nth entry of stack (N >= 0)
+  $stack	: Fetch stack address.
+  $argN		: Fetch function argument. (N >= 0)(*)
+  $retval	: Fetch return value.(**)
+  +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+  NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
+
+  (*) aN may not correct on asmlinkaged functions and at the middle of
+      function body.
+  (**) only for return probe.
+  (***) this is useful for fetching a field of data structures.
+
+
+Per-Probe Event Filtering
+-------------------------
+ Per-probe event filtering feature allows you to set different filter on each
+probe and gives you what arguments will be shown in trace buffer. If an event
+name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event
+under tracing/events/kprobes/<EVENT>, at the directory you can see 'id',
+'enabled', 'format' and 'filter'.
+
+enabled:
+  You can enable/disable the probe by writing 1 or 0 on it.
+
+format:
+  This shows the format of this probe event.
+
+filter:
+  You can write filtering rules of this event.
+
+id:
+  This shows the id of this probe event.
+
+
+Event Profiling
+---------------
+ You can check the total number of probe hits and probe miss-hits via
+/sys/kernel/debug/tracing/kprobe_profile.
+ The first column is event name, the second is the number of probe hits,
+the third is the number of probe miss-hits.
+
+
+Usage examples
+--------------
+To add a probe as a new event, write a new definition to kprobe_events
+as below.
+
+  echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kprobe on the top of do_sys_open() function with recording
+1st to 4th arguments as "myprobe" event. As this example shows, users can
+choose more familiar names for each arguments.
+
+  echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kretprobe on the return point of do_sys_open() function with
+recording return value as "myretprobe" event.
+ You can see the format of these events via
+/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format.
+
+  cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
+name: myprobe
+ID: 75
+format:
+	field:unsigned short common_type;	offset:0;	size:2;
+	field:unsigned char common_flags;	offset:2;	size:1;
+	field:unsigned char common_preempt_count;	offset:3;	size:1;
+	field:int common_pid;	offset:4;	size:4;
+	field:int common_tgid;	offset:8;	size:4;
+
+	field: unsigned long ip;	offset:16;tsize:8;
+	field: int nargs;	offset:24;tsize:4;
+	field: unsigned long dfd;	offset:32;tsize:8;
+	field: unsigned long filename;	offset:40;tsize:8;
+	field: unsigned long flags;	offset:48;tsize:8;
+	field: unsigned long mode;	offset:56;tsize:8;
+
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
+
+
+ You can see that the event has 4 arguments as in the expressions you specified.
+
+  echo > /sys/kernel/debug/tracing/kprobe_events
+
+ This clears all probe points.
+
+ Right after definition, each event is disabled by default. For tracing these
+events, you need to enable it.
+
+  echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
+  echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
+
+ And you can see the traced information via /sys/kernel/debug/tracing/trace.
+
+  cat /sys/kernel/debug/tracing/trace
+# tracer: nop
+#
+#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
+#              | |       |          |         |
+           <...>-1447  [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
+           <...>-1447  [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe
+           <...>-1447  [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6
+           <...>-1447  [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
+           <...>-1447  [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10
+           <...>-1447  [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
+
+
+ Each line shows when the kernel hits an event, and <- SYMBOL means kernel
+returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
+returns from do_sys_open to sys_open+0x1b).
+
+
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f418bb..eef3bbb 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -126,4 +126,11 @@
 config HAVE_DEFAULT_NO_SPIN_MUTEXES
 	bool
 
+config HAVE_HW_BREAKPOINT
+	bool
+	depends on HAVE_PERF_EVENTS
+	select ANON_INODES
+	select PERF_EVENTS
+
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 3b10051..bf3382f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -46,7 +46,7 @@
 
 config HCALL_STATS
 	bool "Hypervisor call instrumentation"
-	depends on PPC_PSERIES && DEBUG_FS
+	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
 	help
 	  Adds code to keep track of the number of hypervisor calls made and
 	  the amount of time spent in hypervisor calls.  Wall time spent in
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index f1889ab..c568329 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -1683,7 +1683,7 @@
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
-CONFIG_HCALL_STATS=y
+# CONFIG_HCALL_STATS is not set
 # CONFIG_CODE_PATCHING_SELFTEST is not set
 # CONFIG_FTR_FIXUP_SELFTEST is not set
 # CONFIG_MSI_BITMAP_SELFTEST is not set
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index 9154e85..f0fb4fc 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -19,6 +19,7 @@
 #define _ASM_POWERPC_EMULATED_OPS_H
 
 #include <asm/atomic.h>
+#include <linux/perf_event.h>
 
 
 #ifdef CONFIG_PPC_EMULATED_STATS
@@ -57,7 +58,7 @@
 
 extern void ppc_warn_emulated_print(const char *type);
 
-#define PPC_WARN_EMULATED(type)						 \
+#define __PPC_WARN_EMULATED(type)					 \
 	do {								 \
 		atomic_inc(&ppc_emulated.type.val);			 \
 		if (ppc_warn_emulated)					 \
@@ -66,8 +67,22 @@
 
 #else /* !CONFIG_PPC_EMULATED_STATS */
 
-#define PPC_WARN_EMULATED(type)	do { } while (0)
+#define __PPC_WARN_EMULATED(type)	do { } while (0)
 
 #endif /* !CONFIG_PPC_EMULATED_STATS */
 
+#define PPC_WARN_EMULATED(type, regs)					\
+	do {								\
+		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,		\
+			1, 0, regs, 0);					\
+		__PPC_WARN_EMULATED(type);				\
+	} while (0)
+
+#define PPC_WARN_ALIGNMENT(type, regs)					\
+	do {								\
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,		\
+			1, 0, regs, regs->dar);				\
+		__PPC_WARN_EMULATED(type);				\
+	} while (0)
+
 #endif /* _ASM_POWERPC_EMULATED_OPS_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 6251a4b..c27caac 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -274,6 +274,8 @@
 	unsigned long	num_calls;	/* number of calls (on this CPU) */
 	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
 	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+	unsigned long	tb_start;
+	unsigned long	purr_start;
 };
 #define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 6315edc..bc8dd53 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -489,6 +489,8 @@
 #define SPRN_MMCR1	798
 #define SPRN_MMCRA	0x312
 #define   MMCRA_SDSYNC	0x80000000UL /* SDAR synced with SIAR */
+#define   MMCRA_SDAR_DCACHE_MISS 0x40000000UL
+#define   MMCRA_SDAR_ERAT_MISS   0x20000000UL
 #define   MMCRA_SIHV	0x10000000UL /* state of MSR HV when SIAR set */
 #define   MMCRA_SIPR	0x08000000UL /* state of MSR PR when SIAR set */
 #define   MMCRA_SLOT	0x07000000UL /* SLOT bits (37-39) */
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
new file mode 100644
index 0000000..cbe2297
--- /dev/null
+++ b/arch/powerpc/include/asm/trace.h
@@ -0,0 +1,133 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM powerpc
+
+#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWERPC_H
+
+#include <linux/tracepoint.h>
+
+struct pt_regs;
+
+TRACE_EVENT(irq_entry,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(irq_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_entry,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+#ifdef CONFIG_PPC_PSERIES
+extern void hcall_tracepoint_regfunc(void);
+extern void hcall_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(hcall_entry,
+
+	TP_PROTO(unsigned long opcode, unsigned long *args),
+
+	TP_ARGS(opcode, args),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+	),
+
+	TP_printk("opcode=%lu", __entry->opcode),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(hcall_exit,
+
+	TP_PROTO(unsigned long opcode, unsigned long retval,
+		unsigned long *retbuf),
+
+	TP_ARGS(opcode, retval, retbuf),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+		__field(unsigned long, retval)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+		__entry->retval = retval;
+	),
+
+	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+#endif
+
+#endif /* _TRACE_POWERPC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index a5b632e5..3839839 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -732,7 +732,7 @@
 
 #ifdef CONFIG_SPE
 	if ((instr >> 26) == 0x4) {
-		PPC_WARN_EMULATED(spe);
+		PPC_WARN_ALIGNMENT(spe, regs);
 		return emulate_spe(regs, reg, instr);
 	}
 #endif
@@ -786,7 +786,7 @@
 			flags |= SPLT;
 			nb = 8;
 		}
-		PPC_WARN_EMULATED(vsx);
+		PPC_WARN_ALIGNMENT(vsx, regs);
 		return emulate_vsx(addr, reg, areg, regs, flags, nb);
 	}
 #endif
@@ -794,7 +794,7 @@
 	 * the exception of DCBZ which is handled as a special case here
 	 */
 	if (instr == DCBZ) {
-		PPC_WARN_EMULATED(dcbz);
+		PPC_WARN_ALIGNMENT(dcbz, regs);
 		return emulate_dcbz(regs, addr);
 	}
 	if (unlikely(nb == 0))
@@ -804,7 +804,7 @@
 	 * function
 	 */
 	if (flags & M) {
-		PPC_WARN_EMULATED(multiple);
+		PPC_WARN_ALIGNMENT(multiple, regs);
 		return emulate_multiple(regs, addr, reg, nb,
 					flags, instr, swiz);
 	}
@@ -825,11 +825,11 @@
 
 	/* Special case for 16-byte FP loads and stores */
 	if (nb == 16) {
-		PPC_WARN_EMULATED(fp_pair);
+		PPC_WARN_ALIGNMENT(fp_pair, regs);
 		return emulate_fp_pair(addr, reg, flags);
 	}
 
-	PPC_WARN_EMULATED(unaligned);
+	PPC_WARN_ALIGNMENT(unaligned, regs);
 
 	/* If we are loading, get the data from user space, else
 	 * get it from register values
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 9763267..bdcb557 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -551,7 +551,7 @@
 BEGIN_FW_FTR_SECTION
 	ld	r5,SOFTE(r1)
 FW_FTR_SECTION_ELSE
-	b	iseries_check_pending_irqs
+	b	.Liseries_check_pending_irqs
 ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
@@ -623,7 +623,7 @@
 
 #endif /* CONFIG_PPC_BOOK3E */
 
-iseries_check_pending_irqs:
+.Liseries_check_pending_irqs:
 #ifdef CONFIG_PPC_ISERIES
 	ld	r5,SOFTE(r1)
 	cmpdi	0,r5,0
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1808876..c7eb4e0 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -185,12 +185,15 @@
 	 * prolog code of the PerformanceMonitor one. A little
 	 * trickery is thus necessary
 	 */
+performance_monitor_pSeries_1:
 	. = 0xf00
 	b	performance_monitor_pSeries
 
+altivec_unavailable_pSeries_1:
 	. = 0xf20
 	b	altivec_unavailable_pSeries
 
+vsx_unavailable_pSeries_1:
 	. = 0xf40
 	b	vsx_unavailable_pSeries
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index e5d1211..02a3346 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -70,6 +70,8 @@
 #include <asm/firmware.h>
 #include <asm/lv1call.h>
 #endif
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
 
 int __irq_offset_value;
 static int ppc_spurious_interrupts;
@@ -325,6 +327,8 @@
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned int irq;
 
+	trace_irq_entry(regs);
+
 	irq_enter();
 
 	check_stack_overflow();
@@ -348,6 +352,8 @@
 		timer_interrupt(regs);
 	}
 #endif
+
+	trace_irq_exit(regs);
 }
 
 void __init init_IRQ(void)
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 87f1663..1eb85fb 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1165,7 +1165,7 @@
 	 */
 	if (record) {
 		struct perf_sample_data data = {
-			.addr	= 0,
+			.addr	= ~0ULL,
 			.period	= event->hw.last_period,
 		};
 
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 0f4c1c7..199de52 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -73,10 +73,6 @@
 #define MMCR1_PMCSEL_MSK	0x7f
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index c351b3a..98b6a72 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -73,10 +73,6 @@
 #define MMCR1_PMCSEL_MSK	0x7f
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
@@ -390,7 +386,7 @@
 			       unsigned int hwc[], unsigned long mmcr[])
 {
 	unsigned long mmcr1 = 0;
-	unsigned long mmcra = 0;
+	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
 	unsigned int pmc, unit, byte, psel;
 	unsigned int ttm, grp;
 	int i, isbus, bit, grsel;
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index ca399ba..84a607b 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -178,7 +178,7 @@
 			   unsigned int hwc[], unsigned long mmcr[])
 {
 	unsigned long mmcr1 = 0;
-	unsigned long mmcra = 0;
+	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
 	int i;
 	unsigned int pmc, ev, b, u, s, psel;
 	unsigned int ttmset = 0;
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 28a4daa..852f7b7 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -51,10 +51,6 @@
 #define MMCR1_PMCSEL_MSK	0xff
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
@@ -230,7 +226,7 @@
 			       unsigned int hwc[], unsigned long mmcr[])
 {
 	unsigned long mmcr1 = 0;
-	unsigned long mmcra = 0;
+	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
 	unsigned int pmc, unit, combine, l2sel, psel;
 	unsigned int pmc_inuse = 0;
 	int i;
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 4795744..8eff48e 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -84,10 +84,6 @@
 };
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4271f7a..845c72a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -660,6 +660,7 @@
 
 #ifdef CONFIG_DEBUG_FS
 struct dentry *powerpc_debugfs_root;
+EXPORT_SYMBOL(powerpc_debugfs_root);
 
 static int powerpc_debugfs_init(void)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a136a11c490..36707de 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -54,6 +54,7 @@
 #include <linux/irq.h>
 #include <linux/delay.h>
 #include <linux/perf_event.h>
+#include <asm/trace.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -571,6 +572,8 @@
 	struct clock_event_device *evt = &decrementer->event;
 	u64 now;
 
+	trace_timer_interrupt_entry(regs);
+
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continuue to take decrementer exceptions */
 	set_dec(DECREMENTER_MAX);
@@ -590,6 +593,7 @@
 		now = decrementer->next_tb - now;
 		if (now <= DECREMENTER_MAX)
 			set_dec((int)now);
+		trace_timer_interrupt_exit(regs);
 		return;
 	}
 	old_regs = set_irq_regs(regs);
@@ -620,6 +624,8 @@
 
 	irq_exit();
 	set_irq_regs(old_regs);
+
+	trace_timer_interrupt_exit(regs);
 }
 
 void wakeup_decrementer(void)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 6f0ae1a..9d1f935 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -759,7 +759,7 @@
 
 	/* Emulate the mfspr rD, PVR. */
 	if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
-		PPC_WARN_EMULATED(mfpvr);
+		PPC_WARN_EMULATED(mfpvr, regs);
 		rd = (instword >> 21) & 0x1f;
 		regs->gpr[rd] = mfspr(SPRN_PVR);
 		return 0;
@@ -767,7 +767,7 @@
 
 	/* Emulating the dcba insn is just a no-op.  */
 	if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
-		PPC_WARN_EMULATED(dcba);
+		PPC_WARN_EMULATED(dcba, regs);
 		return 0;
 	}
 
@@ -776,7 +776,7 @@
 		int shift = (instword >> 21) & 0x1c;
 		unsigned long msk = 0xf0000000UL >> shift;
 
-		PPC_WARN_EMULATED(mcrxr);
+		PPC_WARN_EMULATED(mcrxr, regs);
 		regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
 		regs->xer &= ~0xf0000000UL;
 		return 0;
@@ -784,19 +784,19 @@
 
 	/* Emulate load/store string insn. */
 	if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
-		PPC_WARN_EMULATED(string);
+		PPC_WARN_EMULATED(string, regs);
 		return emulate_string_inst(regs, instword);
 	}
 
 	/* Emulate the popcntb (Population Count Bytes) instruction. */
 	if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
-		PPC_WARN_EMULATED(popcntb);
+		PPC_WARN_EMULATED(popcntb, regs);
 		return emulate_popcntb_inst(regs, instword);
 	}
 
 	/* Emulate isel (Integer Select) instruction */
 	if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
-		PPC_WARN_EMULATED(isel);
+		PPC_WARN_EMULATED(isel, regs);
 		return emulate_isel(regs, instword);
 	}
 
@@ -995,7 +995,7 @@
 #ifdef CONFIG_MATH_EMULATION
 	errcode = do_mathemu(regs);
 	if (errcode >= 0)
-		PPC_WARN_EMULATED(math);
+		PPC_WARN_EMULATED(math, regs);
 
 	switch (errcode) {
 	case 0:
@@ -1018,7 +1018,7 @@
 #elif defined(CONFIG_8XX_MINIMAL_FPEMU)
 	errcode = Soft_emulate_8xx(regs);
 	if (errcode >= 0)
-		PPC_WARN_EMULATED(8xx);
+		PPC_WARN_EMULATED(8xx, regs);
 
 	switch (errcode) {
 	case 0:
@@ -1129,7 +1129,7 @@
 
 	flush_altivec_to_thread(current);
 
-	PPC_WARN_EMULATED(altivec);
+	PPC_WARN_EMULATED(altivec, regs);
 	err = emulate_altivec(regs);
 	if (err == 0) {
 		regs->nip += 4;		/* skip emulated instruction */
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 75f3267..e68beac 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -26,11 +26,11 @@
 	srd	r8,r5,r11
 
 	mtctr	r8
-setup:
+.Lsetup:
 	dcbt	r9,r4
 	dcbz	r9,r3
 	add	r9,r9,r12
-	bdnz	setup
+	bdnz	.Lsetup
 END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
 	addi	r3,r3,-8
 	srdi    r8,r5,7		/* page is copied in 128 byte strides */
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c1427b3..383a5d0 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -14,68 +14,94 @@
 	
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+	.section	".toc","aw"
+
+	.globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+	.llong	0
+
+	.section	".text"
+
 /*
  * precall must preserve all registers.  use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
  */
-#define HCALL_INST_PRECALL					\
-	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
-	mftb	r0;			/* get timebase and */	\
-	std     r0,STK_PARM(r5)(r1);	/* save for later */	\
+#define HCALL_INST_PRECALL(FIRST_REG)				\
 BEGIN_FTR_SECTION;						\
-	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
-	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
-	
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld      r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	std	r3,STK_PARM(r3)(r1);				\
+	std	r4,STK_PARM(r4)(r1);				\
+	std	r5,STK_PARM(r5)(r1);				\
+	std	r6,STK_PARM(r6)(r1);				\
+	std	r7,STK_PARM(r7)(r1);				\
+	std	r8,STK_PARM(r8)(r1);				\
+	std	r9,STK_PARM(r9)(r1);				\
+	std	r10,STK_PARM(r10)(r1);				\
+	std	r0,16(r1);					\
+	addi	r4,r1,STK_PARM(FIRST_REG);			\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_entry;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	ld	r4,STK_PARM(r4)(r1);				\
+	ld	r5,STK_PARM(r5)(r1);				\
+	ld	r6,STK_PARM(r6)(r1);				\
+	ld	r7,STK_PARM(r7)(r1);				\
+	ld	r8,STK_PARM(r8)(r1);				\
+	ld	r9,STK_PARM(r9)(r1);				\
+	ld	r10,STK_PARM(r10)(r1);				\
+	mtlr	r0;						\
+1:
+
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers.  We branch around this
  * in early init (eg when populating the MMU hashtable) by using an
  * unconditional cpu feature.
  */
-#define HCALL_INST_POSTCALL					\
+#define __HCALL_INST_POSTCALL					\
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
-	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
-	bgt-	cr7,1f;						\
-								\
-	/* get time and PURR snapshots after hcall */		\
-	mftb	r7;			/* timebase after */	\
-BEGIN_FTR_SECTION;						\
-	mfspr	r8,SPRN_PURR;		/* PURR after */	\
-	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
-	subf	r6,r6,r8;		/* delta */		\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
-	subf	r5,r5,r7;		/* time delta */	\
-								\
-	/* calculate address of stat structure r4 = opcode */	\
-	srdi	r4,r4,2;		/* index into array */	\
-	mulli	r4,r4,HCALL_STAT_SIZE;				\
-	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
-	add	r4,r4,r7;					\
-	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
-	add	r4,r4,r7;					\
-								\
-	/* update stats	*/					\
-	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
-	addi	r7,r7,1;					\
-	std	r7,HCALL_STAT_CALLS(r4);			\
-	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
-	add	r7,r7,r5;					\
-	std	r7,HCALL_STAT_TB(r4);				\
-BEGIN_FTR_SECTION;						\
-	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
-	add	r7,r7,r6;					\
-	std	r7,HCALL_STAT_PURR(r4);				\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld      r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	ld	r6,STK_PARM(r3)(r1);				\
+	std	r3,STK_PARM(r3)(r1);				\
+	mr	r4,r3;						\
+	mr	r3,r6;						\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_exit;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	mtlr	r0;						\
 1:
+
+#define HCALL_INST_POSTCALL_NORETS				\
+	li	r5,0;						\
+	__HCALL_INST_POSTCALL
+
+#define HCALL_INST_POSTCALL(BUFREG)				\
+	mr	r5,BUFREG;					\
+	__HCALL_INST_POSTCALL
+
 #else
-#define HCALL_INST_PRECALL
-#define HCALL_INST_POSTCALL
+#define HCALL_INST_PRECALL(FIRST_ARG)
+#define HCALL_INST_POSTCALL_NORETS
+#define HCALL_INST_POSTCALL(BUFREG)
 #endif
 
 	.text
@@ -86,11 +112,11 @@
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r4)
 
 	HVSC				/* invoke the hypervisor */
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL_NORETS
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
@@ -102,7 +128,7 @@
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r5)
 
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
@@ -121,7 +147,7 @@
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
@@ -168,7 +194,7 @@
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r5)
 
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
@@ -196,7 +222,7 @@
 	std	r11,56(r12)
 	std	r0, 64(r12)
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 3631a4f..2f58c71 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -26,6 +26,7 @@
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
 #include <asm/cputable.h>
+#include <asm/trace.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -100,6 +101,35 @@
 #define	HCALL_ROOT_DIR		"hcall_inst"
 #define CPU_NAME_BUF_SIZE	32
 
+
+static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &get_cpu_var(hcall_stats)[opcode / 4];
+	h->tb_start = mftb();
+	h->purr_start = mfspr(SPRN_PURR);
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
+			     unsigned long *retbuf)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &__get_cpu_var(hcall_stats)[opcode / 4];
+	h->num_calls++;
+	h->tb_total = mftb() - h->tb_start;
+	h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
+
+	put_cpu_var(hcall_stats);
+}
+
 static int __init hcall_inst_init(void)
 {
 	struct dentry *hcall_root;
@@ -110,6 +140,14 @@
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
+	if (register_trace_hcall_entry(probe_hcall_entry))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit)) {
+		unregister_trace_hcall_entry(probe_hcall_entry);
+		return -EINVAL;
+	}
+
 	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
 	if (!hcall_root)
 		return -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 903eb9e..0707653 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
+#include <asm/trace.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -661,3 +662,35 @@
 EXPORT_SYMBOL(arch_free_page);
 
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+	trace_hcall_entry(opcode, args);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval,
+			unsigned long *retbuf)
+{
+	trace_hcall_exit(opcode, retval, retbuf);
+}
+#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72ace95..178084b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -49,6 +49,7 @@
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_HW_BREAKPOINT
 	select HAVE_ARCH_KMEMCHECK
 
 config OUTPUT_FORMAT
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d105f29..7d0b681 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,6 +186,15 @@
 config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
+config X86_DECODER_SELFTEST
+     bool "x86 instruction decoder selftest"
+     depends on DEBUG_KERNEL
+	---help---
+	 Perform x86 instruction decoder selftests at build time.
+	 This option is useful for checking the sanity of x86 instruction
+	 decoder code.
+	 If unsure, say "N".
+
 #
 # IO delay types:
 #
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d2d24c9..78b32be 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -155,6 +155,9 @@
 KBUILD_IMAGE := $(boot)/bzImage
 
 bzImage: vmlinux
+ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
+	$(Q)$(MAKE) $(build)=arch/x86/tools posttest
+endif
 	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
 	$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
 	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80c..9f828f8 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@
 header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
+header-y += hw_breakpoint.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e39..7a15588 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -17,6 +17,7 @@
 
 #include <linux/user.h>
 #include <linux/elfcore.h>
+#include <asm/debugreg.h>
 
 /*
  * fill in the user structure for an a.out core dump
@@ -32,14 +33,7 @@
 			>> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
 	dump->u_ssize = 0;
-	dump->u_debugreg[0] = current->thread.debugreg0;
-	dump->u_debugreg[1] = current->thread.debugreg1;
-	dump->u_debugreg[2] = current->thread.debugreg2;
-	dump->u_debugreg[3] = current->thread.debugreg3;
-	dump->u_debugreg[4] = 0;
-	dump->u_debugreg[5] = 0;
-	dump->u_debugreg[6] = current->thread.debugreg6;
-	dump->u_debugreg[7] = current->thread.debugreg7;
+	aout_dump_debugregs(dump);
 
 	if (dump->start_stack < TASK_SIZE)
 		dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37..8240f76 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
 #define DR_TRAP1	(0x2)		/* db1 */
 #define DR_TRAP2	(0x4)		/* db2 */
 #define DR_TRAP3	(0x8)		/* db3 */
+#define DR_TRAP_BITS	(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
 
 #define DR_STEP		(0x4000)	/* single-step */
 #define DR_SWITCH	(0x8000)	/* task switch */
@@ -49,6 +50,8 @@
 
 #define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
 #define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1)      /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2)     /* Global enable for reg 0 */
 #define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
 
 #define DR_LOCAL_ENABLE_MASK (0x55)  /* Set  local bits for all 4 regs */
@@ -67,4 +70,34 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+DECLARE_PER_CPU(unsigned long, cpu_dr7);
+
+static inline void hw_breakpoint_disable(void)
+{
+	/* Zero the control register for HW Breakpoint */
+	set_debugreg(0UL, 7);
+
+	/* Zero-out the individual HW breakpoint address registers */
+	set_debugreg(0UL, 0);
+	set_debugreg(0UL, 1);
+	set_debugreg(0UL, 2);
+	set_debugreg(0UL, 3);
+}
+
+static inline int hw_breakpoint_active(void)
+{
+	return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
+}
+
+extern void aout_dump_debugregs(struct user *dump);
+
+extern void hw_breakpoint_restore(void);
+
+#endif	/* __KERNEL__ */
+
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 82e3e8f..108eb6f 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -20,11 +20,11 @@
 	unsigned int irq_call_count;
 	unsigned int irq_tlb_count;
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
 	unsigned int irq_threshold_count;
-# endif
 #endif
 } ____cacheline_aligned irq_cpustat_t;
 
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 0000000..0675a7c
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,73 @@
+#ifndef	_I386_HW_BREAKPOINT_H
+#define	_I386_HW_BREAKPOINT_H
+
+#ifdef	__KERNEL__
+#define	__ARCH_HW_BREAKPOINT_H
+
+/*
+ * The name should probably be something dealt in
+ * a higher level. While dealing with the user
+ * (display/resolving)
+ */
+struct arch_hw_breakpoint {
+	char		*name; /* Contains name of the symbol to set bkpt */
+	unsigned long	address;
+	u8		len;
+	u8		type;
+};
+
+#include <linux/kdebug.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
+
+/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_1		0x40
+#define X86_BREAKPOINT_LEN_2		0x44
+#define X86_BREAKPOINT_LEN_4		0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE	0x40
+
+#ifdef CONFIG_X86_64
+#define X86_BREAKPOINT_LEN_8		0x48
+#endif
+
+/* Available HW breakpoint type encodings */
+
+/* trigger on instruction execute */
+#define X86_BREAKPOINT_EXECUTE	0x80
+/* trigger on memory write */
+#define X86_BREAKPOINT_WRITE	0x81
+/* trigger on memory read or write */
+#define X86_BREAKPOINT_RW	0x83
+
+/* Total number of available HW breakpoint registers */
+#define HBP_NUM 4
+
+struct perf_event;
+struct pmu;
+
+extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+					 struct task_struct *tsk);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+					   unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+				  int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
+#endif	/* __KERNEL__ */
+#endif	/* _I386_HW_BREAKPOINT_H */
+
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
new file mode 100644
index 0000000..205b063
--- /dev/null
+++ b/arch/x86/include/asm/inat.h
@@ -0,0 +1,220 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <asm/inat_types.h>
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ	1	/* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE	2	/* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE	3	/* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK	4	/* 0xF0 */
+#define INAT_PFX_CS	5	/* 0x2E */
+#define INAT_PFX_DS	6	/* 0x3E */
+#define INAT_PFX_ES	7	/* 0x26 */
+#define INAT_PFX_FS	8	/* 0x64 */
+#define INAT_PFX_GS	9	/* 0x65 */
+#define INAT_PFX_SS	10	/* 0x36 */
+#define INAT_PFX_ADDRSZ	11	/* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX	12	/* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2	13	/* 2-bytes VEX prefix */
+#define INAT_PFX_VEX3	14	/* 3-bytes VEX prefix */
+
+#define INAT_LSTPFX_MAX	3
+#define INAT_LGCPFX_MAX	11
+
+/* Immediate size */
+#define INAT_IMM_BYTE		1
+#define INAT_IMM_WORD		2
+#define INAT_IMM_DWORD		3
+#define INAT_IMM_QWORD		4
+#define INAT_IMM_PTR		5
+#define INAT_IMM_VWORD32	6
+#define INAT_IMM_VWORD		7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS	0
+#define INAT_PFX_BITS	4
+#define INAT_PFX_MAX    ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK	(INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS	(INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS	2
+#define INAT_ESC_MAX	((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK	(INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS	(INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS	5
+#define INAT_GRP_MAX	((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK	(INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS	(INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS	3
+#define INAT_IMM_MASK	(((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS	(INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM	(1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64	(1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM	(1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET	(1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT	(1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK	(1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY	(1 << (INAT_FLAG_OFFS + 6))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx)	(pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc)	(esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS)
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+					     insn_byte_t last_pfx,
+					     insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+					    insn_byte_t last_pfx,
+					    insn_attr_t esc_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+					  insn_byte_t vex_m,
+					  insn_byte_t vex_pp);
+
+/* Attribute checking functions */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+	attr &= INAT_PFX_MASK;
+	return attr && attr <= INAT_LGCPFX_MAX;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+	if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
+		return 0;
+	else
+		return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+	attr &= INAT_PFX_MASK;
+	return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+	return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+	return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+	return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+	return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+	return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+	return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+	return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+	return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+	return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+	return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+	return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+	return attr & INAT_VARIANT;
+}
+
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+	return attr & INAT_VEXOK;
+}
+
+static inline int inat_must_vex(insn_attr_t attr)
+{
+	return attr & INAT_VEXONLY;
+}
+#endif
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h
new file mode 100644
index 0000000..cb3c20c
--- /dev/null
+++ b/arch/x86/include/asm/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
new file mode 100644
index 0000000..96c2e0a
--- /dev/null
+++ b/arch/x86/include/asm/insn.h
@@ -0,0 +1,184 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include <asm/inat.h>
+
+struct insn_field {
+	union {
+		insn_value_t value;
+		insn_byte_t bytes[4];
+	};
+	/* !0 if we've run insn_get_xxx() for this field */
+	unsigned char got;
+	unsigned char nbytes;
+};
+
+struct insn {
+	struct insn_field prefixes;	/*
+					 * Prefixes
+					 * prefixes.bytes[3]: last prefix
+					 */
+	struct insn_field rex_prefix;	/* REX prefix */
+	struct insn_field vex_prefix;	/* VEX prefix */
+	struct insn_field opcode;	/*
+					 * opcode.bytes[0]: opcode1
+					 * opcode.bytes[1]: opcode2
+					 * opcode.bytes[2]: opcode3
+					 */
+	struct insn_field modrm;
+	struct insn_field sib;
+	struct insn_field displacement;
+	union {
+		struct insn_field immediate;
+		struct insn_field moffset1;	/* for 64bit MOV */
+		struct insn_field immediate1;	/* for 64bit imm or off16/32 */
+	};
+	union {
+		struct insn_field moffset2;	/* for 64bit MOV */
+		struct insn_field immediate2;	/* for 64bit imm or seg16 */
+	};
+
+	insn_attr_t attr;
+	unsigned char opnd_bytes;
+	unsigned char addr_bytes;
+	unsigned char length;
+	unsigned char x86_64;
+
+	const insn_byte_t *kaddr;	/* kernel address of insn to analyze */
+	const insn_byte_t *next_byte;
+};
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags  */
+#define X86_VEX_W(vex)	((vex) & 0x80)	/* VEX3 Byte2 */
+#define X86_VEX_R(vex)	((vex) & 0x80)	/* VEX2/3 Byte1 */
+#define X86_VEX_X(vex)	((vex) & 0x40)	/* VEX3 Byte1 */
+#define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
+#define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
+/* VEX bit fields */
+#define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
+#define X86_VEX2_M	1			/* VEX2.M always 1 */
+#define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex)	((vex) & 0x03)		/* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX	0x1f			/* VEX3.M Maximum value */
+
+/* The last prefix is needed for two-byte and three-byte opcodes */
+static inline insn_byte_t insn_last_prefix(struct insn *insn)
+{
+	return insn->prefixes.bytes[3];
+}
+
+extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+	insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn, const void *kaddr)
+{
+#ifdef CONFIG_X86_64
+	insn_init(insn, kaddr, 1);
+#else /* CONFIG_X86_32 */
+	insn_init(insn, kaddr, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+	if (!insn->prefixes.got)
+		insn_get_prefixes(insn);
+	return (insn->vex_prefix.value != 0);
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
+		return X86_VEX2_M;
+	else
+		return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
+		return X86_VEX_P(insn->vex_prefix.bytes[1]);
+	else
+		return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+	return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+	return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+	return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+	return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+	return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+	return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+	return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+#endif /* _ASM_X86_INSN_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index f1363b7..858baa0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -108,6 +108,8 @@
 #define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
 #define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
 
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
 #ifdef __KERNEL__
 
 #include <linux/percpu.h>
@@ -118,9 +120,11 @@
 extern int mce_p5_enabled;
 
 #ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
+int mcheck_init(void);
+void mcheck_cpu_init(struct cpuinfo_x86 *c);
 #else
-static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+static inline int mcheck_init(void) { return 0; }
+static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
@@ -214,5 +218,11 @@
 
 void mce_log_therm_throt_event(__u64 status);
 
+#ifdef CONFIG_X86_THERMAL_VECTOR
+extern void mcheck_intel_therm_init(void);
+#else
+static inline void mcheck_intel_therm_init(void) { }
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index ad7ce3f..8d9f854 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -28,9 +28,20 @@
  */
 #define ARCH_PERFMON_EVENT_MASK				    0xffff
 
+/*
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
+#define ARCH_PERFMON_EVENT_FILTER_MASK			0xff840000
+
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 		 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX			 0
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
 		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c978648..6f8ec1c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -30,6 +30,7 @@
 #include <linux/math64.h>
 #include <linux/init.h>
 
+#define HBP_NUM 4
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -422,6 +423,8 @@
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -443,13 +446,10 @@
 	unsigned long		fs;
 #endif
 	unsigned long		gs;
-	/* Hardware debugging registers: */
-	unsigned long		debugreg0;
-	unsigned long		debugreg1;
-	unsigned long		debugreg2;
-	unsigned long		debugreg3;
-	unsigned long		debugreg6;
-	unsigned long		debugreg7;
+	/* Save middle states of ptrace breakpoints */
+	struct perf_event	*ptrace_bps[HBP_NUM];
+	/* Debug status used for traps, single steps, etc... */
+	unsigned long           debugreg6;
 	/* Fault info: */
 	unsigned long		cr2;
 	unsigned long		trap_no;
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 0f0d908..3d11fd0 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -7,6 +7,7 @@
 
 #ifdef __KERNEL__
 #include <asm/segment.h>
+#include <asm/page_types.h>
 #endif
 
 #ifndef __ASSEMBLY__
@@ -216,6 +217,67 @@
 	return regs->sp;
 }
 
+/* Query offset/name of register from its name/offset */
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss))
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs:	pt_regs from which register value is gotten.
+ * @offset:	offset number of the register.
+ *
+ * regs_get_register returns the value of a register. The @offset is the
+ * offset of the register in struct pt_regs address which specified by @regs.
+ * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+					      unsigned int offset)
+{
+	if (unlikely(offset > MAX_REG_OFFSET))
+		return 0;
+	return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @addr:	address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static inline int regs_within_kernel_stack(struct pt_regs *regs,
+					   unsigned long addr)
+{
+	return ((addr & ~(THREAD_SIZE - 1))  ==
+		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @n:		stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+						      unsigned int n)
+{
+	unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+	addr += n;
+	if (regs_within_kernel_stack(regs, (unsigned long)addr))
+		return *addr;
+	else
+		return 0;
+}
+
+/* Get Nth argument at function call */
+extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
+					   unsigned int n);
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d8e5d0c..4f2e66e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o pci-nommu.o
+obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 68537e9..1d2cb38 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -5,6 +5,7 @@
 # Don't trace early stages of a secondary CPU boot
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_common.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
 # Make sure load_percpu_segment has no stackprotector
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cc25c2b..9053be5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -837,10 +837,8 @@
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
-#ifdef CONFIG_X86_MCE
 	/* Init Machine Check Exception if available. */
-	mcheck_init(c);
-#endif
+	mcheck_cpu_init(c);
 
 	select_idle_routine(c);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 721a77c..0bcaa38 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,9 @@
 
 #include "mce-internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/mce.h>
+
 int mce_disabled __read_mostly;
 
 #define MISC_MCELOG_MINOR	227
@@ -85,18 +88,26 @@
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
-static void default_decode_mce(struct mce *m)
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
+
+static int default_decode_mce(struct notifier_block *nb, unsigned long val,
+			       void *data)
 {
 	pr_emerg("No human readable MCE decoding support on this CPU type.\n");
 	pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+
+	return NOTIFY_STOP;
 }
 
-/*
- * CPU/chipset specific EDAC code can register a callback here to print
- * MCE errors in a human-readable form:
- */
-void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
-EXPORT_SYMBOL(x86_mce_decode_callback);
+static struct notifier_block mce_dec_nb = {
+	.notifier_call = default_decode_mce,
+	.priority      = -1,
+};
 
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -141,6 +152,9 @@
 {
 	unsigned next, entry;
 
+	/* Emit the trace record: */
+	trace_mce_record(mce);
+
 	mce->finished = 0;
 	wmb();
 	for (;;) {
@@ -204,9 +218,9 @@
 
 	/*
 	 * Print out human-readable details about the MCE error,
-	 * (if the CPU has an implementation for that):
+	 * (if the CPU has an implementation for that)
 	 */
-	x86_mce_decode_callback(m);
+	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
 }
 
 static void print_mce_head(void)
@@ -1122,7 +1136,7 @@
 static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
-static void mcheck_timer(unsigned long data)
+static void mce_start_timer(unsigned long data)
 {
 	struct timer_list *t = &per_cpu(mce_timer, data);
 	int *n;
@@ -1187,7 +1201,7 @@
 }
 EXPORT_SYMBOL_GPL(mce_notify_irq);
 
-static int mce_banks_init(void)
+static int __cpuinit __mcheck_cpu_mce_banks_init(void)
 {
 	int i;
 
@@ -1206,7 +1220,7 @@
 /*
  * Initialize Machine Checks for a CPU.
  */
-static int __cpuinit mce_cap_init(void)
+static int __cpuinit __mcheck_cpu_cap_init(void)
 {
 	unsigned b;
 	u64 cap;
@@ -1228,7 +1242,7 @@
 	WARN_ON(banks != 0 && b != banks);
 	banks = b;
 	if (!mce_banks) {
-		int err = mce_banks_init();
+		int err = __mcheck_cpu_mce_banks_init();
 
 		if (err)
 			return err;
@@ -1244,7 +1258,7 @@
 	return 0;
 }
 
-static void mce_init(void)
+static void __mcheck_cpu_init_generic(void)
 {
 	mce_banks_t all_banks;
 	u64 cap;
@@ -1273,7 +1287,7 @@
 }
 
 /* Add per CPU specific workarounds here */
-static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
 		pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1341,7 +1355,7 @@
 	return 0;
 }
 
-static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 {
 	if (c->x86 != 5)
 		return;
@@ -1355,7 +1369,7 @@
 	}
 }
 
-static void mce_cpu_features(struct cpuinfo_x86 *c)
+static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 {
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
@@ -1369,7 +1383,7 @@
 	}
 }
 
-static void mce_init_timer(void)
+static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
 	int *n = &__get_cpu_var(mce_next_interval);
@@ -1380,7 +1394,7 @@
 	*n = check_interval * HZ;
 	if (!*n)
 		return;
-	setup_timer(t, mcheck_timer, smp_processor_id());
+	setup_timer(t, mce_start_timer, smp_processor_id());
 	t->expires = round_jiffies(jiffies + *n);
 	add_timer_on(t, smp_processor_id());
 }
@@ -1400,27 +1414,28 @@
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off:
  */
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
 {
 	if (mce_disabled)
 		return;
 
-	mce_ancient_init(c);
+	__mcheck_cpu_ancient_init(c);
 
 	if (!mce_available(c))
 		return;
 
-	if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
+	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
 		mce_disabled = 1;
 		return;
 	}
 
 	machine_check_vector = do_machine_check;
 
-	mce_init();
-	mce_cpu_features(c);
-	mce_init_timer();
+	__mcheck_cpu_init_generic();
+	__mcheck_cpu_init_vendor(c);
+	__mcheck_cpu_init_timer();
 	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+
 }
 
 /*
@@ -1640,6 +1655,15 @@
 }
 __setup("mce", mcheck_enable);
 
+int __init mcheck_init(void)
+{
+	atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
+
+	mcheck_intel_therm_init();
+
+	return 0;
+}
+
 /*
  * Sysfs support
  */
@@ -1648,7 +1672,7 @@
  * Disable machine checks on suspend and shutdown. We can't really handle
  * them later.
  */
-static int mce_disable(void)
+static int mce_disable_error_reporting(void)
 {
 	int i;
 
@@ -1663,12 +1687,12 @@
 
 static int mce_suspend(struct sys_device *dev, pm_message_t state)
 {
-	return mce_disable();
+	return mce_disable_error_reporting();
 }
 
 static int mce_shutdown(struct sys_device *dev)
 {
-	return mce_disable();
+	return mce_disable_error_reporting();
 }
 
 /*
@@ -1678,8 +1702,8 @@
  */
 static int mce_resume(struct sys_device *dev)
 {
-	mce_init();
-	mce_cpu_features(&current_cpu_data);
+	__mcheck_cpu_init_generic();
+	__mcheck_cpu_init_vendor(&current_cpu_data);
 
 	return 0;
 }
@@ -1689,8 +1713,8 @@
 	del_timer_sync(&__get_cpu_var(mce_timer));
 	if (!mce_available(&current_cpu_data))
 		return;
-	mce_init();
-	mce_init_timer();
+	__mcheck_cpu_init_generic();
+	__mcheck_cpu_init_timer();
 }
 
 /* Reinit MCEs after user configuration changes */
@@ -1716,7 +1740,7 @@
 	cmci_reenable();
 	cmci_recheck();
 	if (all)
-		mce_init_timer();
+		__mcheck_cpu_init_timer();
 }
 
 static struct sysdev_class mce_sysclass = {
@@ -1929,13 +1953,14 @@
 }
 
 /* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
+static void __cpuinit mce_disable_cpu(void *h)
 {
 	unsigned long action = *(unsigned long *)h;
 	int i;
 
 	if (!mce_available(&current_cpu_data))
 		return;
+
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_clear();
 	for (i = 0; i < banks; i++) {
@@ -1946,7 +1971,7 @@
 	}
 }
 
-static void mce_reenable_cpu(void *h)
+static void __cpuinit mce_reenable_cpu(void *h)
 {
 	unsigned long action = *(unsigned long *)h;
 	int i;
@@ -2025,7 +2050,7 @@
 	}
 }
 
-static __init int mce_init_device(void)
+static __init int mcheck_init_device(void)
 {
 	int err;
 	int i = 0;
@@ -2053,7 +2078,7 @@
 	return err;
 }
 
-device_initcall(mce_init_device);
+device_initcall(mcheck_init_device);
 
 /*
  * Old style boot options parsing. Only for compatibility.
@@ -2101,7 +2126,7 @@
 DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
 			fake_panic_set, "%llu\n");
 
-static int __init mce_debugfs_init(void)
+static int __init mcheck_debugfs_init(void)
 {
 	struct dentry *dmce, *ffake_panic;
 
@@ -2115,5 +2140,5 @@
 
 	return 0;
 }
-late_initcall(mce_debugfs_init);
+late_initcall(mcheck_debugfs_init);
 #endif
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index b3a1dba..4fef985 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -49,6 +49,8 @@
 
 static atomic_t therm_throt_en	= ATOMIC_INIT(0);
 
+static u32 lvtthmr_init __read_mostly;
+
 #ifdef CONFIG_SYSFS
 #define define_therm_throt_sysdev_one_ro(_name)				\
 	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@@ -254,6 +256,18 @@
 	ack_APIC_irq();
 }
 
+void __init mcheck_intel_therm_init(void)
+{
+	/*
+	 * This function is only called on boot CPU. Save the init thermal
+	 * LVT value on BSP and use that value to restore APs' thermal LVT
+	 * entry BIOS programmed later
+	 */
+	if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
+		cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
+		lvtthmr_init = apic_read(APIC_LVTTHMR);
+}
+
 void intel_init_thermal(struct cpuinfo_x86 *c)
 {
 	unsigned int cpu = smp_processor_id();
@@ -270,7 +284,20 @@
 	 * since it might be delivered via SMI already:
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
+
+	/*
+	 * The initial value of thermal LVT entries on all APs always reads
+	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
+	 * sequence to them and LVT registers are reset to 0s except for
+	 * the mask bits which are set to 1s when APs receive INIT IPI.
+	 * Always restore the value that BIOS has programmed on AP based on
+	 * BSP's info we saved since BIOS is always setting the same value
+	 * for all threads/cores
+	 */
+	apic_write(APIC_LVTTHMR, lvtthmr_init);
+
+	h = lvtthmr_init;
+
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
 		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b5801c3..c1bbed1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -77,6 +77,18 @@
 	struct debug_store	*ds;
 };
 
+struct event_constraint {
+	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	int		code;
+};
+
+#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
+#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
+
+#define for_each_event_constraint(e, c) \
+	for ((e) = (c); (e)->idxmsk[0]; (e)++)
+
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -102,6 +114,8 @@
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
+	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
+					 struct hw_perf_event *hwc);
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +124,8 @@
 	.enabled = 1,
 };
 
+static const struct event_constraint *event_constraints;
+
 /*
  * Not sure about some of these
  */
@@ -155,6 +171,16 @@
 	return hw_event & P6_EVNTSEL_MASK;
 }
 
+static const struct event_constraint intel_p6_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
+	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT_END
+};
 
 /*
  * Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +196,35 @@
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
+static const struct event_constraint intel_core_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT(0x18, 0x1),	/* IDLE_DURING_DIV */
+	EVENT_CONSTRAINT(0x19, 0x2),	/* DELAYED_BYPASS */
+	EVENT_CONSTRAINT(0xa1, 0x1),	/* RS_UOPS_DISPATCH_CYCLES */
+	EVENT_CONSTRAINT(0xcb, 0x1),	/* MEM_LOAD_RETIRED */
+	EVENT_CONSTRAINT_END
+};
+
+static const struct event_constraint intel_nehalem_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0x40, 0x3),	/* L1D_CACHE_LD */
+	EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST */
+	EVENT_CONSTRAINT(0x42, 0x3),	/* L1D_CACHE_LOCK */
+	EVENT_CONSTRAINT(0x43, 0x3),	/* L1D_ALL_REF */
+	EVENT_CONSTRAINT(0x4e, 0x3),	/* L1D_PREFETCH */
+	EVENT_CONSTRAINT(0x4c, 0x3),	/* LOAD_HIT_PRE */
+	EVENT_CONSTRAINT(0x51, 0x3),	/* L1D */
+	EVENT_CONSTRAINT(0x52, 0x3),	/* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+	EVENT_CONSTRAINT(0x53, 0x3),	/* L1D_CACHE_LOCK_FB_HIT */
+	EVENT_CONSTRAINT(0xc5, 0x3),	/* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -190,7 +245,7 @@
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
-static const u64 nehalem_hw_cache_event_ids
+static __initconst u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -281,7 +336,7 @@
  },
 };
 
-static const u64 core2_hw_cache_event_ids
+static __initconst u64 core2_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -372,7 +427,7 @@
  },
 };
 
-static const u64 atom_hw_cache_event_ids
+static __initconst u64 atom_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -469,7 +524,7 @@
 #define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
 #define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
 #define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK	0xFF000000ULL
+#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK		\
 	(CORE_EVNTSEL_EVENT_MASK |	\
@@ -481,7 +536,7 @@
 	return hw_event & CORE_EVNTSEL_MASK;
 }
 
-static const u64 amd_hw_cache_event_ids
+static __initconst u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -932,6 +987,8 @@
 	 */
 	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
+	hwc->idx = -1;
+
 	/*
 	 * Count user and OS events unless requested not to.
 	 */
@@ -1334,8 +1391,7 @@
 		x86_pmu_enable_event(hwc, idx);
 }
 
-static int
-fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
+static int fixed_mode_idx(struct hw_perf_event *hwc)
 {
 	unsigned int hw_event;
 
@@ -1349,6 +1405,12 @@
 	if (!x86_pmu.num_events_fixed)
 		return -1;
 
+	/*
+	 * fixed counters do not take all possible filters
+	 */
+	if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
+		return -1;
+
 	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
 	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1360,22 +1422,57 @@
 }
 
 /*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * generic counter allocator: get next free counter
  */
-static int x86_pmu_enable(struct perf_event *event)
+static int
+gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
-	idx = fixed_mode_idx(event, hwc);
+	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
+	return idx == x86_pmu.num_events ? -1 : idx;
+}
+
+/*
+ * intel-specific counter allocator: check event constraints
+ */
+static int
+intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+	const struct event_constraint *event_constraint;
+	int i, code;
+
+	if (!event_constraints)
+		goto skip;
+
+	code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
+
+	for_each_event_constraint(event_constraint, event_constraints) {
+		if (code == event_constraint->code) {
+			for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
+				if (!test_and_set_bit(i, cpuc->used_mask))
+					return i;
+			}
+			return -1;
+		}
+	}
+skip:
+	return gen_get_event_idx(cpuc, hwc);
+}
+
+static int
+x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+	int idx;
+
+	idx = fixed_mode_idx(hwc);
 	if (idx == X86_PMC_IDX_FIXED_BTS) {
 		/* BTS is already occupied. */
 		if (test_and_set_bit(idx, cpuc->used_mask))
 			return -EAGAIN;
 
 		hwc->config_base	= 0;
-		hwc->event_base	= 0;
+		hwc->event_base		= 0;
 		hwc->idx		= idx;
 	} else if (idx >= 0) {
 		/*
@@ -1396,20 +1493,35 @@
 	} else {
 		idx = hwc->idx;
 		/* Try to get the previous generic event again */
-		if (test_and_set_bit(idx, cpuc->used_mask)) {
+		if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
 try_generic:
-			idx = find_first_zero_bit(cpuc->used_mask,
-						  x86_pmu.num_events);
-			if (idx == x86_pmu.num_events)
+			idx = x86_pmu.get_event_idx(cpuc, hwc);
+			if (idx == -1)
 				return -EAGAIN;
 
 			set_bit(idx, cpuc->used_mask);
 			hwc->idx = idx;
 		}
-		hwc->config_base  = x86_pmu.eventsel;
-		hwc->event_base = x86_pmu.perfctr;
+		hwc->config_base = x86_pmu.eventsel;
+		hwc->event_base  = x86_pmu.perfctr;
 	}
 
+	return idx;
+}
+
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in event:
+ */
+static int x86_pmu_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	idx = x86_schedule_event(cpuc, hwc);
+	if (idx < 0)
+		return idx;
+
 	perf_events_lapic_init();
 
 	x86_pmu.disable(hwc, idx);
@@ -1852,7 +1964,7 @@
 	.priority		= 1
 };
 
-static struct x86_pmu p6_pmu = {
+static __initconst struct x86_pmu p6_pmu = {
 	.name			= "p6",
 	.handle_irq		= p6_pmu_handle_irq,
 	.disable_all		= p6_pmu_disable_all,
@@ -1877,9 +1989,10 @@
 	 */
 	.event_bits		= 32,
 	.event_mask		= (1ULL << 32) - 1,
+	.get_event_idx		= intel_get_event_idx,
 };
 
-static struct x86_pmu intel_pmu = {
+static __initconst struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
 	.disable_all		= intel_pmu_disable_all,
@@ -1900,9 +2013,10 @@
 	.max_period		= (1ULL << 31) - 1,
 	.enable_bts		= intel_pmu_enable_bts,
 	.disable_bts		= intel_pmu_disable_bts,
+	.get_event_idx		= intel_get_event_idx,
 };
 
-static struct x86_pmu amd_pmu = {
+static __initconst struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= amd_pmu_handle_irq,
 	.disable_all		= amd_pmu_disable_all,
@@ -1920,9 +2034,10 @@
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
+	.get_event_idx		= gen_get_event_idx,
 };
 
-static int p6_pmu_init(void)
+static __init int p6_pmu_init(void)
 {
 	switch (boot_cpu_data.x86_model) {
 	case 1:
@@ -1932,10 +2047,12 @@
 	case 7:
 	case 8:
 	case 11: /* Pentium III */
+		event_constraints = intel_p6_event_constraints;
 		break;
 	case 9:
 	case 13:
 		/* Pentium M */
+		event_constraints = intel_p6_event_constraints;
 		break;
 	default:
 		pr_cont("unsupported p6 CPU model %d ",
@@ -1954,7 +2071,7 @@
 	return 0;
 }
 
-static int intel_pmu_init(void)
+static __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
@@ -2007,12 +2124,14 @@
 		       sizeof(hw_cache_event_ids));
 
 		pr_cont("Core2 events, ");
+		event_constraints = intel_core_event_constraints;
 		break;
 	default:
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		event_constraints = intel_nehalem_event_constraints;
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28:
@@ -2025,7 +2144,7 @@
 	return 0;
 }
 
-static int amd_pmu_init(void)
+static __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
 	if (boot_cpu_data.x86 < 6)
@@ -2105,11 +2224,47 @@
 	.unthrottle	= x86_pmu_unthrottle,
 };
 
+static int
+validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct hw_perf_event fake_event = event->hw;
+
+	if (event->pmu && event->pmu != &pmu)
+		return 0;
+
+	return x86_schedule_event(cpuc, &fake_event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct cpu_hw_events fake_pmu;
+
+	memset(&fake_pmu, 0, sizeof(fake_pmu));
+
+	if (!validate_event(&fake_pmu, leader))
+		return -ENOSPC;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (!validate_event(&fake_pmu, sibling))
+			return -ENOSPC;
+	}
+
+	if (!validate_event(&fake_pmu, event))
+		return -ENOSPC;
+
+	return 0;
+}
+
 const struct pmu *hw_perf_event_init(struct perf_event *event)
 {
 	int err;
 
 	err = __hw_perf_event_init(event);
+	if (!err) {
+		if (event->group_leader != event)
+			err = validate_group(event);
+	}
 	if (err) {
 		if (event->destroy)
 			event->destroy(event);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 7d52e9d..50b9c22 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -334,6 +334,10 @@
 END(ret_from_fork)
 
 /*
+ * Interrupt exit functions should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
+/*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
  * go as quickly as possible which is why some of this is
@@ -383,6 +387,10 @@
 END(resume_kernel)
 #endif
 	CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+	.popsection
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
@@ -513,6 +521,10 @@
 	PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
+/*
+ * syscall stub including irq exit should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
@@ -705,6 +717,10 @@
 	jmp resume_userspace
 END(syscall_badsys)
 	CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+	.popsection
 
 /*
  * System calls that need a pt_regs pointer.
@@ -814,6 +830,10 @@
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
+/*
+ *  Irq entries should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
 #define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
@@ -980,6 +1000,10 @@
 	jmp error_code
 	CFI_ENDPROC
 END(spurious_interrupt_bug)
+/*
+ * End of kprobes section
+ */
+	.popsection
 
 ENTRY(kernel_thread_helper)
 	pushl $0		# fake return address for unwinder
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bd5bbdd..722df1b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -803,6 +803,10 @@
 	call \func
 	.endm
 
+/*
+ * Interrupt entry/exit should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
 	/*
 	 * The interrupt stubs push (~vector+0x80) onto the stack and
 	 * then jump to common_interrupt.
@@ -941,6 +945,10 @@
 
 	CFI_ENDPROC
 END(common_interrupt)
+/*
+ * End of kprobes section
+ */
+       .popsection
 
 /*
  * APIC interrupts.
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 0000000..d42f65a
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,555 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Authors: Alan Stern <stern@rowland.harvard.edu>
+ *          K.Prasad <prasad@linux.vnet.ibm.com>
+ *          Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/debugreg.h>
+
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, cpu_dr7);
+EXPORT_PER_CPU_SYMBOL(cpu_dr7);
+
+/* Per cpu debug address registers values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpus
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
+
+
+static inline unsigned long
+__encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+	unsigned long bp_info;
+
+	bp_info = (len | type) & 0xf;
+	bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
+	bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
+
+	return bp_info;
+}
+
+/*
+ * Encode the length, type, Exact, and Enable bits for a particular breakpoint
+ * as stored in debug register 7.
+ */
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+	return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
+}
+
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7.  Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
+{
+	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+	*len = (bp_info & 0xc) | 0x40;
+	*type = (bp_info & 0x3) | 0x80;
+
+	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (!*slot) {
+			*slot = bp;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return -EBUSY;
+
+	set_debugreg(info->address, i);
+	__get_cpu_var(cpu_debugreg[i]) = info->address;
+
+	dr7 = &__get_cpu_var(cpu_dr7);
+	*dr7 |= encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
+
+	return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (*slot == bp) {
+			*slot = NULL;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return;
+
+	dr7 = &__get_cpu_var(cpu_dr7);
+	*dr7 &= ~__encode_dr7(i, info->len, info->type);
+
+	set_debugreg(*dr7, 7);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+	unsigned int len_in_bytes = 0;
+
+	switch (hbp_len) {
+	case X86_BREAKPOINT_LEN_1:
+		len_in_bytes = 1;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		len_in_bytes = 2;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		len_in_bytes = 4;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		len_in_bytes = 8;
+		break;
+#endif
+	}
+	return len_in_bytes;
+}
+
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+	unsigned int len;
+
+	len = get_hbp_len(hbp_len);
+
+	return (va <= TASK_SIZE - len);
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+{
+	unsigned int len;
+
+	len = get_hbp_len(hbp_len);
+
+	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ */
+static int arch_store_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	/*
+	 * For kernel-addresses, either the address or symbol name can be
+	 * specified.
+	 */
+	if (info->name)
+		info->address = (unsigned long)
+				kallsyms_lookup_name(info->name);
+	if (info->address)
+		return 0;
+
+	return -EINVAL;
+}
+
+int arch_bp_generic_fields(int x86_len, int x86_type,
+			   int *gen_len, int *gen_type)
+{
+	/* Len */
+	switch (x86_len) {
+	case X86_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	/* Type */
+	switch (x86_type) {
+	case X86_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
+		break;
+	case X86_BREAKPOINT_WRITE:
+		*gen_type = HW_BREAKPOINT_W;
+		break;
+	case X86_BREAKPOINT_RW:
+		*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	info->address = bp->attr.bp_addr;
+
+	/* Len */
+	switch (bp->attr.bp_len) {
+	case HW_BREAKPOINT_LEN_1:
+		info->len = X86_BREAKPOINT_LEN_1;
+		break;
+	case HW_BREAKPOINT_LEN_2:
+		info->len = X86_BREAKPOINT_LEN_2;
+		break;
+	case HW_BREAKPOINT_LEN_4:
+		info->len = X86_BREAKPOINT_LEN_4;
+		break;
+#ifdef CONFIG_X86_64
+	case HW_BREAKPOINT_LEN_8:
+		info->len = X86_BREAKPOINT_LEN_8;
+		break;
+#endif
+	default:
+		return -EINVAL;
+	}
+
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_W:
+		info->type = X86_BREAKPOINT_WRITE;
+		break;
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		info->type = X86_BREAKPOINT_RW;
+		break;
+	case HW_BREAKPOINT_X:
+		info->type = X86_BREAKPOINT_EXECUTE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+				  struct task_struct *tsk)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	unsigned int align;
+	int ret;
+
+
+	ret = arch_build_bp_info(bp);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+
+	if (info->type == X86_BREAKPOINT_EXECUTE)
+		/*
+		 * Ptrace-refactoring code
+		 * For now, we'll allow instruction breakpoint only for user-space
+		 * addresses
+		 */
+		if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+			info->len != X86_BREAKPOINT_EXECUTE)
+			return ret;
+
+	switch (info->len) {
+	case X86_BREAKPOINT_LEN_1:
+		align = 0;
+		break;
+	case X86_BREAKPOINT_LEN_2:
+		align = 1;
+		break;
+	case X86_BREAKPOINT_LEN_4:
+		align = 3;
+		break;
+#ifdef CONFIG_X86_64
+	case X86_BREAKPOINT_LEN_8:
+		align = 7;
+		break;
+#endif
+	default:
+		return ret;
+	}
+
+	if (bp->callback)
+		ret = arch_store_info(bp);
+
+	if (ret < 0)
+		return ret;
+	/*
+	 * Check that the low-order bits of the address are appropriate
+	 * for the alignment implied by len.
+	 */
+	if (info->address & align)
+		return -EINVAL;
+
+	/* Check that the virtual address is in the proper range */
+	if (tsk) {
+		if (!arch_check_va_in_userspace(info->address, info->len))
+			return -EFAULT;
+	} else {
+		if (!arch_check_va_in_kernelspace(info->address, info->len))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump the debug register contents to the user.
+ * We can't dump our per cpu values because it
+ * may contain cpu wide breakpoint, something that
+ * doesn't belong to the current task.
+ *
+ * TODO: include non-ptrace user breakpoints (perf)
+ */
+void aout_dump_debugregs(struct user *dump)
+{
+	int i;
+	int dr7 = 0;
+	struct perf_event *bp;
+	struct arch_hw_breakpoint *info;
+	struct thread_struct *thread = &current->thread;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		bp = thread->ptrace_bps[i];
+
+		if (bp && !bp->attr.disabled) {
+			dump->u_debugreg[i] = bp->attr.bp_addr;
+			info = counter_arch_bp(bp);
+			dr7 |= encode_dr7(i, info->len, info->type);
+		} else {
+			dump->u_debugreg[i] = 0;
+		}
+	}
+
+	dump->u_debugreg[4] = 0;
+	dump->u_debugreg[5] = 0;
+	dump->u_debugreg[6] = current->thread.debugreg6;
+
+	dump->u_debugreg[7] = dr7;
+}
+EXPORT_SYMBOL_GPL(aout_dump_debugregs);
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		unregister_hw_breakpoint(t->ptrace_bps[i]);
+		t->ptrace_bps[i] = NULL;
+	}
+}
+
+void hw_breakpoint_restore(void)
+{
+	set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+	set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+	set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+	set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+	set_debugreg(current->thread.debugreg6, 6);
+	set_debugreg(__get_cpu_var(cpu_dr7), 7);
+}
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+
+/*
+ * Handle debug exception notifications.
+ *
+ * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
+ *
+ * NOTIFY_DONE returned if one of the following conditions is true.
+ * i) When the causative address is from user-space and the exception
+ * is a valid one, i.e. not triggered as a result of lazy debug register
+ * switching
+ * ii) When there are more bits than trap<n> set in DR6 register (such
+ * as BD, BS or BT) indicating that more than one debug condition is
+ * met and requires some more action in do_debug().
+ *
+ * NOTIFY_STOP returned for all other cases
+ *
+ */
+static int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+	int i, cpu, rc = NOTIFY_STOP;
+	struct perf_event *bp;
+	unsigned long dr7, dr6;
+	unsigned long *dr6_p;
+
+	/* The DR6 value is pointed by args->err */
+	dr6_p = (unsigned long *)ERR_PTR(args->err);
+	dr6 = *dr6_p;
+
+	/* Do an early return if no trap bits are set in DR6 */
+	if ((dr6 & DR_TRAP_BITS) == 0)
+		return NOTIFY_DONE;
+
+	get_debugreg(dr7, 7);
+	/* Disable breakpoints during exception handling */
+	set_debugreg(0UL, 7);
+	/*
+	 * Assert that local interrupts are disabled
+	 * Reset the DRn bits in the virtualized register value.
+	 * The ptrace trigger routine will add in whatever is needed.
+	 */
+	current->thread.debugreg6 &= ~DR_TRAP_BITS;
+	cpu = get_cpu();
+
+	/* Handle all the breakpoints that were triggered */
+	for (i = 0; i < HBP_NUM; ++i) {
+		if (likely(!(dr6 & (DR_TRAP0 << i))))
+			continue;
+
+		/*
+		 * The counter may be concurrently released but that can only
+		 * occur from a call_rcu() path. We can then safely fetch
+		 * the breakpoint, use its callback, touch its counter
+		 * while we are in an rcu_read_lock() path.
+		 */
+		rcu_read_lock();
+
+		bp = per_cpu(bp_per_reg[i], cpu);
+		if (bp)
+			rc = NOTIFY_DONE;
+		/*
+		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
+		 * exception handling
+		 */
+		(*dr6_p) &= ~(DR_TRAP0 << i);
+		/*
+		 * bp can be NULL due to lazy debug register switching
+		 * or due to concurrent perf counter removing.
+		 */
+		if (!bp) {
+			rcu_read_unlock();
+			break;
+		}
+
+		(bp->callback)(bp, args->regs);
+
+		rcu_read_unlock();
+	}
+	if (dr6 & (~DR_TRAP_BITS))
+		rc = NOTIFY_DONE;
+
+	set_debugreg(dr7, 7);
+	put_cpu();
+
+	return rc;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+		struct notifier_block *unused, unsigned long val, void *data)
+{
+	if (val != DIE_DEBUG)
+		return NOTIFY_DONE;
+
+	return hw_breakpoint_handler(data);
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+	/* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+	/* TODO */
+}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 04bbd52..19212cb 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -92,17 +92,17 @@
 		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
 	seq_printf(p, "  TLB shootdowns\n");
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	seq_printf(p, "%*s: ", prec, "TRM");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
 	seq_printf(p, "  Thermal event interrupts\n");
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
 	seq_printf(p, "%*s: ", prec, "THR");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_printf(p, "  Threshold APIC interrupts\n");
-# endif
 #endif
 #ifdef CONFIG_X86_MCE
 	seq_printf(p, "%*s: ", prec, "MCE");
@@ -194,11 +194,11 @@
 	sum += irq_stats(cpu)->irq_call_count;
 	sum += irq_stats(cpu)->irq_tlb_count;
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
 	sum += irq_stats(cpu)->irq_threshold_count;
-# endif
 #endif
 #ifdef CONFIG_X86_MCE
 	sum += per_cpu(mce_exception_count, cpu);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77..34e86b6 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
 #include <linux/smp.h>
 #include <linux/nmi.h>
 
+#include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/system.h>
 
@@ -434,6 +435,11 @@
 			"resuming...\n");
 	kgdb_arch_handle_exception(args->trapnr, args->signr,
 				   args->err, "c", "", regs);
+	/*
+	 * Reset the BS bit in dr6 (pointed by args->err) to
+	 * denote completion of processing
+	 */
+	(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 
 	return NOTIFY_STOP;
 }
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d..3fe86d7 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -48,12 +48,15 @@
 #include <linux/preempt.h>
 #include <linux/module.h>
 #include <linux/kdebug.h>
+#include <linux/kallsyms.h>
 
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/debugreg.h>
 
 void jprobe_return_end(void);
 
@@ -106,50 +109,6 @@
 	/*      -----------------------------------------------         */
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 };
-static const u32 onebyte_has_modrm[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-	/*      -----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
-	W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
-	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
-	W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
-	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
-	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
-	W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
-	W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
-	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
-	W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
-	W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
-	W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
-	W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
-	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
-	W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
-	W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)   /* f0 */
-	/*      -----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
-static const u32 twobyte_has_modrm[256 / 32] = {
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-	/*      -----------------------------------------------         */
-	W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
-	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
-	W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
-	W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
-	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
-	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
-	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
-	W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
-	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
-	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
-	W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
-	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
-	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
-	W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
-	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
-	W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* ff */
-	/*      -----------------------------------------------         */
-	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
-};
 #undef W
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +203,75 @@
 	}
 }
 
+/* Recover the probed instruction at addr for further analysis. */
+static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+	struct kprobe *kp;
+	kp = get_kprobe((void *)addr);
+	if (!kp)
+		return -EINVAL;
+
+	/*
+	 *  Basically, kp->ainsn.insn has an original instruction.
+	 *  However, RIP-relative instruction can not do single-stepping
+	 *  at different place, fix_riprel() tweaks the displacement of
+	 *  that instruction. In that case, we can't recover the instruction
+	 *  from the kp->ainsn.insn.
+	 *
+	 *  On the other hand, kp->opcode has a copy of the first byte of
+	 *  the probed instruction, which is overwritten by int3. And
+	 *  the instruction at kp->addr is not modified by kprobes except
+	 *  for the first byte, we can recover the original instruction
+	 *  from it and kp->opcode.
+	 */
+	memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	buf[0] = kp->opcode;
+	return 0;
+}
+
+/* Dummy buffers for kallsyms_lookup */
+static char __dummy_buf[KSYM_NAME_LEN];
+
+/* Check if paddr is at an instruction boundary */
+static int __kprobes can_probe(unsigned long paddr)
+{
+	int ret;
+	unsigned long addr, offset = 0;
+	struct insn insn;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+		return 0;
+
+	/* Decode instructions */
+	addr = paddr - offset;
+	while (addr < paddr) {
+		kernel_insn_init(&insn, (void *)addr);
+		insn_get_opcode(&insn);
+
+		/*
+		 * Check if the instruction has been modified by another
+		 * kprobe, in which case we replace the breakpoint by the
+		 * original instruction in our buffer.
+		 */
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+			ret = recover_probed_instruction(buf, addr);
+			if (ret)
+				/*
+				 * Another debugging subsystem might insert
+				 * this breakpoint. In that case, we can't
+				 * recover it.
+				 */
+				return 0;
+			kernel_insn_init(&insn, buf);
+		}
+		insn_get_length(&insn);
+		addr += insn.length;
+	}
+
+	return (addr == paddr);
+}
+
 /*
  * Returns non-zero if opcode modifies the interrupt flag.
  */
@@ -277,68 +305,30 @@
 static void __kprobes fix_riprel(struct kprobe *p)
 {
 #ifdef CONFIG_X86_64
-	u8 *insn = p->ainsn.insn;
-	s64 disp;
-	int need_modrm;
+	struct insn insn;
+	kernel_insn_init(&insn, p->ainsn.insn);
 
-	/* Skip legacy instruction prefixes.  */
-	while (1) {
-		switch (*insn) {
-		case 0x66:
-		case 0x67:
-		case 0x2e:
-		case 0x3e:
-		case 0x26:
-		case 0x64:
-		case 0x65:
-		case 0x36:
-		case 0xf0:
-		case 0xf3:
-		case 0xf2:
-			++insn;
-			continue;
-		}
-		break;
-	}
-
-	/* Skip REX instruction prefix.  */
-	if (is_REX_prefix(insn))
-		++insn;
-
-	if (*insn == 0x0f) {
-		/* Two-byte opcode.  */
-		++insn;
-		need_modrm = test_bit(*insn,
-				      (unsigned long *)twobyte_has_modrm);
-	} else
-		/* One-byte opcode.  */
-		need_modrm = test_bit(*insn,
-				      (unsigned long *)onebyte_has_modrm);
-
-	if (need_modrm) {
-		u8 modrm = *++insn;
-		if ((modrm & 0xc7) == 0x05) {
-			/* %rip+disp32 addressing mode */
-			/* Displacement follows ModRM byte.  */
-			++insn;
-			/*
-			 * The copied instruction uses the %rip-relative
-			 * addressing mode.  Adjust the displacement for the
-			 * difference between the original location of this
-			 * instruction and the location of the copy that will
-			 * actually be run.  The tricky bit here is making sure
-			 * that the sign extension happens correctly in this
-			 * calculation, since we need a signed 32-bit result to
-			 * be sign-extended to 64 bits when it's added to the
-			 * %rip value and yield the same 64-bit result that the
-			 * sign-extension of the original signed 32-bit
-			 * displacement would have given.
-			 */
-			disp = (u8 *) p->addr + *((s32 *) insn) -
-			       (u8 *) p->ainsn.insn;
-			BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
-			*(s32 *)insn = (s32) disp;
-		}
+	if (insn_rip_relative(&insn)) {
+		s64 newdisp;
+		u8 *disp;
+		insn_get_displacement(&insn);
+		/*
+		 * The copied instruction uses the %rip-relative addressing
+		 * mode.  Adjust the displacement for the difference between
+		 * the original location of this instruction and the location
+		 * of the copy that will actually be run.  The tricky bit here
+		 * is making sure that the sign extension happens correctly in
+		 * this calculation, since we need a signed 32-bit result to
+		 * be sign-extended to 64 bits when it's added to the %rip
+		 * value and yield the same 64-bit result that the sign-
+		 * extension of the original signed 32-bit displacement would
+		 * have given.
+		 */
+		newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
+			  (u8 *) p->ainsn.insn;
+		BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check.  */
+		disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
+		*(s32 *) disp = (s32) newdisp;
 	}
 #endif
 }
@@ -359,6 +349,8 @@
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
+	if (!can_probe((unsigned long)p->addr))
+		return -EILSEQ;
 	/* insn: must be on special executable page on x86. */
 	p->ainsn.insn = get_insn_slot();
 	if (!p->ainsn.insn)
@@ -472,17 +464,6 @@
 {
 	switch (kcb->kprobe_status) {
 	case KPROBE_HIT_SSDONE:
-#ifdef CONFIG_X86_64
-		/* TODO: Provide re-entrancy from post_kprobes_handler() and
-		 * avoid exception stack corruption while single-stepping on
-		 * the instruction of the new probe.
-		 */
-		arch_disarm_kprobe(p);
-		regs->ip = (unsigned long)p->addr;
-		reset_current_kprobe();
-		preempt_enable_no_resched();
-		break;
-#endif
 	case KPROBE_HIT_ACTIVE:
 		save_previous_kprobe(kcb);
 		set_current_kprobe(p, regs, kcb);
@@ -491,18 +472,16 @@
 		kcb->kprobe_status = KPROBE_REENTER;
 		break;
 	case KPROBE_HIT_SS:
-		if (p == kprobe_running()) {
-			regs->flags &= ~X86_EFLAGS_TF;
-			regs->flags |= kcb->kprobe_saved_flags;
-			return 0;
-		} else {
-			/* A probe has been hit in the codepath leading up
-			 * to, or just after, single-stepping of a probed
-			 * instruction. This entire codepath should strictly
-			 * reside in .kprobes.text section. Raise a warning
-			 * to highlight this peculiar case.
-			 */
-		}
+		/* A probe has been hit in the codepath leading up to, or just
+		 * after, single-stepping of a probed instruction. This entire
+		 * codepath should strictly reside in .kprobes.text section.
+		 * Raise a BUG or we'll continue in an endless reentering loop
+		 * and eventually a stack overflow.
+		 */
+		printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
+		       p->addr);
+		dump_kprobe(p);
+		BUG();
 	default:
 		/* impossible cases */
 		WARN_ON(1);
@@ -967,8 +946,14 @@
 			ret = NOTIFY_STOP;
 		break;
 	case DIE_DEBUG:
-		if (post_kprobe_handler(args->regs))
+		if (post_kprobe_handler(args->regs)) {
+			/*
+			 * Reset the BS bit in dr6 (pointed by args->err) to
+			 * denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
 			ret = NOTIFY_STOP;
+		}
 		break;
 	case DIE_GPF:
 		/*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d..c843f84 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
 #include <asm/desc.h>
 #include <asm/system.h>
 #include <asm/cacheflush.h>
+#include <asm/debugreg.h>
 
 static void set_idt(void *newidt, __u16 limit)
 {
@@ -202,6 +203,7 @@
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
+	hw_breakpoint_disable();
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf2..4a8bb82 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/debugreg.h>
 
 static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
 				unsigned long addr)
@@ -282,6 +283,7 @@
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
+	hw_breakpoint_disable();
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2..744508e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
 #include <linux/clockchips.h>
 #include <linux/random.h>
 #include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -17,6 +18,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@
 	}
 #endif
 
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
-	tsk->thread.debugreg0 = 0;
-	tsk->thread.debugreg1 = 0;
-	tsk->thread.debugreg2 = 0;
-	tsk->thread.debugreg3 = 0;
-	tsk->thread.debugreg6 = 0;
-	tsk->thread.debugreg7 = 0;
+	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
@@ -192,16 +187,6 @@
 	else if (next->debugctlmsr != prev->debugctlmsr)
 		update_debugctlmsr(next->debugctlmsr);
 
-	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg0, 0);
-		set_debugreg(next->debugreg1, 1);
-		set_debugreg(next->debugreg2, 2);
-		set_debugreg(next->debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(next->debugreg6, 6);
-		set_debugreg(next->debugreg7, 7);
-	}
-
 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 		/* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4cf7956..d5bd313 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -259,7 +260,12 @@
 
 	task_user_gs(p) = get_user_gs(regs);
 
+	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
+	err = -ENOMEM;
+
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
 						IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eb62cbc..70cf158 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/ds.h>
+#include <asm/debugreg.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -297,12 +298,16 @@
 
 	p->thread.fs = me->thread.fs;
 	p->thread.gs = me->thread.gs;
+	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
 	savesegment(fs, p->thread.fsindex);
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
 
+	err = -ENOMEM;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +346,7 @@
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
+
 	return err;
 }
 
@@ -495,6 +501,7 @@
 	 */
 	if (preload_fpu)
 		__math_state_restore();
+
 	return prev_p;
 }
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7b058a2..04d182a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -34,6 +36,7 @@
 #include <asm/prctl.h>
 #include <asm/proto.h>
 #include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
 
 #include "tls.h"
 
@@ -49,6 +52,118 @@
 	REGSET_IOPERM32,
 };
 
+struct pt_regs_offset {
+	const char *name;
+	int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+#ifdef CONFIG_X86_64
+	REG_OFFSET_NAME(r15),
+	REG_OFFSET_NAME(r14),
+	REG_OFFSET_NAME(r13),
+	REG_OFFSET_NAME(r12),
+	REG_OFFSET_NAME(r11),
+	REG_OFFSET_NAME(r10),
+	REG_OFFSET_NAME(r9),
+	REG_OFFSET_NAME(r8),
+#endif
+	REG_OFFSET_NAME(bx),
+	REG_OFFSET_NAME(cx),
+	REG_OFFSET_NAME(dx),
+	REG_OFFSET_NAME(si),
+	REG_OFFSET_NAME(di),
+	REG_OFFSET_NAME(bp),
+	REG_OFFSET_NAME(ax),
+#ifdef CONFIG_X86_32
+	REG_OFFSET_NAME(ds),
+	REG_OFFSET_NAME(es),
+	REG_OFFSET_NAME(fs),
+	REG_OFFSET_NAME(gs),
+#endif
+	REG_OFFSET_NAME(orig_ax),
+	REG_OFFSET_NAME(ip),
+	REG_OFFSET_NAME(cs),
+	REG_OFFSET_NAME(flags),
+	REG_OFFSET_NAME(sp),
+	REG_OFFSET_NAME(ss),
+	REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:	the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_offset *roff;
+	for (roff = regoffset_table; roff->name != NULL; roff++)
+		if (!strcmp(roff->name, name))
+			return roff->offset;
+	return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset:	the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+	const struct pt_regs_offset *roff;
+	for (roff = regoffset_table; roff->name != NULL; roff++)
+		if (roff->offset == offset)
+			return roff->name;
+	return NULL;
+}
+
+static const int arg_offs_table[] = {
+#ifdef CONFIG_X86_32
+	[0] = offsetof(struct pt_regs, ax),
+	[1] = offsetof(struct pt_regs, dx),
+	[2] = offsetof(struct pt_regs, cx)
+#else /* CONFIG_X86_64 */
+	[0] = offsetof(struct pt_regs, di),
+	[1] = offsetof(struct pt_regs, si),
+	[2] = offsetof(struct pt_regs, dx),
+	[3] = offsetof(struct pt_regs, cx),
+	[4] = offsetof(struct pt_regs, r8),
+	[5] = offsetof(struct pt_regs, r9)
+#endif
+};
+
+/**
+ * regs_get_argument_nth() - get Nth argument at function call
+ * @regs:	pt_regs which contains registers at function entry.
+ * @n:		argument number.
+ *
+ * regs_get_argument_nth() returns @n th argument of a function call.
+ * Since usually the kernel stack will be changed right after function entry,
+ * you must use this at function entry. If the @n th entry is NOT in the
+ * kernel stack or pt_regs, this returns 0.
+ */
+unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
+{
+	if (n < ARRAY_SIZE(arg_offs_table))
+		return *(unsigned long *)((char *)regs + arg_offs_table[n]);
+	else {
+		/*
+		 * The typical case: arg n is on the stack.
+		 * (Note: stack[0] = return address, so skip it)
+		 */
+		n -= ARRAY_SIZE(arg_offs_table);
+		return regs_get_kernel_stack_nth(regs, 1 + n);
+	}
+}
+
 /*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.
@@ -137,11 +252,6 @@
 	return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-	return TASK_SIZE - 3;
-}
-
 #else  /* CONFIG_X86_64 */
 
 #define FLAG_MASK		(FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -266,15 +376,6 @@
 	return 0;
 }
 
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (test_tsk_thread_flag(task, TIF_IA32))
-		return IA32_PAGE_OFFSET - 3;
-#endif
-	return TASK_SIZE_MAX - 7;
-}
-
 #endif	/* CONFIG_X86_32 */
 
 static unsigned long get_flags(struct task_struct *task)
@@ -454,96 +555,236 @@
 	return ret;
 }
 
-/*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
- */
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static void ptrace_triggered(struct perf_event *bp, void *data)
 {
-	switch (n) {
-	case 0:		return child->thread.debugreg0;
-	case 1:		return child->thread.debugreg1;
-	case 2:		return child->thread.debugreg2;
-	case 3:		return child->thread.debugreg3;
-	case 6:		return child->thread.debugreg6;
-	case 7:		return child->thread.debugreg7;
+	int i;
+	struct thread_struct *thread = &(current->thread);
+
+	/*
+	 * Store in the virtual DR6 register the fact that the breakpoint
+	 * was hit so the thread's debugger will see it.
+	 */
+	for (i = 0; i < HBP_NUM; i++) {
+		if (thread->ptrace_bps[i] == bp)
+			break;
 	}
+
+	thread->debugreg6 |= (DR_TRAP0 << i);
+}
+
+/*
+ * Walk through every ptrace breakpoints for this thread and
+ * build the dr7 value on top of their attributes.
+ *
+ */
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
+{
+	int i;
+	int dr7 = 0;
+	struct arch_hw_breakpoint *info;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		if (bp[i] && !bp[i]->attr.disabled) {
+			info = counter_arch_bp(bp[i]);
+			dr7 |= encode_dr7(i, info->len, info->type);
+		}
+	}
+
+	return dr7;
+}
+
+static struct perf_event *
+ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+			 struct task_struct *tsk, int disabled)
+{
+	int err;
+	int gen_len, gen_type;
+	DEFINE_BREAKPOINT_ATTR(attr);
+
+	/*
+	 * We shoud have at least an inactive breakpoint at this
+	 * slot. It means the user is writing dr7 without having
+	 * written the address register first
+	 */
+	if (!bp)
+		return ERR_PTR(-EINVAL);
+
+	err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+	if (err)
+		return ERR_PTR(err);
+
+	attr = bp->attr;
+	attr.bp_len = gen_len;
+	attr.bp_type = gen_type;
+	attr.disabled = disabled;
+
+	return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+}
+
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	unsigned long old_dr7;
+	int i, orig_ret = 0, rc = 0;
+	int enabled, second_pass = 0;
+	unsigned len, type;
+	struct perf_event *bp;
+
+	data &= ~DR_CONTROL_RESERVED;
+	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+restore:
+	/*
+	 * Loop through all the hardware breakpoints, making the
+	 * appropriate changes to each.
+	 */
+	for (i = 0; i < HBP_NUM; i++) {
+		enabled = decode_dr7(data, i, &len, &type);
+		bp = thread->ptrace_bps[i];
+
+		if (!enabled) {
+			if (bp) {
+				/*
+				 * Don't unregister the breakpoints right-away,
+				 * unless all register_user_hw_breakpoint()
+				 * requests have succeeded. This prevents
+				 * any window of opportunity for debug
+				 * register grabbing by other users.
+				 */
+				if (!second_pass)
+					continue;
+
+				thread->ptrace_bps[i] = NULL;
+				bp = ptrace_modify_breakpoint(bp, len, type,
+							      tsk, 1);
+				if (IS_ERR(bp)) {
+					rc = PTR_ERR(bp);
+					thread->ptrace_bps[i] = NULL;
+					break;
+				}
+				thread->ptrace_bps[i] = bp;
+			}
+			continue;
+		}
+
+		bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
+
+		/* Incorrect bp, or we have a bug in bp API */
+		if (IS_ERR(bp)) {
+			rc = PTR_ERR(bp);
+			thread->ptrace_bps[i] = NULL;
+			break;
+		}
+		thread->ptrace_bps[i] = bp;
+	}
+	/*
+	 * Make a second pass to free the remaining unused breakpoints
+	 * or to restore the original breakpoints if an error occurred.
+	 */
+	if (!second_pass) {
+		second_pass = 1;
+		if (rc < 0) {
+			orig_ret = rc;
+			data = old_dr7;
+		}
+		goto restore;
+	}
+	return ((orig_ret < 0) ? orig_ret : rc);
+}
+
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	unsigned long val = 0;
+
+	if (n < HBP_NUM) {
+		struct perf_event *bp;
+		bp = thread->ptrace_bps[n];
+		if (!bp)
+			return 0;
+		val = bp->hw.info.address;
+	} else if (n == 6) {
+		val = thread->debugreg6;
+	 } else if (n == 7) {
+		val = ptrace_get_dr7(thread->ptrace_bps);
+	}
+	return val;
+}
+
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+				      unsigned long addr)
+{
+	struct perf_event *bp;
+	struct thread_struct *t = &tsk->thread;
+	DEFINE_BREAKPOINT_ATTR(attr);
+
+	if (!t->ptrace_bps[nr]) {
+		/*
+		 * Put stub len and type to register (reserve) an inactive but
+		 * correct bp
+		 */
+		attr.bp_addr = addr;
+		attr.bp_len = HW_BREAKPOINT_LEN_1;
+		attr.bp_type = HW_BREAKPOINT_W;
+		attr.disabled = 1;
+
+		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+	} else {
+		bp = t->ptrace_bps[nr];
+		t->ptrace_bps[nr] = NULL;
+
+		attr = bp->attr;
+		attr.bp_addr = addr;
+		bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+	}
+	/*
+	 * CHECKME: the previous code returned -EIO if the addr wasn't a
+	 * valid task virtual addr. The new one will return -EINVAL in this
+	 * case.
+	 * -EINVAL may be what we want for in-kernel breakpoints users, but
+	 * -EIO looks better for ptrace, since we refuse a register writing
+	 * for the user. And anyway this is the previous behaviour.
+	 */
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	t->ptrace_bps[nr] = bp;
+
 	return 0;
 }
 
-static int ptrace_set_debugreg(struct task_struct *child,
-			       int n, unsigned long data)
+/*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
 {
-	int i;
+	struct thread_struct *thread = &(tsk->thread);
+	int rc = 0;
 
-	if (unlikely(n == 4 || n == 5))
+	/* There are no DR4 or DR5 registers */
+	if (n == 4 || n == 5)
 		return -EIO;
 
-	if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
-		return -EIO;
-
-	switch (n) {
-	case 0:		child->thread.debugreg0 = data; break;
-	case 1:		child->thread.debugreg1 = data; break;
-	case 2:		child->thread.debugreg2 = data; break;
-	case 3:		child->thread.debugreg3 = data; break;
-
-	case 6:
-		if ((data & ~0xffffffffUL) != 0)
-			return -EIO;
-		child->thread.debugreg6 = data;
-		break;
-
-	case 7:
-		/*
-		 * Sanity-check data. Take one half-byte at once with
-		 * check = (val >> (16 + 4*i)) & 0xf. It contains the
-		 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
-		 * 2 and 3 are LENi. Given a list of invalid values,
-		 * we do mask |= 1 << invalid_value, so that
-		 * (mask >> check) & 1 is a correct test for invalid
-		 * values.
-		 *
-		 * R/Wi contains the type of the breakpoint /
-		 * watchpoint, LENi contains the length of the watched
-		 * data in the watchpoint case.
-		 *
-		 * The invalid values are:
-		 * - LENi == 0x10 (undefined), so mask |= 0x0f00.	[32-bit]
-		 * - R/Wi == 0x10 (break on I/O reads or writes), so
-		 *   mask |= 0x4444.
-		 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
-		 *   0x1110.
-		 *
-		 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
-		 *
-		 * See the Intel Manual "System Programming Guide",
-		 * 15.2.4
-		 *
-		 * Note that LENi == 0x10 is defined on x86_64 in long
-		 * mode (i.e. even for 32-bit userspace software, but
-		 * 64-bit kernel), so the x86_64 mask value is 0x5454.
-		 * See the AMD manual no. 24593 (AMD64 System Programming)
-		 */
-#ifdef CONFIG_X86_32
-#define	DR7_MASK	0x5f54
-#else
-#define	DR7_MASK	0x5554
-#endif
-		data &= ~DR_CONTROL_RESERVED;
-		for (i = 0; i < 4; i++)
-			if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
-				return -EIO;
-		child->thread.debugreg7 = data;
-		if (data)
-			set_tsk_thread_flag(child, TIF_DEBUG);
-		else
-			clear_tsk_thread_flag(child, TIF_DEBUG);
-		break;
+	if (n == 6) {
+		thread->debugreg6 = val;
+		goto ret_path;
 	}
+	if (n < HBP_NUM) {
+		rc = ptrace_set_breakpoint_addr(tsk, n, val);
+		if (rc)
+			return rc;
+	}
+	/* All that's left is DR7 */
+	if (n == 7)
+		rc = ptrace_write_dr7(tsk, val);
 
-	return 0;
+ret_path:
+	return rc;
 }
 
 /*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2a34f9c..c0ca8f9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -109,6 +109,7 @@
 #ifdef CONFIG_X86_64
 #include <asm/numa_64.h>
 #endif
+#include <asm/mce.h>
 
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -1031,6 +1032,8 @@
 #endif
 #endif
 	x86_init.oem.banner();
+
+	mcheck_init();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76..fbf3b07 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
-		/*
-		 * Re-enable any watchpoints before delivering the
-		 * signal to user space. The processor register will
-		 * have been cleared if the watchpoint triggered
-		 * inside the kernel.
-		 */
-		if (current->thread.debugreg7)
-			set_debugreg(current->thread.debugreg7, 7);
-
 		/* Whee! Actually deliver the signal.  */
 		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
 			/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7e37dce..3339917 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -529,77 +529,56 @@
 dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk = current;
-	unsigned long condition;
+	unsigned long dr6;
 	int si_code;
 
-	get_debugreg(condition, 6);
+	get_debugreg(dr6, 6);
 
 	/* Catch kmemcheck conditions first of all! */
-	if (condition & DR_STEP && kmemcheck_trap(regs))
+	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		return;
 
+	/* DR6 may or may not be cleared by the CPU */
+	set_debugreg(0, 6);
 	/*
 	 * The processor cleared BTF, so don't mark that we need it set.
 	 */
 	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
 	tsk->thread.debugctlmsr = 0;
 
-	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
-						SIGTRAP) == NOTIFY_STOP)
+	/* Store the virtualized DR6 value */
+	tsk->thread.debugreg6 = dr6;
+
+	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+							SIGTRAP) == NOTIFY_STOP)
 		return;
 
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
-	/* Mask out spurious debug traps due to lazy DR7 setting */
-	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-		if (!tsk->thread.debugreg7)
-			goto clear_dr7;
+	if (regs->flags & X86_VM_MASK) {
+		handle_vm86_trap((struct kernel_vm86_regs *) regs,
+				error_code, 1);
+		return;
 	}
 
-#ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK)
-		goto debug_vm86;
-#endif
-
-	/* Save debug status register where ptrace can see it */
-	tsk->thread.debugreg6 = condition;
-
 	/*
-	 * Single-stepping through TF: make sure we ignore any events in
-	 * kernel space (but re-enable TF when returning to user mode).
+	 * Single-stepping through system calls: ignore any exceptions in
+	 * kernel space, but re-enable TF when returning to user mode.
+	 *
+	 * We already checked v86 mode above, so we can check for kernel mode
+	 * by just checking the CPL of CS.
 	 */
-	if (condition & DR_STEP) {
-		if (!user_mode(regs))
-			goto clear_TF_reenable;
+	if ((dr6 & DR_STEP) && !user_mode(regs)) {
+		tsk->thread.debugreg6 &= ~DR_STEP;
+		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+		regs->flags &= ~X86_EFLAGS_TF;
 	}
-
-	si_code = get_si_code(condition);
-	/* Ok, finally something we can handle */
-	send_sigtrap(tsk, regs, error_code, si_code);
-
-	/*
-	 * Disable additional traps. They'll be re-enabled when
-	 * the signal is delivered.
-	 */
-clear_dr7:
-	set_debugreg(0, 7);
+	si_code = get_si_code(tsk->thread.debugreg6);
+	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
-	return;
 
-#ifdef CONFIG_X86_32
-debug_vm86:
-	/* reenable preemption: handle_vm86_trap() might sleep */
-	dec_preempt_count();
-	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	conditional_cli(regs);
-	return;
-#endif
-
-clear_TF_reenable:
-	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-	regs->flags &= ~X86_EFLAGS_TF;
-	preempt_conditional_cli(regs);
 	return;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d26..4fc8017 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#include <asm/debugreg.h>
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
@@ -3643,14 +3644,15 @@
 	trace_kvm_entry(vcpu->vcpu_id);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
-	if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-	}
+	/*
+	 * If the guest has used debug registers, at least dr7
+	 * will be disabled while returning to the host.
+	 * If we don't have active breakpoints in the host, we don't
+	 * care about the messed up debug address registers. But if
+	 * we have some of them active, restore the old state.
+	 */
+	if (hw_breakpoint_active())
+		hw_breakpoint_restore();
 
 	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore
new file mode 100644
index 0000000..8df89f0
--- /dev/null
+++ b/arch/x86/lib/.gitignore
@@ -0,0 +1 @@
+inat-tables.c
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 85f5db9..a2d6472 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,12 +2,25 @@
 # Makefile for x86 specific library files.
 #
 
+inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
+inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
+quiet_cmd_inat_tables = GEN     $@
+      cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+
+$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+	$(call cmd,inat_tables)
+
+$(obj)/inat.o: $(obj)/inat-tables.c
+
+clean-files := inat-tables.c
+
 obj-$(CONFIG_SMP) := msr.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
 lib-y += usercopy_$(BITS).o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
+lib-y += insn.o inat.o
 
 obj-y += msr-reg.o msr-reg-export.o
 
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
new file mode 100644
index 0000000..46fc4ee
--- /dev/null
+++ b/arch/x86/lib/inat.c
@@ -0,0 +1,90 @@
+/*
+ * x86 instruction attribute tables
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <asm/insn.h>
+
+/* Attribute tables are generated from opcode map */
+#include "inat-tables.c"
+
+/* Attribute search APIs */
+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
+{
+	return inat_primary_table[opcode];
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
+				      insn_attr_t esc_attr)
+{
+	const insn_attr_t *table;
+	insn_attr_t lpfx_attr;
+	int n, m = 0;
+
+	n = inat_escape_id(esc_attr);
+	if (last_pfx) {
+		lpfx_attr = inat_get_opcode_attribute(last_pfx);
+		m = inat_last_prefix_id(lpfx_attr);
+	}
+	table = inat_escape_tables[n][0];
+	if (!table)
+		return 0;
+	if (inat_has_variant(table[opcode]) && m) {
+		table = inat_escape_tables[n][m];
+		if (!table)
+			return 0;
+	}
+	return table[opcode];
+}
+
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
+				     insn_attr_t grp_attr)
+{
+	const insn_attr_t *table;
+	insn_attr_t lpfx_attr;
+	int n, m = 0;
+
+	n = inat_group_id(grp_attr);
+	if (last_pfx) {
+		lpfx_attr = inat_get_opcode_attribute(last_pfx);
+		m = inat_last_prefix_id(lpfx_attr);
+	}
+	table = inat_group_tables[n][0];
+	if (!table)
+		return inat_group_common_attribute(grp_attr);
+	if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
+		table = inat_group_tables[n][m];
+		if (!table)
+			return inat_group_common_attribute(grp_attr);
+	}
+	return table[X86_MODRM_REG(modrm)] |
+	       inat_group_common_attribute(grp_attr);
+}
+
+insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
+				   insn_byte_t vex_p)
+{
+	const insn_attr_t *table;
+	if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
+		return 0;
+	table = inat_avx_tables[vex_m][vex_p];
+	if (!table)
+		return 0;
+	return table[opcode];
+}
+
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
new file mode 100644
index 0000000..9f33b98
--- /dev/null
+++ b/arch/x86/lib/insn.c
@@ -0,0 +1,516 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+
+#include <linux/string.h>
+#include <asm/inat.h>
+#include <asm/insn.h>
+
+#define get_next(t, insn)	\
+	({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+
+#define peek_next(t, insn)	\
+	({t r; r = *(t*)insn->next_byte; r; })
+
+#define peek_nbyte_next(t, insn, n)	\
+	({t r; r = *(t*)((insn)->next_byte + n); r; })
+
+/**
+ * insn_init() - initialize struct insn
+ * @insn:	&struct insn to be initialized
+ * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
+ * @x86_64:	!0 for 64-bit kernel or 64-bit app
+ */
+void insn_init(struct insn *insn, const void *kaddr, int x86_64)
+{
+	memset(insn, 0, sizeof(*insn));
+	insn->kaddr = kaddr;
+	insn->next_byte = kaddr;
+	insn->x86_64 = x86_64 ? 1 : 0;
+	insn->opnd_bytes = 4;
+	if (x86_64)
+		insn->addr_bytes = 8;
+	else
+		insn->addr_bytes = 4;
+}
+
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn:	&struct insn containing instruction
+ *
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode.  No effect if @insn->prefixes.got
+ * is already set.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+	struct insn_field *prefixes = &insn->prefixes;
+	insn_attr_t attr;
+	insn_byte_t b, lb;
+	int i, nb;
+
+	if (prefixes->got)
+		return;
+
+	nb = 0;
+	lb = 0;
+	b = peek_next(insn_byte_t, insn);
+	attr = inat_get_opcode_attribute(b);
+	while (inat_is_legacy_prefix(attr)) {
+		/* Skip if same prefix */
+		for (i = 0; i < nb; i++)
+			if (prefixes->bytes[i] == b)
+				goto found;
+		if (nb == 4)
+			/* Invalid instruction */
+			break;
+		prefixes->bytes[nb++] = b;
+		if (inat_is_address_size_prefix(attr)) {
+			/* address size switches 2/4 or 4/8 */
+			if (insn->x86_64)
+				insn->addr_bytes ^= 12;
+			else
+				insn->addr_bytes ^= 6;
+		} else if (inat_is_operand_size_prefix(attr)) {
+			/* oprand size switches 2/4 */
+			insn->opnd_bytes ^= 6;
+		}
+found:
+		prefixes->nbytes++;
+		insn->next_byte++;
+		lb = b;
+		b = peek_next(insn_byte_t, insn);
+		attr = inat_get_opcode_attribute(b);
+	}
+	/* Set the last prefix */
+	if (lb && lb != insn->prefixes.bytes[3]) {
+		if (unlikely(insn->prefixes.bytes[3])) {
+			/* Swap the last prefix */
+			b = insn->prefixes.bytes[3];
+			for (i = 0; i < nb; i++)
+				if (prefixes->bytes[i] == lb)
+					prefixes->bytes[i] = b;
+		}
+		insn->prefixes.bytes[3] = lb;
+	}
+
+	/* Decode REX prefix */
+	if (insn->x86_64) {
+		b = peek_next(insn_byte_t, insn);
+		attr = inat_get_opcode_attribute(b);
+		if (inat_is_rex_prefix(attr)) {
+			insn->rex_prefix.value = b;
+			insn->rex_prefix.nbytes = 1;
+			insn->next_byte++;
+			if (X86_REX_W(b))
+				/* REX.W overrides opnd_size */
+				insn->opnd_bytes = 8;
+		}
+	}
+	insn->rex_prefix.got = 1;
+
+	/* Decode VEX prefix */
+	b = peek_next(insn_byte_t, insn);
+	attr = inat_get_opcode_attribute(b);
+	if (inat_is_vex_prefix(attr)) {
+		insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
+		if (!insn->x86_64) {
+			/*
+			 * In 32-bits mode, if the [7:6] bits (mod bits of
+			 * ModRM) on the second byte are not 11b, it is
+			 * LDS or LES.
+			 */
+			if (X86_MODRM_MOD(b2) != 3)
+				goto vex_end;
+		}
+		insn->vex_prefix.bytes[0] = b;
+		insn->vex_prefix.bytes[1] = b2;
+		if (inat_is_vex3_prefix(attr)) {
+			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+			insn->vex_prefix.bytes[2] = b2;
+			insn->vex_prefix.nbytes = 3;
+			insn->next_byte += 3;
+			if (insn->x86_64 && X86_VEX_W(b2))
+				/* VEX.W overrides opnd_size */
+				insn->opnd_bytes = 8;
+		} else {
+			insn->vex_prefix.nbytes = 2;
+			insn->next_byte += 2;
+		}
+	}
+vex_end:
+	insn->vex_prefix.got = 1;
+
+	prefixes->got = 1;
+	return;
+}
+
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn:	&struct insn containing instruction
+ *
+ * Populates @insn->opcode, updates @insn->next_byte to point past the
+ * opcode byte(s), and set @insn->attr (except for groups).
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
+ * is already 1.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+	struct insn_field *opcode = &insn->opcode;
+	insn_byte_t op, pfx;
+	if (opcode->got)
+		return;
+	if (!insn->prefixes.got)
+		insn_get_prefixes(insn);
+
+	/* Get first opcode */
+	op = get_next(insn_byte_t, insn);
+	opcode->bytes[0] = op;
+	opcode->nbytes = 1;
+
+	/* Check if there is VEX prefix or not */
+	if (insn_is_avx(insn)) {
+		insn_byte_t m, p;
+		m = insn_vex_m_bits(insn);
+		p = insn_vex_p_bits(insn);
+		insn->attr = inat_get_avx_attribute(op, m, p);
+		if (!inat_accept_vex(insn->attr))
+			insn->attr = 0;	/* This instruction is bad */
+		goto end;	/* VEX has only 1 byte for opcode */
+	}
+
+	insn->attr = inat_get_opcode_attribute(op);
+	while (inat_is_escape(insn->attr)) {
+		/* Get escaped opcode */
+		op = get_next(insn_byte_t, insn);
+		opcode->bytes[opcode->nbytes++] = op;
+		pfx = insn_last_prefix(insn);
+		insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
+	}
+	if (inat_must_vex(insn->attr))
+		insn->attr = 0;	/* This instruction is bad */
+end:
+	opcode->got = 1;
+}
+
+/**
+ * insn_get_modrm - collect ModRM byte, if any
+ * @insn:	&struct insn containing instruction
+ *
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any.  If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+	struct insn_field *modrm = &insn->modrm;
+	insn_byte_t pfx, mod;
+	if (modrm->got)
+		return;
+	if (!insn->opcode.got)
+		insn_get_opcode(insn);
+
+	if (inat_has_modrm(insn->attr)) {
+		mod = get_next(insn_byte_t, insn);
+		modrm->value = mod;
+		modrm->nbytes = 1;
+		if (inat_is_group(insn->attr)) {
+			pfx = insn_last_prefix(insn);
+			insn->attr = inat_get_group_attribute(mod, pfx,
+							      insn->attr);
+		}
+	}
+
+	if (insn->x86_64 && inat_is_force64(insn->attr))
+		insn->opnd_bytes = 8;
+	modrm->got = 1;
+}
+
+
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn:	&struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.  No effect if @insn->x86_64 is 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+	struct insn_field *modrm = &insn->modrm;
+
+	if (!insn->x86_64)
+		return 0;
+	if (!modrm->got)
+		insn_get_modrm(insn);
+	/*
+	 * For rip-relative instructions, the mod field (top 2 bits)
+	 * is zero and the r/m field (bottom 3 bits) is 0x5.
+	 */
+	return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+}
+
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn:	&struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
+ */
+void insn_get_sib(struct insn *insn)
+{
+	insn_byte_t modrm;
+
+	if (insn->sib.got)
+		return;
+	if (!insn->modrm.got)
+		insn_get_modrm(insn);
+	if (insn->modrm.nbytes) {
+		modrm = (insn_byte_t)insn->modrm.value;
+		if (insn->addr_bytes != 2 &&
+		    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
+			insn->sib.value = get_next(insn_byte_t, insn);
+			insn->sib.nbytes = 1;
+		}
+	}
+	insn->sib.got = 1;
+}
+
+
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn:	&struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * Displacement value is sign-expanded.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+	insn_byte_t mod, rm, base;
+
+	if (insn->displacement.got)
+		return;
+	if (!insn->sib.got)
+		insn_get_sib(insn);
+	if (insn->modrm.nbytes) {
+		/*
+		 * Interpreting the modrm byte:
+		 * mod = 00 - no displacement fields (exceptions below)
+		 * mod = 01 - 1-byte displacement field
+		 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
+		 * 	address size = 2 (0x67 prefix in 32-bit mode)
+		 * mod = 11 - no memory operand
+		 *
+		 * If address size = 2...
+		 * mod = 00, r/m = 110 - displacement field is 2 bytes
+		 *
+		 * If address size != 2...
+		 * mod != 11, r/m = 100 - SIB byte exists
+		 * mod = 00, SIB base = 101 - displacement field is 4 bytes
+		 * mod = 00, r/m = 101 - rip-relative addressing, displacement
+		 * 	field is 4 bytes
+		 */
+		mod = X86_MODRM_MOD(insn->modrm.value);
+		rm = X86_MODRM_RM(insn->modrm.value);
+		base = X86_SIB_BASE(insn->sib.value);
+		if (mod == 3)
+			goto out;
+		if (mod == 1) {
+			insn->displacement.value = get_next(char, insn);
+			insn->displacement.nbytes = 1;
+		} else if (insn->addr_bytes == 2) {
+			if ((mod == 0 && rm == 6) || mod == 2) {
+				insn->displacement.value =
+					 get_next(short, insn);
+				insn->displacement.nbytes = 2;
+			}
+		} else {
+			if ((mod == 0 && rm == 5) || mod == 2 ||
+			    (mod == 0 && base == 5)) {
+				insn->displacement.value = get_next(int, insn);
+				insn->displacement.nbytes = 4;
+			}
+		}
+	}
+out:
+	insn->displacement.got = 1;
+}
+
+/* Decode moffset16/32/64 */
+static void __get_moffset(struct insn *insn)
+{
+	switch (insn->addr_bytes) {
+	case 2:
+		insn->moffset1.value = get_next(short, insn);
+		insn->moffset1.nbytes = 2;
+		break;
+	case 4:
+		insn->moffset1.value = get_next(int, insn);
+		insn->moffset1.nbytes = 4;
+		break;
+	case 8:
+		insn->moffset1.value = get_next(int, insn);
+		insn->moffset1.nbytes = 4;
+		insn->moffset2.value = get_next(int, insn);
+		insn->moffset2.nbytes = 4;
+		break;
+	}
+	insn->moffset1.got = insn->moffset2.got = 1;
+}
+
+/* Decode imm v32(Iz) */
+static void __get_immv32(struct insn *insn)
+{
+	switch (insn->opnd_bytes) {
+	case 2:
+		insn->immediate.value = get_next(short, insn);
+		insn->immediate.nbytes = 2;
+		break;
+	case 4:
+	case 8:
+		insn->immediate.value = get_next(int, insn);
+		insn->immediate.nbytes = 4;
+		break;
+	}
+}
+
+/* Decode imm v64(Iv/Ov) */
+static void __get_immv(struct insn *insn)
+{
+	switch (insn->opnd_bytes) {
+	case 2:
+		insn->immediate1.value = get_next(short, insn);
+		insn->immediate1.nbytes = 2;
+		break;
+	case 4:
+		insn->immediate1.value = get_next(int, insn);
+		insn->immediate1.nbytes = 4;
+		break;
+	case 8:
+		insn->immediate1.value = get_next(int, insn);
+		insn->immediate1.nbytes = 4;
+		insn->immediate2.value = get_next(int, insn);
+		insn->immediate2.nbytes = 4;
+		break;
+	}
+	insn->immediate1.got = insn->immediate2.got = 1;
+}
+
+/* Decode ptr16:16/32(Ap) */
+static void __get_immptr(struct insn *insn)
+{
+	switch (insn->opnd_bytes) {
+	case 2:
+		insn->immediate1.value = get_next(short, insn);
+		insn->immediate1.nbytes = 2;
+		break;
+	case 4:
+		insn->immediate1.value = get_next(int, insn);
+		insn->immediate1.nbytes = 4;
+		break;
+	case 8:
+		/* ptr16:64 is not exist (no segment) */
+		return;
+	}
+	insn->immediate2.value = get_next(unsigned short, insn);
+	insn->immediate2.nbytes = 2;
+	insn->immediate1.got = insn->immediate2.got = 1;
+}
+
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn:	&struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Basically, most of immediates are sign-expanded. Unsigned-value can be
+ * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ */
+void insn_get_immediate(struct insn *insn)
+{
+	if (insn->immediate.got)
+		return;
+	if (!insn->displacement.got)
+		insn_get_displacement(insn);
+
+	if (inat_has_moffset(insn->attr)) {
+		__get_moffset(insn);
+		goto done;
+	}
+
+	if (!inat_has_immediate(insn->attr))
+		/* no immediates */
+		goto done;
+
+	switch (inat_immediate_size(insn->attr)) {
+	case INAT_IMM_BYTE:
+		insn->immediate.value = get_next(char, insn);
+		insn->immediate.nbytes = 1;
+		break;
+	case INAT_IMM_WORD:
+		insn->immediate.value = get_next(short, insn);
+		insn->immediate.nbytes = 2;
+		break;
+	case INAT_IMM_DWORD:
+		insn->immediate.value = get_next(int, insn);
+		insn->immediate.nbytes = 4;
+		break;
+	case INAT_IMM_QWORD:
+		insn->immediate1.value = get_next(int, insn);
+		insn->immediate1.nbytes = 4;
+		insn->immediate2.value = get_next(int, insn);
+		insn->immediate2.nbytes = 4;
+		break;
+	case INAT_IMM_PTR:
+		__get_immptr(insn);
+		break;
+	case INAT_IMM_VWORD32:
+		__get_immv32(insn);
+		break;
+	case INAT_IMM_VWORD:
+		__get_immv(insn);
+		break;
+	default:
+		break;
+	}
+	if (inat_has_second_immediate(insn->attr)) {
+		insn->immediate2.value = get_next(char, insn);
+		insn->immediate2.nbytes = 1;
+	}
+done:
+	insn->immediate.got = 1;
+}
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn:	&struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+ */
+void insn_get_length(struct insn *insn)
+{
+	if (insn->length)
+		return;
+	if (!insn->immediate.got)
+		insn_get_immediate(insn);
+	insn->length = (unsigned char)((unsigned long)insn->next_byte
+				     - (unsigned long)insn->kaddr);
+}
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
new file mode 100644
index 0000000..a793da5
--- /dev/null
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -0,0 +1,893 @@
+# x86 Opcode Maps
+#
+#<Opcode maps>
+# Table: table-name
+# Referrer: escaped-name
+# AVXcode: avx-code
+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# (or)
+# opcode: escape # escaped-name
+# EndTable
+#
+#<group maps>
+# GrpTable: GrpXXX
+# reg:  mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# EndTable
+#
+# AVX Superscripts
+#  (VEX): this opcode can accept VEX prefix.
+#  (oVEX): this opcode requires VEX prefix.
+#  (o128): this opcode only supports 128bit VEX.
+#  (o256): this opcode only supports 256bit VEX.
+#
+
+Table: one byte opcode
+Referrer:
+AVXcode:
+# 0x00 - 0x0f
+00: ADD Eb,Gb
+01: ADD Ev,Gv
+02: ADD Gb,Eb
+03: ADD Gv,Ev
+04: ADD AL,Ib
+05: ADD rAX,Iz
+06: PUSH ES (i64)
+07: POP ES (i64)
+08: OR Eb,Gb
+09: OR Ev,Gv
+0a: OR Gb,Eb
+0b: OR Gv,Ev
+0c: OR AL,Ib
+0d: OR rAX,Iz
+0e: PUSH CS (i64)
+0f: escape # 2-byte escape
+# 0x10 - 0x1f
+10: ADC Eb,Gb
+11: ADC Ev,Gv
+12: ADC Gb,Eb
+13: ADC Gv,Ev
+14: ADC AL,Ib
+15: ADC rAX,Iz
+16: PUSH SS (i64)
+17: POP SS (i64)
+18: SBB Eb,Gb
+19: SBB Ev,Gv
+1a: SBB Gb,Eb
+1b: SBB Gv,Ev
+1c: SBB AL,Ib
+1d: SBB rAX,Iz
+1e: PUSH DS (i64)
+1f: POP DS (i64)
+# 0x20 - 0x2f
+20: AND Eb,Gb
+21: AND Ev,Gv
+22: AND Gb,Eb
+23: AND Gv,Ev
+24: AND AL,Ib
+25: AND rAx,Iz
+26: SEG=ES (Prefix)
+27: DAA (i64)
+28: SUB Eb,Gb
+29: SUB Ev,Gv
+2a: SUB Gb,Eb
+2b: SUB Gv,Ev
+2c: SUB AL,Ib
+2d: SUB rAX,Iz
+2e: SEG=CS (Prefix)
+2f: DAS (i64)
+# 0x30 - 0x3f
+30: XOR Eb,Gb
+31: XOR Ev,Gv
+32: XOR Gb,Eb
+33: XOR Gv,Ev
+34: XOR AL,Ib
+35: XOR rAX,Iz
+36: SEG=SS (Prefix)
+37: AAA (i64)
+38: CMP Eb,Gb
+39: CMP Ev,Gv
+3a: CMP Gb,Eb
+3b: CMP Gv,Ev
+3c: CMP AL,Ib
+3d: CMP rAX,Iz
+3e: SEG=DS (Prefix)
+3f: AAS (i64)
+# 0x40 - 0x4f
+40: INC eAX (i64) | REX (o64)
+41: INC eCX (i64) | REX.B (o64)
+42: INC eDX (i64) | REX.X (o64)
+43: INC eBX (i64) | REX.XB (o64)
+44: INC eSP (i64) | REX.R (o64)
+45: INC eBP (i64) | REX.RB (o64)
+46: INC eSI (i64) | REX.RX (o64)
+47: INC eDI (i64) | REX.RXB (o64)
+48: DEC eAX (i64) | REX.W (o64)
+49: DEC eCX (i64) | REX.WB (o64)
+4a: DEC eDX (i64) | REX.WX (o64)
+4b: DEC eBX (i64) | REX.WXB (o64)
+4c: DEC eSP (i64) | REX.WR (o64)
+4d: DEC eBP (i64) | REX.WRB (o64)
+4e: DEC eSI (i64) | REX.WRX (o64)
+4f: DEC eDI (i64) | REX.WRXB (o64)
+# 0x50 - 0x5f
+50: PUSH rAX/r8 (d64)
+51: PUSH rCX/r9 (d64)
+52: PUSH rDX/r10 (d64)
+53: PUSH rBX/r11 (d64)
+54: PUSH rSP/r12 (d64)
+55: PUSH rBP/r13 (d64)
+56: PUSH rSI/r14 (d64)
+57: PUSH rDI/r15 (d64)
+58: POP rAX/r8 (d64)
+59: POP rCX/r9 (d64)
+5a: POP rDX/r10 (d64)
+5b: POP rBX/r11 (d64)
+5c: POP rSP/r12 (d64)
+5d: POP rBP/r13 (d64)
+5e: POP rSI/r14 (d64)
+5f: POP rDI/r15 (d64)
+# 0x60 - 0x6f
+60: PUSHA/PUSHAD (i64)
+61: POPA/POPAD (i64)
+62: BOUND Gv,Ma (i64)
+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
+64: SEG=FS (Prefix)
+65: SEG=GS (Prefix)
+66: Operand-Size (Prefix)
+67: Address-Size (Prefix)
+68: PUSH Iz (d64)
+69: IMUL Gv,Ev,Iz
+6a: PUSH Ib (d64)
+6b: IMUL Gv,Ev,Ib
+6c: INS/INSB Yb,DX
+6d: INS/INSW/INSD Yz,DX
+6e: OUTS/OUTSB DX,Xb
+6f: OUTS/OUTSW/OUTSD DX,Xz
+# 0x70 - 0x7f
+70: JO Jb
+71: JNO Jb
+72: JB/JNAE/JC Jb
+73: JNB/JAE/JNC Jb
+74: JZ/JE Jb
+75: JNZ/JNE Jb
+76: JBE/JNA Jb
+77: JNBE/JA Jb
+78: JS Jb
+79: JNS Jb
+7a: JP/JPE Jb
+7b: JNP/JPO Jb
+7c: JL/JNGE Jb
+7d: JNL/JGE Jb
+7e: JLE/JNG Jb
+7f: JNLE/JG Jb
+# 0x80 - 0x8f
+80: Grp1 Eb,Ib (1A)
+81: Grp1 Ev,Iz (1A)
+82: Grp1 Eb,Ib (1A),(i64)
+83: Grp1 Ev,Ib (1A)
+84: TEST Eb,Gb
+85: TEST Ev,Gv
+86: XCHG Eb,Gb
+87: XCHG Ev,Gv
+88: MOV Eb,Gb
+89: MOV Ev,Gv
+8a: MOV Gb,Eb
+8b: MOV Gv,Ev
+8c: MOV Ev,Sw
+8d: LEA Gv,M
+8e: MOV Sw,Ew
+8f: Grp1A (1A) | POP Ev (d64)
+# 0x90 - 0x9f
+90: NOP | PAUSE (F3) | XCHG r8,rAX
+91: XCHG rCX/r9,rAX
+92: XCHG rDX/r10,rAX
+93: XCHG rBX/r11,rAX
+94: XCHG rSP/r12,rAX
+95: XCHG rBP/r13,rAX
+96: XCHG rSI/r14,rAX
+97: XCHG rDI/r15,rAX
+98: CBW/CWDE/CDQE
+99: CWD/CDQ/CQO
+9a: CALLF Ap (i64)
+9b: FWAIT/WAIT
+9c: PUSHF/D/Q Fv (d64)
+9d: POPF/D/Q Fv (d64)
+9e: SAHF
+9f: LAHF
+# 0xa0 - 0xaf
+a0: MOV AL,Ob
+a1: MOV rAX,Ov
+a2: MOV Ob,AL
+a3: MOV Ov,rAX
+a4: MOVS/B Xb,Yb
+a5: MOVS/W/D/Q Xv,Yv
+a6: CMPS/B Xb,Yb
+a7: CMPS/W/D Xv,Yv
+a8: TEST AL,Ib
+a9: TEST rAX,Iz
+aa: STOS/B Yb,AL
+ab: STOS/W/D/Q Yv,rAX
+ac: LODS/B AL,Xb
+ad: LODS/W/D/Q rAX,Xv
+ae: SCAS/B AL,Yb
+af: SCAS/W/D/Q rAX,Xv
+# 0xb0 - 0xbf
+b0: MOV AL/R8L,Ib
+b1: MOV CL/R9L,Ib
+b2: MOV DL/R10L,Ib
+b3: MOV BL/R11L,Ib
+b4: MOV AH/R12L,Ib
+b5: MOV CH/R13L,Ib
+b6: MOV DH/R14L,Ib
+b7: MOV BH/R15L,Ib
+b8: MOV rAX/r8,Iv
+b9: MOV rCX/r9,Iv
+ba: MOV rDX/r10,Iv
+bb: MOV rBX/r11,Iv
+bc: MOV rSP/r12,Iv
+bd: MOV rBP/r13,Iv
+be: MOV rSI/r14,Iv
+bf: MOV rDI/r15,Iv
+# 0xc0 - 0xcf
+c0: Grp2 Eb,Ib (1A)
+c1: Grp2 Ev,Ib (1A)
+c2: RETN Iw (f64)
+c3: RETN
+c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix)
+c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix)
+c6: Grp11 Eb,Ib (1A)
+c7: Grp11 Ev,Iz (1A)
+c8: ENTER Iw,Ib
+c9: LEAVE (d64)
+ca: RETF Iw
+cb: RETF
+cc: INT3
+cd: INT Ib
+ce: INTO (i64)
+cf: IRET/D/Q
+# 0xd0 - 0xdf
+d0: Grp2 Eb,1 (1A)
+d1: Grp2 Ev,1 (1A)
+d2: Grp2 Eb,CL (1A)
+d3: Grp2 Ev,CL (1A)
+d4: AAM Ib (i64)
+d5: AAD Ib (i64)
+d6:
+d7: XLAT/XLATB
+d8: ESC
+d9: ESC
+da: ESC
+db: ESC
+dc: ESC
+dd: ESC
+de: ESC
+df: ESC
+# 0xe0 - 0xef
+e0: LOOPNE/LOOPNZ Jb (f64)
+e1: LOOPE/LOOPZ Jb (f64)
+e2: LOOP Jb (f64)
+e3: JrCXZ Jb (f64)
+e4: IN AL,Ib
+e5: IN eAX,Ib
+e6: OUT Ib,AL
+e7: OUT Ib,eAX
+e8: CALL Jz (f64)
+e9: JMP-near Jz (f64)
+ea: JMP-far Ap (i64)
+eb: JMP-short Jb (f64)
+ec: IN AL,DX
+ed: IN eAX,DX
+ee: OUT DX,AL
+ef: OUT DX,eAX
+# 0xf0 - 0xff
+f0: LOCK (Prefix)
+f1:
+f2: REPNE (Prefix)
+f3: REP/REPE (Prefix)
+f4: HLT
+f5: CMC
+f6: Grp3_1 Eb (1A)
+f7: Grp3_2 Ev (1A)
+f8: CLC
+f9: STC
+fa: CLI
+fb: STI
+fc: CLD
+fd: STD
+fe: Grp4 (1A)
+ff: Grp5 (1A)
+EndTable
+
+Table: 2-byte opcode (0x0f)
+Referrer: 2-byte escape
+AVXcode: 1
+# 0x0f 0x00-0x0f
+00: Grp6 (1A)
+01: Grp7 (1A)
+02: LAR Gv,Ew
+03: LSL Gv,Ew
+04:
+05: SYSCALL (o64)
+06: CLTS
+07: SYSRET (o64)
+08: INVD
+09: WBINVD
+0a:
+0b: UD2 (1B)
+0c:
+0d: NOP Ev | GrpP
+0e: FEMMS
+# 3DNow! uses the last imm byte as opcode extension.
+0f: 3DNow! Pq,Qq,Ib
+# 0x0f 0x10-0x1f
+10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128)
+11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128)
+12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX)
+13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128)
+14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX)
+15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX)
+16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX)
+17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128)
+18: Grp16 (1A)
+19:
+1a:
+1b:
+1c:
+1d:
+1e:
+1f: NOP Ev
+# 0x0f 0x20-0x2f
+20: MOV Rd,Cd
+21: MOV Rd,Dd
+22: MOV Cd,Rd
+23: MOV Dd,Rd
+24:
+25:
+26:
+27:
+28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX)
+29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX)
+2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128)
+2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX)
+2c: cvttps2pi Ppi,Wps | cvttss2si  Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128)
+2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128)
+2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd  Vsd,Wsd (66),(VEX),(o128)
+2f: comiss Vss,Wss (VEX),(o128) | comisd  Vsd,Wsd (66),(VEX),(o128)
+# 0x0f 0x30-0x3f
+30: WRMSR
+31: RDTSC
+32: RDMSR
+33: RDPMC
+34: SYSENTER
+35: SYSEXIT
+36:
+37: GETSEC
+38: escape # 3-byte escape 1
+39:
+3a: escape # 3-byte escape 2
+3b:
+3c:
+3d:
+3e:
+3f:
+# 0x0f 0x40-0x4f
+40: CMOVO Gv,Ev
+41: CMOVNO Gv,Ev
+42: CMOVB/C/NAE Gv,Ev
+43: CMOVAE/NB/NC Gv,Ev
+44: CMOVE/Z Gv,Ev
+45: CMOVNE/NZ Gv,Ev
+46: CMOVBE/NA Gv,Ev
+47: CMOVA/NBE Gv,Ev
+48: CMOVS Gv,Ev
+49: CMOVNS Gv,Ev
+4a: CMOVP/PE Gv,Ev
+4b: CMOVNP/PO Gv,Ev
+4c: CMOVL/NGE Gv,Ev
+4d: CMOVNL/GE Gv,Ev
+4e: CMOVLE/NG Gv,Ev
+4f: CMOVNLE/G Gv,Ev
+# 0x0f 0x50-0x5f
+50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX)
+51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128)
+52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128)
+53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128)
+54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX)
+55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX)
+56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX)
+57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX)
+58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128)
+59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128)
+5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128)
+5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX)
+5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128)
+5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128)
+5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128)
+5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128)
+# 0x0f 0x60-0x6f
+60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128)
+61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128)
+62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128)
+63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128)
+64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128)
+65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128)
+66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128)
+67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128)
+68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128)
+69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128)
+6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128)
+6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128)
+6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128)
+6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128)
+6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128)
+6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX)
+# 0x0f 0x70-0x7f
+70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128)
+71: Grp12 (1A)
+72: Grp13 (1A)
+73: Grp14 (1A)
+74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128)
+75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128)
+76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128)
+77: emms/vzeroupper/vzeroall (VEX)
+78: VMREAD Ed/q,Gd/q
+79: VMWRITE Gd/q,Ed/q
+7a:
+7b:
+7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX)
+7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX)
+7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128)
+7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX)
+# 0x0f 0x80-0x8f
+80: JO Jz (f64)
+81: JNO Jz (f64)
+82: JB/JNAE/JC Jz (f64)
+83: JNB/JAE/JNC Jz (f64)
+84: JZ/JE Jz (f64)
+85: JNZ/JNE Jz (f64)
+86: JBE/JNA Jz (f64)
+87: JNBE/JA Jz (f64)
+88: JS Jz (f64)
+89: JNS Jz (f64)
+8a: JP/JPE Jz (f64)
+8b: JNP/JPO Jz (f64)
+8c: JL/JNGE Jz (f64)
+8d: JNL/JGE Jz (f64)
+8e: JLE/JNG Jz (f64)
+8f: JNLE/JG Jz (f64)
+# 0x0f 0x90-0x9f
+90: SETO Eb
+91: SETNO Eb
+92: SETB/C/NAE Eb
+93: SETAE/NB/NC Eb
+94: SETE/Z Eb
+95: SETNE/NZ Eb
+96: SETBE/NA Eb
+97: SETA/NBE Eb
+98: SETS Eb
+99: SETNS Eb
+9a: SETP/PE Eb
+9b: SETNP/PO Eb
+9c: SETL/NGE Eb
+9d: SETNL/GE Eb
+9e: SETLE/NG Eb
+9f: SETNLE/G Eb
+# 0x0f 0xa0-0xaf
+a0: PUSH FS (d64)
+a1: POP FS (d64)
+a2: CPUID
+a3: BT Ev,Gv
+a4: SHLD Ev,Gv,Ib
+a5: SHLD Ev,Gv,CL
+a6: GrpPDLK
+a7: GrpRNG
+a8: PUSH GS (d64)
+a9: POP GS (d64)
+aa: RSM
+ab: BTS Ev,Gv
+ac: SHRD Ev,Gv,Ib
+ad: SHRD Ev,Gv,CL
+ae: Grp15 (1A),(1C)
+af: IMUL Gv,Ev
+# 0x0f 0xb0-0xbf
+b0: CMPXCHG Eb,Gb
+b1: CMPXCHG Ev,Gv
+b2: LSS Gv,Mp
+b3: BTR Ev,Gv
+b4: LFS Gv,Mp
+b5: LGS Gv,Mp
+b6: MOVZX Gv,Eb
+b7: MOVZX Gv,Ew
+b8: JMPE | POPCNT Gv,Ev (F3)
+b9: Grp10 (1A)
+ba: Grp8 Ev,Ib (1A)
+bb: BTC Ev,Gv
+bc: BSF Gv,Ev
+bd: BSR Gv,Ev
+be: MOVSX Gv,Eb
+bf: MOVSX Gv,Ew
+# 0x0f 0xc0-0xcf
+c0: XADD Eb,Gb
+c1: XADD Ev,Gv
+c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX)
+c3: movnti Md/q,Gd/q
+c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128)
+c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128)
+c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX)
+c7: Grp9 (1A)
+c8: BSWAP RAX/EAX/R8/R8D
+c9: BSWAP RCX/ECX/R9/R9D
+ca: BSWAP RDX/EDX/R10/R10D
+cb: BSWAP RBX/EBX/R11/R11D
+cc: BSWAP RSP/ESP/R12/R12D
+cd: BSWAP RBP/EBP/R13/R13D
+ce: BSWAP RSI/ESI/R14/R14D
+cf: BSWAP RDI/EDI/R15/R15D
+# 0x0f 0xd0-0xdf
+d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX)
+d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128)
+d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128)
+d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128)
+d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128)
+d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128)
+d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128)
+d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128)
+d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128)
+da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128)
+db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128)
+dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128)
+dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128)
+de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128)
+df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128)
+# 0x0f 0xe0-0xef
+e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128)
+e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128)
+e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128)
+e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128)
+e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128)
+e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128)
+e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX)
+e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX)
+e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128)
+e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128)
+ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128)
+eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128)
+ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128)
+ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128)
+ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128)
+ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128)
+# 0x0f 0xf0-0xff
+f0: lddqu Vdq,Mdq (F2),(VEX)
+f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128)
+f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128)
+f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128)
+f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128)
+f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128)
+f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128)
+f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128)
+f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128)
+f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128)
+fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128)
+fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128)
+fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128)
+fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128)
+fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128)
+ff:
+EndTable
+
+Table: 3-byte opcode 1 (0x0f 0x38)
+Referrer: 3-byte escape 1
+AVXcode: 2
+# 0x0f 0x38 0x00-0x0f
+00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128)
+01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128)
+02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128)
+03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128)
+04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128)
+05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128)
+06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128)
+07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128)
+08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128)
+09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128)
+0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128)
+0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128)
+0c: Vpermilps /r (66),(oVEX)
+0d: Vpermilpd /r (66),(oVEX)
+0e: vtestps /r (66),(oVEX)
+0f: vtestpd /r (66),(oVEX)
+# 0x0f 0x38 0x10-0x1f
+10: pblendvb Vdq,Wdq (66)
+11:
+12:
+13:
+14: blendvps Vdq,Wdq (66)
+15: blendvpd Vdq,Wdq (66)
+16:
+17: ptest Vdq,Wdq (66),(VEX)
+18: vbroadcastss /r (66),(oVEX)
+19: vbroadcastsd /r (66),(oVEX),(o256)
+1a: vbroadcastf128 /r (66),(oVEX),(o256)
+1b:
+1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128)
+1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128)
+1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128)
+1f:
+# 0x0f 0x38 0x20-0x2f
+20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128)
+21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128)
+22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128)
+23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128)
+24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128)
+25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128)
+26:
+27:
+28: pmuldq Vdq,Wdq (66),(VEX),(o128)
+29: pcmpeqq Vdq,Wdq (66),(VEX),(o128)
+2a: movntdqa Vdq,Mdq (66),(VEX),(o128)
+2b: packusdw Vdq,Wdq (66),(VEX),(o128)
+2c: vmaskmovps(ld) /r (66),(oVEX)
+2d: vmaskmovpd(ld) /r (66),(oVEX)
+2e: vmaskmovps(st) /r (66),(oVEX)
+2f: vmaskmovpd(st) /r (66),(oVEX)
+# 0x0f 0x38 0x30-0x3f
+30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128)
+31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128)
+32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128)
+33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128)
+34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128)
+35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128)
+36:
+37: pcmpgtq Vdq,Wdq (66),(VEX),(o128)
+38: pminsb Vdq,Wdq (66),(VEX),(o128)
+39: pminsd Vdq,Wdq (66),(VEX),(o128)
+3a: pminuw Vdq,Wdq (66),(VEX),(o128)
+3b: pminud Vdq,Wdq (66),(VEX),(o128)
+3c: pmaxsb Vdq,Wdq (66),(VEX),(o128)
+3d: pmaxsd Vdq,Wdq (66),(VEX),(o128)
+3e: pmaxuw Vdq,Wdq (66),(VEX),(o128)
+3f: pmaxud Vdq,Wdq (66),(VEX),(o128)
+# 0x0f 0x38 0x40-0x8f
+40: pmulld Vdq,Wdq (66),(VEX),(o128)
+41: phminposuw Vdq,Wdq (66),(VEX),(o128)
+80: INVEPT Gd/q,Mdq (66)
+81: INVPID Gd/q,Mdq (66)
+# 0x0f 0x38 0x90-0xbf (FMA)
+96: vfmaddsub132pd/ps /r (66),(VEX)
+97: vfmsubadd132pd/ps /r (66),(VEX)
+98: vfmadd132pd/ps /r (66),(VEX)
+99: vfmadd132sd/ss /r (66),(VEX),(o128)
+9a: vfmsub132pd/ps /r (66),(VEX)
+9b: vfmsub132sd/ss /r (66),(VEX),(o128)
+9c: vfnmadd132pd/ps /r (66),(VEX)
+9d: vfnmadd132sd/ss /r (66),(VEX),(o128)
+9e: vfnmsub132pd/ps /r (66),(VEX)
+9f: vfnmsub132sd/ss /r (66),(VEX),(o128)
+a6: vfmaddsub213pd/ps /r (66),(VEX)
+a7: vfmsubadd213pd/ps /r (66),(VEX)
+a8: vfmadd213pd/ps /r (66),(VEX)
+a9: vfmadd213sd/ss /r (66),(VEX),(o128)
+aa: vfmsub213pd/ps /r (66),(VEX)
+ab: vfmsub213sd/ss /r (66),(VEX),(o128)
+ac: vfnmadd213pd/ps /r (66),(VEX)
+ad: vfnmadd213sd/ss /r (66),(VEX),(o128)
+ae: vfnmsub213pd/ps /r (66),(VEX)
+af: vfnmsub213sd/ss /r (66),(VEX),(o128)
+b6: vfmaddsub231pd/ps /r (66),(VEX)
+b7: vfmsubadd231pd/ps /r (66),(VEX)
+b8: vfmadd231pd/ps /r (66),(VEX)
+b9: vfmadd231sd/ss /r (66),(VEX),(o128)
+ba: vfmsub231pd/ps /r (66),(VEX)
+bb: vfmsub231sd/ss /r (66),(VEX),(o128)
+bc: vfnmadd231pd/ps /r (66),(VEX)
+bd: vfnmadd231sd/ss /r (66),(VEX),(o128)
+be: vfnmsub231pd/ps /r (66),(VEX)
+bf: vfnmsub231sd/ss /r (66),(VEX),(o128)
+# 0x0f 0x38 0xc0-0xff
+db: aesimc Vdq,Wdq (66),(VEX),(o128)
+dc: aesenc Vdq,Wdq (66),(VEX),(o128)
+dd: aesenclast Vdq,Wdq (66),(VEX),(o128)
+de: aesdec Vdq,Wdq (66),(VEX),(o128)
+df: aesdeclast Vdq,Wdq (66),(VEX),(o128)
+f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2)
+f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2)
+EndTable
+
+Table: 3-byte opcode 2 (0x0f 0x3a)
+Referrer: 3-byte escape 2
+AVXcode: 3
+# 0x0f 0x3a 0x00-0xff
+04: vpermilps /r,Ib (66),(oVEX)
+05: vpermilpd /r,Ib (66),(oVEX)
+06: vperm2f128 /r,Ib (66),(oVEX),(o256)
+08: roundps Vdq,Wdq,Ib (66),(VEX)
+09: roundpd Vdq,Wdq,Ib (66),(VEX)
+0a: roundss Vss,Wss,Ib (66),(VEX),(o128)
+0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128)
+0c: blendps Vdq,Wdq,Ib (66),(VEX)
+0d: blendpd Vdq,Wdq,Ib (66),(VEX)
+0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128)
+0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128)
+14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128)
+15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128)
+16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128)
+17: extractps Ed,Vdq,Ib (66),(VEX),(o128)
+18: vinsertf128 /r,Ib (66),(oVEX),(o256)
+19: vextractf128 /r,Ib (66),(oVEX),(o256)
+20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128)
+21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128)
+22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128)
+40: dpps Vdq,Wdq,Ib (66),(VEX)
+41: dppd Vdq,Wdq,Ib (66),(VEX),(o128)
+42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128)
+44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128)
+4a: vblendvps /r,Ib (66),(oVEX)
+4b: vblendvpd /r,Ib (66),(oVEX)
+4c: vpblendvb /r,Ib (66),(oVEX),(o128)
+60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128)
+61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128)
+62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128)
+63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128)
+df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128)
+EndTable
+
+GrpTable: Grp1
+0: ADD
+1: OR
+2: ADC
+3: SBB
+4: AND
+5: SUB
+6: XOR
+7: CMP
+EndTable
+
+GrpTable: Grp1A
+0: POP
+EndTable
+
+GrpTable: Grp2
+0: ROL
+1: ROR
+2: RCL
+3: RCR
+4: SHL/SAL
+5: SHR
+6:
+7: SAR
+EndTable
+
+GrpTable: Grp3_1
+0: TEST Eb,Ib
+1:
+2: NOT Eb
+3: NEG Eb
+4: MUL AL,Eb
+5: IMUL AL,Eb
+6: DIV AL,Eb
+7: IDIV AL,Eb
+EndTable
+
+GrpTable: Grp3_2
+0: TEST Ev,Iz
+1:
+2: NOT Ev
+3: NEG Ev
+4: MUL rAX,Ev
+5: IMUL rAX,Ev
+6: DIV rAX,Ev
+7: IDIV rAX,Ev
+EndTable
+
+GrpTable: Grp4
+0: INC Eb
+1: DEC Eb
+EndTable
+
+GrpTable: Grp5
+0: INC Ev
+1: DEC Ev
+2: CALLN Ev (f64)
+3: CALLF Ep
+4: JMPN Ev (f64)
+5: JMPF Ep
+6: PUSH Ev (d64)
+7:
+EndTable
+
+GrpTable: Grp6
+0: SLDT Rv/Mw
+1: STR Rv/Mw
+2: LLDT Ew
+3: LTR Ew
+4: VERR Ew
+5: VERW Ew
+EndTable
+
+GrpTable: Grp7
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B)
+3: LIDT Ms
+4: SMSW Mw/Rv
+5:
+6: LMSW Ew
+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
+EndTable
+
+GrpTable: Grp8
+4: BT
+5: BTS
+6: BTR
+7: BTC
+EndTable
+
+GrpTable: Grp9
+1: CMPXCHG8B/16B Mq/Mdq
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3)
+7: VMPTRST Mq
+EndTable
+
+GrpTable: Grp10
+EndTable
+
+GrpTable: Grp11
+0: MOV
+EndTable
+
+GrpTable: Grp12
+2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128)
+4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128)
+6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128)
+EndTable
+
+GrpTable: Grp13
+2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128)
+4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128)
+6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128)
+EndTable
+
+GrpTable: Grp14
+2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128)
+3: psrldq Udq,Ib (66),(11B),(VEX),(o128)
+6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128)
+7: pslldq Udq,Ib (66),(11B),(VEX),(o128)
+EndTable
+
+GrpTable: Grp15
+0: fxsave
+1: fxstor
+2: ldmxcsr (VEX)
+3: stmxcsr (VEX)
+4: XSAVE
+5: XRSTOR | lfence (11B)
+6: mfence (11B)
+7: clflush | sfence (11B)
+EndTable
+
+GrpTable: Grp16
+0: prefetch NTA
+1: prefetch T0
+2: prefetch T1
+3: prefetch T2
+EndTable
+
+# AMD's Prefetch Group
+GrpTable: GrpP
+0: PREFETCH
+1: PREFETCHW
+EndTable
+
+GrpTable: GrpPDLK
+0: MONTMUL
+1: XSHA1
+2: XSHA2
+EndTable
+
+GrpTable: GrpRNG
+0: xstore-rng
+1: xcrypt-ecb
+2: xcrypt-cbc
+4: xcrypt-cfb
+5: xcrypt-ofb
+EndTable
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f4cee90..8f4e2ac 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -38,7 +38,8 @@
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
-static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+static inline int __kprobes
+kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
 	if (unlikely(is_kmmio_active()))
 		if (kmmio_handler(regs, addr) == 1)
@@ -46,7 +47,7 @@
 	return 0;
 }
 
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes notify_page_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -240,7 +241,7 @@
  *
  *   Handle a fault on the vmalloc or module mapping area
  */
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
 {
 	unsigned long pgd_paddr;
 	pmd_t *pmd_k;
@@ -357,7 +358,7 @@
  *
  * This assumes no large pages in there.
  */
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
 {
 	pgd_t *pgd, *pgd_ref;
 	pud_t *pud, *pud_ref;
@@ -860,7 +861,7 @@
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
  */
-static noinline int
+static noinline __kprobes int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
 	pgd_t *pgd;
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd7..11a4ad4 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@
 	struct die_args *arg = args;
 
 	if (val == DIE_DEBUG && (arg->err & DR_STEP))
-		if (post_kmmio_handler(arg->err, arg->regs) == 1)
+		if (post_kmmio_handler(arg->err, arg->regs) == 1) {
+			/*
+			 * Reset the BS bit in dr6 (pointed by args->err) to
+			 * denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
 			return NOTIFY_STOP;
+		}
 
 	return NOTIFY_DONE;
 }
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 8aa85f1..0a979f3 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,6 +18,7 @@
 #include <asm/mce.h>
 #include <asm/xcr.h>
 #include <asm/suspend.h>
+#include <asm/debugreg.h>
 
 #ifdef CONFIG_X86_32
 static struct saved_context saved_context;
@@ -142,31 +143,6 @@
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-#else
-		/* CONFIG_X86_64 */
-		loaddebug(&current->thread, 0);
-		loaddebug(&current->thread, 1);
-		loaddebug(&current->thread, 2);
-		loaddebug(&current->thread, 3);
-		/* no 4 and 5 */
-		loaddebug(&current->thread, 6);
-		loaddebug(&current->thread, 7);
-#endif
-	}
-
 }
 
 /**
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
new file mode 100644
index 0000000..f820826
--- /dev/null
+++ b/arch/x86/tools/Makefile
@@ -0,0 +1,31 @@
+PHONY += posttest
+
+ifeq ($(KBUILD_VERBOSE),1)
+  posttest_verbose = -v
+else
+  posttest_verbose =
+endif
+
+ifeq ($(CONFIG_64BIT),y)
+  posttest_64bit = -y
+else
+  posttest_64bit = -n
+endif
+
+distill_awk = $(srctree)/arch/x86/tools/distill.awk
+chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
+
+quiet_cmd_posttest = TEST    $@
+      cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
+
+posttest: $(obj)/test_get_len vmlinux
+	$(call cmd,posttest)
+
+hostprogs-y	:= test_get_len
+
+# -I needed for generated C source and C source which in the kernel tree.
+HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
+
+# Dependencies are also needed.
+$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
new file mode 100644
index 0000000..0d13cd9
--- /dev/null
+++ b/arch/x86/tools/chkobjdump.awk
@@ -0,0 +1,23 @@
+# GNU objdump version checker
+#
+# Usage:
+# objdump -v | awk -f chkobjdump.awk
+BEGIN {
+	# objdump version 2.19 or later is OK for the test.
+	od_ver = 2;
+	od_sver = 19;
+}
+
+/^GNU/ {
+	split($4, ver, ".");
+	if (ver[1] > od_ver ||
+	    (ver[1] == od_ver && ver[2] >= od_sver)) {
+		exit 1;
+	} else {
+		printf("Warning: objdump version %s is older than %d.%d\n",
+		       $4, od_ver, od_sver);
+		print("Warning: Skipping posttest.");
+		# Logic is inverted, because we just skip test without error.
+		exit 0;
+	}
+}
diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk
new file mode 100644
index 0000000..c13c0ee
--- /dev/null
+++ b/arch/x86/tools/distill.awk
@@ -0,0 +1,47 @@
+#!/bin/awk -f
+# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
+# Distills the disassembly as follows:
+# - Removes all lines except the disassembled instructions.
+# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes
+# into a single line.
+# - Remove bad(or prefix only) instructions
+
+BEGIN {
+	prev_addr = ""
+	prev_hex = ""
+	prev_mnemonic = ""
+	bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))"
+	fwait_expr = "^9b "
+	fwait_str="9b\tfwait"
+}
+
+/^ *[0-9a-f]+ <[^>]*>:/ {
+	# Symbol entry
+	printf("%s%s\n", $2, $1)
+}
+
+/^ *[0-9a-f]+:/ {
+	if (split($0, field, "\t") < 3) {
+		# This is a continuation of the same insn.
+		prev_hex = prev_hex field[2]
+	} else {
+		# Skip bad instructions
+		if (match(prev_mnemonic, bad_expr))
+			prev_addr = ""
+		# Split fwait from other f* instructions
+		if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") {
+			printf "%s\t%s\n", prev_addr, fwait_str
+			sub(fwait_expr, "", prev_hex)
+		}
+		if (prev_addr != "")
+			printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
+		prev_addr = field[1]
+		prev_hex = field[2]
+		prev_mnemonic = field[3]
+	}
+}
+
+END {
+	if (prev_addr != "")
+		printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
+}
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
new file mode 100644
index 0000000..e34e92a
--- /dev/null
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -0,0 +1,380 @@
+#!/bin/awk -f
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Awk implementation sanity check
+function check_awk_implement() {
+	if (!match("abc", "[[:lower:]]+"))
+		return "Your awk doesn't support charactor-class."
+	if (sprintf("%x", 0) != "0")
+		return "Your awk has a printf-format problem."
+	return ""
+}
+
+# Clear working vars
+function clear_vars() {
+	delete table
+	delete lptable2
+	delete lptable1
+	delete lptable3
+	eid = -1 # escape id
+	gid = -1 # group id
+	aid = -1 # AVX id
+	tname = ""
+}
+
+BEGIN {
+	# Implementation error checking
+	awkchecked = check_awk_implement()
+	if (awkchecked != "") {
+		print "Error: " awkchecked > "/dev/stderr"
+		print "Please try to use gawk." > "/dev/stderr"
+		exit 1
+	}
+
+	# Setup generating tables
+	print "/* x86 opcode map generated from x86-opcode-map.txt */"
+	print "/* Do not change this code. */\n"
+	ggid = 1
+	geid = 1
+	gaid = 0
+	delete etable
+	delete gtable
+	delete atable
+
+	opnd_expr = "^[[:alpha:]/]"
+	ext_expr = "^\\("
+	sep_expr = "^\\|$"
+	group_expr = "^Grp[[:alnum:]]+"
+
+	imm_expr = "^[IJAO][[:lower:]]"
+	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+	imm_flag["Ob"] = "INAT_MOFFSET"
+	imm_flag["Ov"] = "INAT_MOFFSET"
+
+	modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])"
+	force64_expr = "\\([df]64\\)"
+	rex_expr = "^REX(\\.[XRWB]+)*"
+	fpu_expr = "^ESC" # TODO
+
+	lprefix1_expr = "\\(66\\)"
+	lprefix2_expr = "\\(F3\\)"
+	lprefix3_expr = "\\(F2\\)"
+	max_lprefix = 4
+
+	vexok_expr = "\\(VEX\\)"
+	vexonly_expr = "\\(oVEX\\)"
+
+	prefix_expr = "\\(Prefix\\)"
+	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+	prefix_num["REPNE"] = "INAT_PFX_REPNE"
+	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+	prefix_num["LOCK"] = "INAT_PFX_LOCK"
+	prefix_num["SEG=CS"] = "INAT_PFX_CS"
+	prefix_num["SEG=DS"] = "INAT_PFX_DS"
+	prefix_num["SEG=ES"] = "INAT_PFX_ES"
+	prefix_num["SEG=FS"] = "INAT_PFX_FS"
+	prefix_num["SEG=GS"] = "INAT_PFX_GS"
+	prefix_num["SEG=SS"] = "INAT_PFX_SS"
+	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+	prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
+	prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
+
+	clear_vars()
+}
+
+function semantic_error(msg) {
+	print "Semantic error at " NR ": " msg > "/dev/stderr"
+	exit 1
+}
+
+function debug(msg) {
+	print "DEBUG: " msg
+}
+
+function array_size(arr,   i,c) {
+	c = 0
+	for (i in arr)
+		c++
+	return c
+}
+
+/^Table:/ {
+	print "/* " $0 " */"
+	if (tname != "")
+		semantic_error("Hit Table: before EndTable:.");
+}
+
+/^Referrer:/ {
+	if (NF != 1) {
+		# escape opcode table
+		ref = ""
+		for (i = 2; i <= NF; i++)
+			ref = ref $i
+		eid = escape[ref]
+		tname = sprintf("inat_escape_table_%d", eid)
+	}
+}
+
+/^AVXcode:/ {
+	if (NF != 1) {
+		# AVX/escape opcode table
+		aid = $2
+		if (gaid <= aid)
+			gaid = aid + 1
+		if (tname == "")	# AVX only opcode table
+			tname = sprintf("inat_avx_table_%d", $2)
+	}
+	if (aid == -1 && eid == -1)	# primary opcode table
+		tname = "inat_primary_table"
+}
+
+/^GrpTable:/ {
+	print "/* " $0 " */"
+	if (!($2 in group))
+		semantic_error("No group: " $2 )
+	gid = group[$2]
+	tname = "inat_group_table_" gid
+}
+
+function print_table(tbl,name,fmt,n)
+{
+	print "const insn_attr_t " name " = {"
+	for (i = 0; i < n; i++) {
+		id = sprintf(fmt, i)
+		if (tbl[id])
+			print "	[" id "] = " tbl[id] ","
+	}
+	print "};"
+}
+
+/^EndTable/ {
+	if (gid != -1) {
+		# print group tables
+		if (array_size(table) != 0) {
+			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,0] = tname
+		}
+		if (array_size(lptable1) != 0) {
+			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,1] = tname "_1"
+		}
+		if (array_size(lptable2) != 0) {
+			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,2] = tname "_2"
+		}
+		if (array_size(lptable3) != 0) {
+			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,3] = tname "_3"
+		}
+	} else {
+		# print primary/escaped tables
+		if (array_size(table) != 0) {
+			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,0] = tname
+			if (aid >= 0)
+				atable[aid,0] = tname
+		}
+		if (array_size(lptable1) != 0) {
+			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,1] = tname "_1"
+			if (aid >= 0)
+				atable[aid,1] = tname "_1"
+		}
+		if (array_size(lptable2) != 0) {
+			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,2] = tname "_2"
+			if (aid >= 0)
+				atable[aid,2] = tname "_2"
+		}
+		if (array_size(lptable3) != 0) {
+			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,3] = tname "_3"
+			if (aid >= 0)
+				atable[aid,3] = tname "_3"
+		}
+	}
+	print ""
+	clear_vars()
+}
+
+function add_flags(old,new) {
+	if (old && new)
+		return old " | " new
+	else if (old)
+		return old
+	else
+		return new
+}
+
+# convert operands to flags.
+function convert_operands(opnd,       i,imm,mod)
+{
+	imm = null
+	mod = null
+	for (i in opnd) {
+		i  = opnd[i]
+		if (match(i, imm_expr) == 1) {
+			if (!imm_flag[i])
+				semantic_error("Unknown imm opnd: " i)
+			if (imm) {
+				if (i != "Ib")
+					semantic_error("Second IMM error")
+				imm = add_flags(imm, "INAT_SCNDIMM")
+			} else
+				imm = imm_flag[i]
+		} else if (match(i, modrm_expr))
+			mod = "INAT_MODRM"
+	}
+	return add_flags(imm, mod)
+}
+
+/^[0-9a-f]+\:/ {
+	if (NR == 1)
+		next
+	# get index
+	idx = "0x" substr($1, 1, index($1,":") - 1)
+	if (idx in table)
+		semantic_error("Redefine " idx " in " tname)
+
+	# check if escaped opcode
+	if ("escape" == $2) {
+		if ($3 != "#")
+			semantic_error("No escaped name")
+		ref = ""
+		for (i = 4; i <= NF; i++)
+			ref = ref $i
+		if (ref in escape)
+			semantic_error("Redefine escape (" ref ")")
+		escape[ref] = geid
+		geid++
+		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+		next
+	}
+
+	variant = null
+	# converts
+	i = 2
+	while (i <= NF) {
+		opcode = $(i++)
+		delete opnds
+		ext = null
+		flags = null
+		opnd = null
+		# parse one opcode
+		if (match($i, opnd_expr)) {
+			opnd = $i
+			split($(i++), opnds, ",")
+			flags = convert_operands(opnds)
+		}
+		if (match($i, ext_expr))
+			ext = $(i++)
+		if (match($i, sep_expr))
+			i++
+		else if (i < NF)
+			semantic_error($i " is not a separator")
+
+		# check if group opcode
+		if (match(opcode, group_expr)) {
+			if (!(opcode in group)) {
+				group[opcode] = ggid
+				ggid++
+			}
+			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+		}
+		# check force(or default) 64bit
+		if (match(ext, force64_expr))
+			flags = add_flags(flags, "INAT_FORCE64")
+
+		# check REX prefix
+		if (match(opcode, rex_expr))
+			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
+
+		# check coprocessor escape : TODO
+		if (match(opcode, fpu_expr))
+			flags = add_flags(flags, "INAT_MODRM")
+
+		# check VEX only code
+		if (match(ext, vexonly_expr))
+			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
+
+		# check VEX only code
+		if (match(ext, vexok_expr))
+			flags = add_flags(flags, "INAT_VEXOK")
+
+		# check prefixes
+		if (match(ext, prefix_expr)) {
+			if (!prefix_num[opcode])
+				semantic_error("Unknown prefix: " opcode)
+			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+		}
+		if (length(flags) == 0)
+			continue
+		# check if last prefix
+		if (match(ext, lprefix1_expr)) {
+			lptable1[idx] = add_flags(lptable1[idx],flags)
+			variant = "INAT_VARIANT"
+		} else if (match(ext, lprefix2_expr)) {
+			lptable2[idx] = add_flags(lptable2[idx],flags)
+			variant = "INAT_VARIANT"
+		} else if (match(ext, lprefix3_expr)) {
+			lptable3[idx] = add_flags(lptable3[idx],flags)
+			variant = "INAT_VARIANT"
+		} else {
+			table[idx] = add_flags(table[idx],flags)
+		}
+	}
+	if (variant)
+		table[idx] = add_flags(table[idx],variant)
+}
+
+END {
+	if (awkchecked != "")
+		exit 1
+	# print escape opcode map's array
+	print "/* Escape opcode map array */"
+	print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	for (i = 0; i < geid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (etable[i,j])
+				print "	["i"]["j"] = "etable[i,j]","
+	print "};\n"
+	# print group opcode map's array
+	print "/* Group opcode map array */"
+	print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	for (i = 0; i < ggid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (gtable[i,j])
+				print "	["i"]["j"] = "gtable[i,j]","
+	print "};\n"
+	# print AVX opcode map's array
+	print "/* AVX opcode map array */"
+	print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	for (i = 0; i < gaid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (atable[i,j])
+				print "	["i"]["j"] = "atable[i,j]","
+	print "};"
+}
+
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
new file mode 100644
index 0000000..d8214dc
--- /dev/null
+++ b/arch/x86/tools/test_get_len.c
@@ -0,0 +1,173 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+
+#define unlikely(cond) (cond)
+
+#include <asm/insn.h>
+#include <inat.c>
+#include <insn.c>
+
+/*
+ * Test of instruction analysis in general and insn_get_length() in
+ * particular.  See if insn_get_length() and the disassembler agree
+ * on the length of each instruction in an elf disassembly.
+ *
+ * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
+ */
+
+const char *prog;
+static int verbose;
+static int x86_64;
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |"
+		" %s [-y|-n] [-v] \n", prog);
+	fprintf(stderr, "\t-y	64bit mode\n");
+	fprintf(stderr, "\t-n	32bit mode\n");
+	fprintf(stderr, "\t-v	verbose mode\n");
+	exit(1);
+}
+
+static void malformed_line(const char *line, int line_nr)
+{
+	fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line);
+	exit(3);
+}
+
+static void dump_field(FILE *fp, const char *name, const char *indent,
+		       struct insn_field *field)
+{
+	fprintf(fp, "%s.%s = {\n", indent, name);
+	fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
+		indent, field->value, field->bytes[0], field->bytes[1],
+		field->bytes[2], field->bytes[3]);
+	fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
+		field->got, field->nbytes);
+}
+
+static void dump_insn(FILE *fp, struct insn *insn)
+{
+	fprintf(fp, "Instruction = { \n");
+	dump_field(fp, "prefixes", "\t",	&insn->prefixes);
+	dump_field(fp, "rex_prefix", "\t",	&insn->rex_prefix);
+	dump_field(fp, "vex_prefix", "\t",	&insn->vex_prefix);
+	dump_field(fp, "opcode", "\t",		&insn->opcode);
+	dump_field(fp, "modrm", "\t",		&insn->modrm);
+	dump_field(fp, "sib", "\t",		&insn->sib);
+	dump_field(fp, "displacement", "\t",	&insn->displacement);
+	dump_field(fp, "immediate1", "\t",	&insn->immediate1);
+	dump_field(fp, "immediate2", "\t",	&insn->immediate2);
+	fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
+		insn->attr, insn->opnd_bytes, insn->addr_bytes);
+	fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
+		insn->length, insn->x86_64, insn->kaddr);
+}
+
+static void parse_args(int argc, char **argv)
+{
+	int c;
+	prog = argv[0];
+	while ((c = getopt(argc, argv, "ynv")) != -1) {
+		switch (c) {
+		case 'y':
+			x86_64 = 1;
+			break;
+		case 'n':
+			x86_64 = 0;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			usage();
+		}
+	}
+}
+
+#define BUFSIZE 256
+
+int main(int argc, char **argv)
+{
+	char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
+	unsigned char insn_buf[16];
+	struct insn insn;
+	int insns = 0, c;
+	int warnings = 0;
+
+	parse_args(argc, argv);
+
+	while (fgets(line, BUFSIZE, stdin)) {
+		char copy[BUFSIZE], *s, *tab1, *tab2;
+		int nb = 0;
+		unsigned int b;
+
+		if (line[0] == '<') {
+			/* Symbol line */
+			strcpy(sym, line);
+			continue;
+		}
+
+		insns++;
+		memset(insn_buf, 0, 16);
+		strcpy(copy, line);
+		tab1 = strchr(copy, '\t');
+		if (!tab1)
+			malformed_line(line, insns);
+		s = tab1 + 1;
+		s += strspn(s, " ");
+		tab2 = strchr(s, '\t');
+		if (!tab2)
+			malformed_line(line, insns);
+		*tab2 = '\0';	/* Characters beyond tab2 aren't examined */
+		while (s < tab2) {
+			if (sscanf(s, "%x", &b) == 1) {
+				insn_buf[nb++] = (unsigned char) b;
+				s += 3;
+			} else
+				break;
+		}
+		/* Decode an instruction */
+		insn_init(&insn, insn_buf, x86_64);
+		insn_get_length(&insn);
+		if (insn.length != nb) {
+			warnings++;
+			fprintf(stderr, "Warning: %s found difference at %s\n",
+				prog, sym);
+			fprintf(stderr, "Warning: %s", line);
+			fprintf(stderr, "Warning: objdump says %d bytes, but "
+				"insn_get_length() says %d\n", nb,
+				insn.length);
+			if (verbose)
+				dump_insn(stderr, &insn);
+		}
+	}
+	if (warnings)
+		fprintf(stderr, "Warning: decoded and checked %d"
+			" instructions with %d warnings\n", insns, warnings);
+	else
+		fprintf(stderr, "Succeed: decoded and checked %d"
+			" instructions\n", insns);
+	return 0;
+}
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index 713ed7d..689cc6a 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -3,7 +3,6 @@
 
 static bool report_gart_errors;
 static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
-static void (*orig_mce_callback)(struct mce *m);
 
 void amd_report_gart_errors(bool v)
 {
@@ -363,8 +362,10 @@
 		pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
 }
 
-static void amd_decode_mce(struct mce *m)
+static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
+			   void *data)
 {
+	struct mce *m = (struct mce *)data;
 	struct err_regs regs;
 	int node, ecc;
 
@@ -420,20 +421,22 @@
 	}
 
 	amd_decode_err_code(m->status & 0xffff);
+
+	return NOTIFY_STOP;
 }
 
+static struct notifier_block amd_mce_dec_nb = {
+	.notifier_call	= amd_decode_mce,
+};
+
 static int __init mce_amd_init(void)
 {
 	/*
 	 * We can decode MCEs for Opteron and later CPUs:
 	 */
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
-	    (boot_cpu_data.x86 >= 0xf)) {
-		/* safe the default decode mce callback */
-		orig_mce_callback = x86_mce_decode_callback;
-
-		x86_mce_decode_callback = amd_decode_mce;
-	}
+	    (boot_cpu_data.x86 >= 0xf))
+		atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
 
 	return 0;
 }
@@ -442,7 +445,7 @@
 #ifdef MODULE
 static void __exit mce_amd_exit(void)
 {
-	x86_mce_decode_callback = orig_mce_callback;
+	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
 }
 
 MODULE_DESCRIPTION("AMD MCE decoder");
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4ec5e67..47bbdf9 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -117,12 +117,12 @@
 	struct dentry		*dir;
 	struct trace_event	*event;
 	int			enabled;
-	int			(*regfunc)(void *);
-	void			(*unregfunc)(void *);
+	int			(*regfunc)(struct ftrace_event_call *);
+	void			(*unregfunc)(struct ftrace_event_call *);
 	int			id;
-	int			(*raw_init)(void);
-	int			(*show_format)(struct ftrace_event_call *call,
-					       struct trace_seq *s);
+	int			(*raw_init)(struct ftrace_event_call *);
+	int			(*show_format)(struct ftrace_event_call *,
+					       struct trace_seq *);
 	int			(*define_fields)(struct ftrace_event_call *);
 	struct list_head	fields;
 	int			filter_active;
@@ -131,20 +131,20 @@
 	void			*data;
 
 	atomic_t		profile_count;
-	int			(*profile_enable)(void);
-	void			(*profile_disable)(void);
+	int			(*profile_enable)(struct ftrace_event_call *);
+	void			(*profile_disable)(struct ftrace_event_call *);
 };
 
 #define FTRACE_MAX_PROFILE_SIZE	2048
 
-extern char			*trace_profile_buf;
-extern char			*trace_profile_buf_nmi;
+extern char *perf_trace_buf;
+extern char *perf_trace_buf_nmi;
 
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
 extern void destroy_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_match_preds(struct event_filter *filter, void *rec);
 extern int filter_current_check_discard(struct ring_buffer *buffer,
 					struct ftrace_event_call *call,
 					void *rec,
@@ -157,11 +157,12 @@
 	FILTER_PTR_STRING,
 };
 
-extern int trace_define_field(struct ftrace_event_call *call,
-			      const char *type, const char *name,
-			      int offset, int size, int is_signed,
-			      int filter_type);
 extern int trace_define_common_fields(struct ftrace_event_call *call);
+extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+			      const char *name, int offset, int size,
+			      int is_signed, int filter_type);
+extern int trace_add_event_call(struct ftrace_event_call *call);
+extern void trace_remove_event_call(struct ftrace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < 0)
 
@@ -186,4 +187,13 @@
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
+#ifdef CONFIG_EVENT_PROFILE
+struct perf_event;
+extern int ftrace_profile_enable(int event_id);
+extern void ftrace_profile_disable(int event_id);
+extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+				     char *filter_str);
+extern void ftrace_profile_free_filter(struct perf_event *event);
+#endif
+
 #endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
new file mode 100644
index 0000000..a03daed
--- /dev/null
+++ b/include/linux/hw_breakpoint.h
@@ -0,0 +1,131 @@
+#ifndef _LINUX_HW_BREAKPOINT_H
+#define _LINUX_HW_BREAKPOINT_H
+
+enum {
+	HW_BREAKPOINT_LEN_1 = 1,
+	HW_BREAKPOINT_LEN_2 = 2,
+	HW_BREAKPOINT_LEN_4 = 4,
+	HW_BREAKPOINT_LEN_8 = 8,
+};
+
+enum {
+	HW_BREAKPOINT_R = 1,
+	HW_BREAKPOINT_W = 2,
+	HW_BREAKPOINT_X = 4,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/perf_event.h>
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+/* As it's for in-kernel or ptrace use, we want it to be pinned */
+#define DEFINE_BREAKPOINT_ATTR(name)	\
+struct perf_event_attr name = {		\
+	.type = PERF_TYPE_BREAKPOINT,	\
+	.size = sizeof(name),		\
+	.pinned = 1,			\
+};
+
+static inline void hw_breakpoint_init(struct perf_event_attr *attr)
+{
+	attr->type = PERF_TYPE_BREAKPOINT;
+	attr->size = sizeof(*attr);
+	attr->pinned = 1;
+}
+
+static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
+{
+	return bp->attr.bp_addr;
+}
+
+static inline int hw_breakpoint_type(struct perf_event *bp)
+{
+	return bp->attr.bp_type;
+}
+
+static inline int hw_breakpoint_len(struct perf_event *bp)
+{
+	return bp->attr.bp_len;
+}
+
+extern struct perf_event *
+register_user_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk);
+
+/* FIXME: only change from the attr, and don't unregister */
+extern struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  struct perf_event_attr *attr,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk);
+
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+extern struct perf_event *
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
+				perf_callback_t triggered,
+				int cpu);
+
+extern struct perf_event **
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered);
+
+extern int register_perf_hw_breakpoint(struct perf_event *bp);
+extern int __register_perf_hw_breakpoint(struct perf_event *bp);
+extern void unregister_hw_breakpoint(struct perf_event *bp);
+extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
+
+extern int reserve_bp_slot(struct perf_event *bp);
+extern void release_bp_slot(struct perf_event *bp);
+
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return &bp->hw.info;
+}
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+
+static inline struct perf_event *
+register_user_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk)	{ return NULL; }
+static inline struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  struct perf_event_attr *attr,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk)	{ return NULL; }
+static inline struct perf_event *
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
+				perf_callback_t triggered,
+				int cpu)		{ return NULL; }
+static inline struct perf_event **
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered)	{ return NULL; }
+static inline int
+register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
+static inline int
+__register_perf_hw_breakpoint(struct perf_event *bp) 	{ return -ENOSYS; }
+static inline void unregister_hw_breakpoint(struct perf_event *bp)	{ }
+static inline void
+unregister_wide_hw_breakpoint(struct perf_event **cpu_events)		{ }
+static inline int
+reserve_bp_slot(struct perf_event *bp)			{return -ENOSYS; }
+static inline void release_bp_slot(struct perf_event *bp) 		{ }
+
+static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk)	{ }
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_HW_BREAKPOINT_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 3a46b7b..1b672f7 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -296,6 +296,8 @@
 int disable_kprobe(struct kprobe *kp);
 int enable_kprobe(struct kprobe *kp);
 
+void dump_kprobe(struct kprobe *kp);
+
 #else /* !CONFIG_KPROBES: */
 
 static inline int kprobes_built_in(void)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7b7fbf4..e3fb256 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -106,6 +106,8 @@
 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
@@ -225,6 +227,7 @@
 #define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
 #define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
 #define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
+#define PERF_COUNTER_IOC_SET_FILTER	_IOW('$', 6, char *)
 
 enum perf_counter_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9e70126..43adbd7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,6 +18,10 @@
 #include <linux/ioctl.h>
 #include <asm/byteorder.h>
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <asm/hw_breakpoint.h>
+#endif
+
 /*
  * User-space ABI bits:
  */
@@ -31,6 +35,7 @@
 	PERF_TYPE_TRACEPOINT			= 2,
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
+	PERF_TYPE_BREAKPOINT			= 5,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -102,6 +107,8 @@
 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
@@ -207,6 +214,15 @@
 		__u32		wakeup_events;	  /* wakeup every n events */
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
+
+	union {
+		struct { /* Hardware breakpoint info */
+			__u64		bp_addr;
+			__u32		bp_type;
+			__u32		bp_len;
+		};
+	};
+
 	__u32			__reserved_2;
 
 	__u64			__reserved_3;
@@ -219,8 +235,9 @@
 #define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
 #define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
+#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
@@ -475,6 +492,11 @@
 			s64		remaining;
 			struct hrtimer	hrtimer;
 		};
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+		union { /* breakpoint */
+			struct arch_hw_breakpoint	info;
+		};
+#endif
 	};
 	atomic64_t			prev_count;
 	u64				sample_period;
@@ -543,6 +565,10 @@
 	void (*func)(struct perf_pending_entry *);
 };
 
+typedef void (*perf_callback_t)(struct perf_event *, void *);
+
+struct perf_sample_data;
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -585,7 +611,7 @@
 	u64				tstamp_running;
 	u64				tstamp_stopped;
 
-	struct perf_event_attr	attr;
+	struct perf_event_attr		attr;
 	struct hw_perf_event		hw;
 
 	struct perf_event_context	*ctx;
@@ -633,7 +659,20 @@
 
 	struct pid_namespace		*ns;
 	u64				id;
+
+	void (*overflow_handler)(struct perf_event *event,
+			int nmi, struct perf_sample_data *data,
+			struct pt_regs *regs);
+
+#ifdef CONFIG_EVENT_PROFILE
+	struct event_filter		*filter;
 #endif
+
+	perf_callback_t			callback;
+
+	perf_callback_t			event_callback;
+
+#endif /* CONFIG_PERF_EVENTS */
 };
 
 /**
@@ -706,7 +745,6 @@
 	int				nmi;
 	int				sample;
 	int				locked;
-	unsigned long			flags;
 };
 
 #ifdef CONFIG_PERF_EVENTS
@@ -738,6 +776,14 @@
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_event_context *ctx, int cpu);
 extern void perf_event_update_userpage(struct perf_event *event);
+extern int perf_event_release_kernel(struct perf_event *event);
+extern struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr,
+				int cpu,
+				pid_t pid,
+				perf_callback_t callback);
+extern u64 perf_event_read_value(struct perf_event *event,
+				 u64 *enabled, u64 *running);
 
 struct perf_sample_data {
 	u64				type;
@@ -814,6 +860,7 @@
 extern void perf_event_init(void);
 extern void perf_tp_event(int event_id, u64 addr, u64 count,
 				 void *record, int entry_size);
+extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
 #define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -827,6 +874,8 @@
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
+extern int perf_swevent_get_recursion_context(void);
+extern void perf_swevent_put_recursion_context(int rctx);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -848,11 +897,15 @@
 static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)			{ }
+static inline void
+perf_bp_event(struct perf_event *event, void *data)		{ }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
+static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
+static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 
 #endif
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace..e79e2f3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,37 +99,16 @@
 #define __SC_TEST6(t6, a6, ...)	__SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
 #ifdef CONFIG_EVENT_PROFILE
-#define TRACE_SYS_ENTER_PROFILE(sname)					       \
-static int prof_sysenter_enable_##sname(void)				       \
-{									       \
-	return reg_prof_syscall_enter("sys"#sname);			       \
-}									       \
-									       \
-static void prof_sysenter_disable_##sname(void)				       \
-{									       \
-	unreg_prof_syscall_enter("sys"#sname);				       \
-}
-
-#define TRACE_SYS_EXIT_PROFILE(sname)					       \
-static int prof_sysexit_enable_##sname(void)				       \
-{									       \
-	return reg_prof_syscall_exit("sys"#sname);			       \
-}									       \
-									       \
-static void prof_sysexit_disable_##sname(void)				       \
-{                                                                              \
-	unreg_prof_syscall_exit("sys"#sname);				       \
-}
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
 	.profile_count = ATOMIC_INIT(-1),				       \
-	.profile_enable = prof_sysenter_enable_##sname,			       \
-	.profile_disable = prof_sysenter_disable_##sname,
+	.profile_enable = prof_sysenter_enable,				       \
+	.profile_disable = prof_sysenter_disable,
 
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)				       \
 	.profile_count = ATOMIC_INIT(-1),				       \
-	.profile_enable = prof_sysexit_enable_##sname,			       \
-	.profile_disable = prof_sysexit_disable_##sname,
+	.profile_enable = prof_sysexit_enable,				       \
+	.profile_disable = prof_sysexit_disable,
 #else
 #define TRACE_SYS_ENTER_PROFILE(sname)
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)
@@ -153,74 +132,46 @@
 #define __SC_STR_TDECL6(t, a, ...)	#t, __SC_STR_TDECL5(__VA_ARGS__)
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
+	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_enter_##sname;		\
-	struct trace_event enter_syscall_print_##sname = {		\
+	static struct trace_event enter_syscall_print_##sname = {	\
 		.trace                  = print_syscall_enter,		\
 	};								\
-	static int init_enter_##sname(void)				\
-	{								\
-		int num, id;						\
-		num = syscall_name_to_nr("sys"#sname);			\
-		if (num < 0)						\
-			return -ENOSYS;					\
-		id = register_ftrace_event(&enter_syscall_print_##sname);\
-		if (!id)						\
-			return -ENODEV;					\
-		event_enter_##sname.id = id;				\
-		set_syscall_enter_id(num, id);				\
-		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
-		return 0;						\
-	}								\
-	TRACE_SYS_ENTER_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
 	  event_enter_##sname = {					\
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
-		.event                  = &event_syscall_enter,		\
-		.raw_init		= init_enter_##sname,		\
+		.event                  = &enter_syscall_print_##sname,	\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
 		.unregfunc		= unreg_event_syscall_enter,	\
-		.data			= "sys"#sname,			\
+		.data			= (void *)&__syscall_meta_##sname,\
 		TRACE_SYS_ENTER_PROFILE_INIT(sname)			\
 	}
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
+	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_exit_##sname;		\
-	struct trace_event exit_syscall_print_##sname = {		\
+	static struct trace_event exit_syscall_print_##sname = {	\
 		.trace                  = print_syscall_exit,		\
 	};								\
-	static int init_exit_##sname(void)				\
-	{								\
-		int num, id;						\
-		num = syscall_name_to_nr("sys"#sname);			\
-		if (num < 0)						\
-			return -ENOSYS;					\
-		id = register_ftrace_event(&exit_syscall_print_##sname);\
-		if (!id)						\
-			return -ENODEV;					\
-		event_exit_##sname.id = id;				\
-		set_syscall_exit_id(num, id);				\
-		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
-		return 0;						\
-	}								\
-	TRACE_SYS_EXIT_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
 	  event_exit_##sname = {					\
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
-		.event                  = &event_syscall_exit,		\
-		.raw_init		= init_exit_##sname,		\
+		.event                  = &exit_syscall_print_##sname,	\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
 		.unregfunc		= unreg_event_syscall_exit,	\
-		.data			= "sys"#sname,			\
+		.data			= (void *)&__syscall_meta_##sname,\
 		TRACE_SYS_EXIT_PROFILE_INIT(sname)			\
 	}
 
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 2aac8a8..f59604e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -280,6 +280,12 @@
  * TRACE_EVENT_FN to perform any (un)registration work.
  */
 
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
+#define DEFINE_EVENT(template, name, proto, args)		\
+	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,		\
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 2a4b3bf..5acfb1e 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -31,6 +31,14 @@
 		assign, print, reg, unreg)			\
 	DEFINE_TRACE_FN(name, reg, unreg)
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args) \
+	DEFINE_TRACE(name)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_TRACE(name)
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
@@ -63,6 +71,9 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef DECLARE_EVENT_CLASS
+#undef DEFINE_EVENT
+#undef DEFINE_EVENT_PRINT
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h
index 8abd620..1af72dc 100644
--- a/include/trace/events/bkl.h
+++ b/include/trace/events/bkl.h
@@ -13,7 +13,7 @@
 	TP_ARGS(func, file, line),
 
 	TP_STRUCT__entry(
-		__field(	int,		lock_depth		)
+		__field(	int,		depth			)
 		__field_ext(	const char *,	func, FILTER_PTR_STRING	)
 		__field_ext(	const char *,	file, FILTER_PTR_STRING	)
 		__field(	int,		line			)
@@ -21,13 +21,13 @@
 
 	TP_fast_assign(
 		/* We want to record the lock_depth after lock is acquired */
-		__entry->lock_depth = current->lock_depth + 1;
+		__entry->depth = current->lock_depth + 1;
 		__entry->func = func;
 		__entry->file = file;
 		__entry->line = line;
 	),
 
-	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+	TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth,
 		  __entry->file, __entry->line, __entry->func)
 );
 
@@ -38,20 +38,20 @@
 	TP_ARGS(func, file, line),
 
 	TP_STRUCT__entry(
-		__field(int,		lock_depth)
-		__field(const char *,	func)
-		__field(const char *,	file)
-		__field(int,		line)
+		__field(int,		depth		)
+		__field(const char *,	func		)
+		__field(const char *,	file		)
+		__field(int,		line		)
 	),
 
 	TP_fast_assign(
-		__entry->lock_depth = current->lock_depth;
+		__entry->depth = current->lock_depth;
 		__entry->func = func;
 		__entry->file = file;
 		__entry->line = line;
 	),
 
-	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+	TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth,
 		  __entry->file, __entry->line, __entry->func)
 );
 
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 00405b5..5fb7273 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -8,7 +8,7 @@
 #include <linux/blkdev.h>
 #include <linux/tracepoint.h>
 
-TRACE_EVENT(block_rq_abort,
+DECLARE_EVENT_CLASS(block_rq_with_error,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
@@ -40,7 +40,28 @@
 		  __entry->nr_sector, __entry->errors)
 );
 
-TRACE_EVENT(block_rq_insert,
+DEFINE_EVENT(block_rq_with_error, block_rq_abort,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq)
+);
+
+DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq)
+);
+
+DEFINE_EVENT(block_rq_with_error, block_rq_complete,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq)
+);
+
+DECLARE_EVENT_CLASS(block_rq,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
@@ -74,102 +95,18 @@
 		  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_rq_issue,
+DEFINE_EVENT(block_rq, block_rq_insert,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
-	TP_ARGS(q, rq),
-
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  unsigned int,	bytes			)
-		__array(  char,		rwbs,	6		)
-		__array(  char,		comm,   TASK_COMM_LEN   )
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->bytes     = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0;
-
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
-
-	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __entry->bytes, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_ARGS(q, rq)
 );
 
-TRACE_EVENT(block_rq_requeue,
+DEFINE_EVENT(block_rq, block_rq_issue,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
-	TP_ARGS(q, rq),
-
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  int,		errors			)
-		__array(  char,		rwbs,	6		)
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->errors	   = rq->errors;
-
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-	),
-
-	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->errors)
-);
-
-TRACE_EVENT(block_rq_complete,
-
-	TP_PROTO(struct request_queue *q, struct request *rq),
-
-	TP_ARGS(q, rq),
-
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  int,		errors			)
-		__array(  char,		rwbs,	6		)
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->errors    = rq->errors;
-
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-	),
-
-	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->errors)
+	TP_ARGS(q, rq)
 );
 
 TRACE_EVENT(block_bio_bounce,
@@ -228,7 +165,7 @@
 		  __entry->nr_sector, __entry->error)
 );
 
-TRACE_EVENT(block_bio_backmerge,
+DECLARE_EVENT_CLASS(block_bio,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio),
 
@@ -256,63 +193,28 @@
 		  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_bio_frontmerge,
+DEFINE_EVENT(block_bio, block_bio_backmerge,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio),
 
-	TP_ARGS(q, bio),
-
-	TP_STRUCT__entry(
-		__field( dev_t,		dev			)
-		__field( sector_t,	sector			)
-		__field( unsigned,	nr_sector		)
-		__array( char,		rwbs,	6		)
-		__array( char,		comm,	TASK_COMM_LEN	)
-	),
-
-	TP_fast_assign(
-		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
-
-	TP_printk("%d,%d %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_ARGS(q, bio)
 );
 
-TRACE_EVENT(block_bio_queue,
+DEFINE_EVENT(block_bio, block_bio_frontmerge,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio),
 
-	TP_ARGS(q, bio),
-
-	TP_STRUCT__entry(
-		__field( dev_t,		dev			)
-		__field( sector_t,	sector			)
-		__field( unsigned int,	nr_sector		)
-		__array( char,		rwbs,	6		)
-		__array( char,		comm,	TASK_COMM_LEN	)
-	),
-
-	TP_fast_assign(
-		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
-
-	TP_printk("%d,%d %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_ARGS(q, bio)
 );
 
-TRACE_EVENT(block_getrq,
+DEFINE_EVENT(block_bio, block_bio_queue,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio)
+);
+
+DECLARE_EVENT_CLASS(block_get_rq,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
@@ -341,33 +243,18 @@
 		  __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_sleeprq,
+DEFINE_EVENT(block_get_rq, block_getrq,
 
 	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
-	TP_ARGS(q, bio, rw),
+	TP_ARGS(q, bio, rw)
+);
 
-	TP_STRUCT__entry(
-		__field( dev_t,		dev			)
-		__field( sector_t,	sector			)
-		__field( unsigned int,	nr_sector		)
-		__array( char,		rwbs,	6		)
-		__array( char,		comm,	TASK_COMM_LEN	)
-	),
+DEFINE_EVENT(block_get_rq, block_sleeprq,
 
-	TP_fast_assign(
-		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
-		__entry->sector		= bio ? bio->bi_sector : 0;
-		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
-		blk_fill_rwbs(__entry->rwbs,
-			    bio ? bio->bi_rw : 0, __entry->nr_sector);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
 
-	TP_printk("%d,%d %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_ARGS(q, bio, rw)
 );
 
 TRACE_EVENT(block_plug,
@@ -387,7 +274,7 @@
 	TP_printk("[%s]", __entry->comm)
 );
 
-TRACE_EVENT(block_unplug_timer,
+DECLARE_EVENT_CLASS(block_unplug,
 
 	TP_PROTO(struct request_queue *q),
 
@@ -406,23 +293,18 @@
 	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
 );
 
-TRACE_EVENT(block_unplug_io,
+DEFINE_EVENT(block_unplug, block_unplug_timer,
 
 	TP_PROTO(struct request_queue *q),
 
-	TP_ARGS(q),
+	TP_ARGS(q)
+);
 
-	TP_STRUCT__entry(
-		__field( int,		nr_rq			)
-		__array( char,		comm,	TASK_COMM_LEN	)
-	),
+DEFINE_EVENT(block_unplug, block_unplug_io,
 
-	TP_fast_assign(
-		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
+	TP_PROTO(struct request_queue *q),
 
-	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+	TP_ARGS(q)
 );
 
 TRACE_EVENT(block_split,
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d09550b..318f765 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -90,7 +90,7 @@
 		  (unsigned long) __entry->dir, __entry->mode)
 );
 
-TRACE_EVENT(ext4_write_begin,
+DECLARE_EVENT_CLASS(ext4__write_begin,
 
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 		 unsigned int flags),
@@ -118,7 +118,23 @@
 		  __entry->pos, __entry->len, __entry->flags)
 );
 
-TRACE_EVENT(ext4_ordered_write_end,
+DEFINE_EVENT(ext4__write_begin, ext4_write_begin,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int flags),
+
+	TP_ARGS(inode, pos, len, flags)
+);
+
+DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int flags),
+
+	TP_ARGS(inode, pos, len, flags)
+);
+
+DECLARE_EVENT_CLASS(ext4__write_end,
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 			unsigned int copied),
 
@@ -145,57 +161,36 @@
 		  __entry->pos, __entry->len, __entry->copied)
 );
 
-TRACE_EVENT(ext4_writeback_write_end,
+DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
+
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 		 unsigned int copied),
 
-	TP_ARGS(inode, pos, len, copied),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	loff_t,	pos			)
-		__field(	unsigned int, len		)
-		__field(	unsigned int, copied		)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->pos	= pos;
-		__entry->len	= len;
-		__entry->copied	= copied;
-	),
-
-	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pos, __entry->len, __entry->copied)
+	TP_ARGS(inode, pos, len, copied)
 );
 
-TRACE_EVENT(ext4_journalled_write_end,
+DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end,
+
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 		 unsigned int copied),
-	TP_ARGS(inode, pos, len, copied),
 
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	loff_t,	pos			)
-		__field(	unsigned int, len		)
-		__field(	unsigned int, copied		)
-	),
+	TP_ARGS(inode, pos, len, copied)
+);
 
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->pos	= pos;
-		__entry->len	= len;
-		__entry->copied	= copied;
-	),
+DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end,
 
-	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pos, __entry->len, __entry->copied)
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied)
+);
+
+DEFINE_EVENT(ext4__write_end, ext4_da_write_end,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied)
 );
 
 TRACE_EVENT(ext4_writepage,
@@ -337,60 +332,6 @@
 		  (unsigned long) __entry->writeback_index)
 );
 
-TRACE_EVENT(ext4_da_write_begin,
-	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
-			unsigned int flags),
-
-	TP_ARGS(inode, pos, len, flags),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	loff_t,	pos			)
-		__field(	unsigned int, len		)
-		__field(	unsigned int, flags		)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->pos	= pos;
-		__entry->len	= len;
-		__entry->flags	= flags;
-	),
-
-	TP_printk("dev %s ino %lu pos %llu len %u flags %u",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pos, __entry->len, __entry->flags)
-);
-
-TRACE_EVENT(ext4_da_write_end,
-	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
-			unsigned int copied),
-
-	TP_ARGS(inode, pos, len, copied),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	loff_t,	pos			)
-		__field(	unsigned int, len		)
-		__field(	unsigned int, copied		)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->pos	= pos;
-		__entry->len	= len;
-		__entry->copied	= copied;
-	),
-
-	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pos, __entry->len, __entry->copied)
-);
-
 TRACE_EVENT(ext4_discard_blocks,
 	TP_PROTO(struct super_block *sb, unsigned long long blk,
 			unsigned long long count),
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index b89f9db..0e4cfb6 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -48,7 +48,7 @@
 		__assign_str(name, action->name);
 	),
 
-	TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
+	TP_printk("irq=%d name=%s", __entry->irq, __get_str(name))
 );
 
 /**
@@ -78,10 +78,28 @@
 		__entry->ret	= ret;
 	),
 
-	TP_printk("irq=%d return=%s",
+	TP_printk("irq=%d ret=%s",
 		  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
+DECLARE_EVENT_CLASS(softirq,
+
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+	TP_ARGS(h, vec),
+
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+	),
+
+	TP_printk("vec=%d [action=%s]", __entry->vec,
+		  show_softirq_name(__entry->vec))
+);
+
 /**
  * softirq_entry - called immediately before the softirq handler
  * @h: pointer to struct softirq_action
@@ -93,22 +111,11 @@
  * number. Also, when used in combination with the softirq_exit tracepoint
  * we can determine the softirq latency.
  */
-TRACE_EVENT(softirq_entry,
+DEFINE_EVENT(softirq, softirq_entry,
 
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
 
-	TP_ARGS(h, vec),
-
-	TP_STRUCT__entry(
-		__field(	int,	vec			)
-	),
-
-	TP_fast_assign(
-		__entry->vec = (int)(h - vec);
-	),
-
-	TP_printk("softirq=%d action=%s", __entry->vec,
-		  show_softirq_name(__entry->vec))
+	TP_ARGS(h, vec)
 );
 
 /**
@@ -122,22 +129,11 @@
  * combination with the softirq_entry tracepoint we can determine the softirq
  * latency.
  */
-TRACE_EVENT(softirq_exit,
+DEFINE_EVENT(softirq, softirq_exit,
 
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
 
-	TP_ARGS(h, vec),
-
-	TP_STRUCT__entry(
-		__field(	int,	vec			)
-	),
-
-	TP_fast_assign(
-		__entry->vec = (int)(h - vec);
-	),
-
-	TP_printk("softirq=%d action=%s", __entry->vec,
-		  show_softirq_name(__entry->vec))
+	TP_ARGS(h, vec)
 );
 
 #endif /*  _TRACE_IRQ_H */
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 3c60b75..96b370a 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -30,7 +30,7 @@
 		  jbd2_dev_to_name(__entry->dev), __entry->result)
 );
 
-TRACE_EVENT(jbd2_start_commit,
+DECLARE_EVENT_CLASS(jbd2_commit,
 
 	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
@@ -53,73 +53,32 @@
 		  __entry->sync_commit)
 );
 
-TRACE_EVENT(jbd2_commit_locking,
+DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
 
 	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-	TP_ARGS(journal, commit_transaction),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		  )
-		__field(	int,	transaction		  )
-	),
-
-	TP_fast_assign(
-		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
-		__entry->transaction	= commit_transaction->t_tid;
-	),
-
-	TP_printk("dev %s transaction %d sync %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
-		  __entry->sync_commit)
+	TP_ARGS(journal, commit_transaction)
 );
 
-TRACE_EVENT(jbd2_commit_flushing,
+DEFINE_EVENT(jbd2_commit, jbd2_commit_locking,
 
 	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-	TP_ARGS(journal, commit_transaction),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		  )
-		__field(	int,	transaction		  )
-	),
-
-	TP_fast_assign(
-		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
-		__entry->transaction	= commit_transaction->t_tid;
-	),
-
-	TP_printk("dev %s transaction %d sync %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
-		  __entry->sync_commit)
+	TP_ARGS(journal, commit_transaction)
 );
 
-TRACE_EVENT(jbd2_commit_logging,
+DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing,
 
 	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-	TP_ARGS(journal, commit_transaction),
+	TP_ARGS(journal, commit_transaction)
+);
 
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		  )
-		__field(	int,	transaction		  )
-	),
+DEFINE_EVENT(jbd2_commit, jbd2_commit_logging,
 
-	TP_fast_assign(
-		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
-		__entry->transaction	= commit_transaction->t_tid;
-	),
+	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
 
-	TP_printk("dev %s transaction %d sync %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
-		  __entry->sync_commit)
+	TP_ARGS(journal, commit_transaction)
 );
 
 TRACE_EVENT(jbd2_end_commit,
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eaf46bd..3adca0c 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -44,7 +44,7 @@
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"}		\
 	) : "GFP_NOWAIT"
 
-TRACE_EVENT(kmalloc,
+DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
@@ -78,41 +78,23 @@
 		show_gfp_flags(__entry->gfp_flags))
 );
 
-TRACE_EVENT(kmem_cache_alloc,
+DEFINE_EVENT(kmem_alloc, kmalloc,
 
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
+	TP_PROTO(unsigned long call_site, const void *ptr,
+		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
 );
 
-TRACE_EVENT(kmalloc_node,
+DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site, const void *ptr,
+		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+);
+
+DECLARE_EVENT_CLASS(kmem_alloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
@@ -150,45 +132,25 @@
 		__entry->node)
 );
 
-TRACE_EVENT(kmem_cache_alloc_node,
+DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
+	TP_PROTO(unsigned long call_site, const void *ptr,
+		 size_t bytes_req, size_t bytes_alloc,
+		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags),
-		__entry->node)
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
-TRACE_EVENT(kfree,
+DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site, const void *ptr,
+		 size_t bytes_req, size_t bytes_alloc,
+		 gfp_t gfp_flags, int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+);
+
+DECLARE_EVENT_CLASS(kmem_free,
 
 	TP_PROTO(unsigned long call_site, const void *ptr),
 
@@ -207,23 +169,18 @@
 	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
 );
 
-TRACE_EVENT(kmem_cache_free,
+DEFINE_EVENT(kmem_free, kfree,
 
 	TP_PROTO(unsigned long call_site, const void *ptr),
 
-	TP_ARGS(call_site, ptr),
+	TP_ARGS(call_site, ptr)
+);
 
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
+DEFINE_EVENT(kmem_free, kmem_cache_free,
 
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
+	TP_PROTO(unsigned long call_site, const void *ptr),
 
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+	TP_ARGS(call_site, ptr)
 );
 
 TRACE_EVENT(mm_page_free_direct,
@@ -299,7 +256,7 @@
 		show_gfp_flags(__entry->gfp_flags))
 );
 
-TRACE_EVENT(mm_page_alloc_zone_locked,
+DECLARE_EVENT_CLASS(mm_page,
 
 	TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
@@ -325,29 +282,22 @@
 		__entry->order == 0)
 );
 
-TRACE_EVENT(mm_page_pcpu_drain,
+DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked,
 
-	TP_PROTO(struct page *page, int order, int migratetype),
+	TP_PROTO(struct page *page, unsigned int order, int migratetype),
+
+	TP_ARGS(page, order, migratetype)
+);
+
+DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain,
+
+	TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
 	TP_ARGS(page, order, migratetype),
 
-	TP_STRUCT__entry(
-		__field(	struct page *,	page		)
-		__field(	int,		order		)
-		__field(	int,		migratetype	)
-	),
-
-	TP_fast_assign(
-		__entry->page		= page;
-		__entry->order		= order;
-		__entry->migratetype	= migratetype;
-	),
-
 	TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
-		__entry->page,
-		page_to_pfn(__entry->page),
-		__entry->order,
-		__entry->migratetype)
+		__entry->page, page_to_pfn(__entry->page),
+		__entry->order, __entry->migratetype)
 );
 
 TRACE_EVENT(mm_page_alloc_extfrag,
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lock.h
similarity index 92%
rename from include/trace/events/lockdep.h
rename to include/trace/events/lock.h
index bcf1d20..a870ba1 100644
--- a/include/trace/events/lockdep.h
+++ b/include/trace/events/lock.h
@@ -1,8 +1,8 @@
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM lockdep
+#define TRACE_SYSTEM lock
 
-#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_LOCKDEP_H
+#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCK_H
 
 #include <linux/lockdep.h>
 #include <linux/tracepoint.h>
@@ -90,7 +90,7 @@
 #endif
 #endif
 
-#endif /* _TRACE_LOCKDEP_H */
+#endif /* _TRACE_LOCK_H */
 
 /* This part must be outside protection */
 #include <trace/define_trace.h>
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
new file mode 100644
index 0000000..7eee778
--- /dev/null
+++ b/include/trace/events/mce.h
@@ -0,0 +1,69 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mce
+
+#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MCE_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+#include <asm/mce.h>
+
+TRACE_EVENT(mce_record,
+
+	TP_PROTO(struct mce *m),
+
+	TP_ARGS(m),
+
+	TP_STRUCT__entry(
+		__field(	u64,		mcgcap		)
+		__field(	u64,		mcgstatus	)
+		__field(	u8,		bank		)
+		__field(	u64,		status		)
+		__field(	u64,		addr		)
+		__field(	u64,		misc		)
+		__field(	u64,		ip		)
+		__field(	u8,		cs		)
+		__field(	u64,		tsc		)
+		__field(	u64,		walltime	)
+		__field(	u32,		cpu		)
+		__field(	u32,		cpuid		)
+		__field(	u32,		apicid		)
+		__field(	u32,		socketid	)
+		__field(	u8,		cpuvendor	)
+	),
+
+	TP_fast_assign(
+		__entry->mcgcap		= m->mcgcap;
+		__entry->mcgstatus	= m->mcgstatus;
+		__entry->bank		= m->bank;
+		__entry->status		= m->status;
+		__entry->addr		= m->addr;
+		__entry->misc		= m->misc;
+		__entry->ip		= m->ip;
+		__entry->cs		= m->cs;
+		__entry->tsc		= m->tsc;
+		__entry->walltime	= m->time;
+		__entry->cpu		= m->extcpu;
+		__entry->cpuid		= m->cpuid;
+		__entry->apicid		= m->apicid;
+		__entry->socketid	= m->socketid;
+		__entry->cpuvendor	= m->cpuvendor;
+	),
+
+	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+		__entry->cpu,
+		__entry->mcgcap, __entry->mcgstatus,
+		__entry->bank, __entry->status,
+		__entry->addr, __entry->misc,
+		__entry->cs, __entry->ip,
+		__entry->tsc,
+		__entry->cpuvendor, __entry->cpuid,
+		__entry->walltime,
+		__entry->socketid,
+		__entry->apicid)
+);
+
+#endif /* _TRACE_MCE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 84160fb..4b0f48b 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -51,7 +51,7 @@
 	TP_printk("%s", __get_str(name))
 );
 
-TRACE_EVENT(module_get,
+DECLARE_EVENT_CLASS(module_refcnt,
 
 	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
@@ -73,26 +73,18 @@
 		  __get_str(name), (void *)__entry->ip, __entry->refcnt)
 );
 
-TRACE_EVENT(module_put,
+DEFINE_EVENT(module_refcnt, module_get,
 
 	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
-	TP_ARGS(mod, ip, refcnt),
+	TP_ARGS(mod, ip, refcnt)
+);
 
-	TP_STRUCT__entry(
-		__field(	unsigned long,	ip		)
-		__field(	int,		refcnt		)
-		__string(	name,		mod->name	)
-	),
+DEFINE_EVENT(module_refcnt, module_put,
 
-	TP_fast_assign(
-		__entry->ip	= ip;
-		__entry->refcnt	= refcnt;
-		__assign_str(name, mod->name);
-	),
+	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
 
-	TP_printk("%s call_site=%pf refcnt=%d",
-		  __get_str(name), (void *)__entry->ip, __entry->refcnt)
+	TP_ARGS(mod, ip, refcnt)
 );
 
 TRACE_EVENT(module_request,
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index ea6d579..c4efe9b 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -16,9 +16,7 @@
 };
 #endif
 
-
-
-TRACE_EVENT(power_start,
+DECLARE_EVENT_CLASS(power,
 
 	TP_PROTO(unsigned int type, unsigned int state),
 
@@ -37,6 +35,20 @@
 	TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
 );
 
+DEFINE_EVENT(power, power_start,
+
+	TP_PROTO(unsigned int type, unsigned int state),
+
+	TP_ARGS(type, state)
+);
+
+DEFINE_EVENT(power, power_frequency,
+
+	TP_PROTO(unsigned int type, unsigned int state),
+
+	TP_ARGS(type, state)
+);
+
 TRACE_EVENT(power_end,
 
 	TP_PROTO(int dummy),
@@ -55,26 +67,6 @@
 
 );
 
-
-TRACE_EVENT(power_frequency,
-
-	TP_PROTO(unsigned int type, unsigned int state),
-
-	TP_ARGS(type, state),
-
-	TP_STRUCT__entry(
-		__field(	u64,		type		)
-		__field(	u64,		state		)
-	),
-
-	TP_fast_assign(
-		__entry->type = type;
-		__entry->state = state;
-	),
-
-	TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state)
-);
-
 #endif /* _TRACE_POWER_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 4069c43..cfceb0b 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -26,7 +26,7 @@
 		__entry->pid	= t->pid;
 	),
 
-	TP_printk("task %s:%d", __entry->comm, __entry->pid)
+	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
 );
 
 /*
@@ -46,7 +46,7 @@
 		__entry->ret	= ret;
 	),
 
-	TP_printk("ret %d", __entry->ret)
+	TP_printk("ret=%d", __entry->ret)
 );
 
 /*
@@ -73,7 +73,7 @@
 		__entry->prio	= p->prio;
 	),
 
-	TP_printk("task %s:%d [%d]",
+	TP_printk("comm=%s pid=%d prio=%d",
 		  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -83,7 +83,7 @@
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
-TRACE_EVENT(sched_wakeup,
+DECLARE_EVENT_CLASS(sched_wakeup_template,
 
 	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
 
@@ -94,7 +94,7 @@
 		__field(	pid_t,	pid			)
 		__field(	int,	prio			)
 		__field(	int,	success			)
-		__field(	int,	cpu			)
+		__field(	int,	target_cpu		)
 	),
 
 	TP_fast_assign(
@@ -102,46 +102,27 @@
 		__entry->pid		= p->pid;
 		__entry->prio		= p->prio;
 		__entry->success	= success;
-		__entry->cpu		= task_cpu(p);
+		__entry->target_cpu	= task_cpu(p);
 	),
 
-	TP_printk("task %s:%d [%d] success=%d [%03d]",
+	TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
 		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success, __entry->cpu)
+		  __entry->success, __entry->target_cpu)
 );
 
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+	     TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+	     TP_ARGS(rq, p, success));
+
 /*
  * Tracepoint for waking up a new task:
  *
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
-TRACE_EVENT(sched_wakeup_new,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-		__field(	int,	cpu			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-		__entry->cpu		= task_cpu(p);
-	),
-
-	TP_printk("task %s:%d [%d] success=%d [%03d]",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success, __entry->cpu)
-);
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+	     TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+	     TP_ARGS(rq, p, success));
 
 /*
  * Tracepoint for task switches, performed by the scheduler:
@@ -176,7 +157,7 @@
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
+	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
 		__entry->prev_state ?
 		  __print_flags(__entry->prev_state, "|",
@@ -211,60 +192,47 @@
 		__entry->dest_cpu	= dest_cpu;
 	),
 
-	TP_printk("task %s:%d [%d] from: %d  to: %d",
+	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
 		  __entry->comm, __entry->pid, __entry->prio,
 		  __entry->orig_cpu, __entry->dest_cpu)
 );
 
+DECLARE_EVENT_CLASS(sched_process_template,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("comm=%s pid=%d prio=%d",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
 /*
  * Tracepoint for freeing a task:
  */
-TRACE_EVENT(sched_process_free,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
+DEFINE_EVENT(sched_process_template, sched_process_free,
+	     TP_PROTO(struct task_struct *p),
+	     TP_ARGS(p));
+	     
 
 /*
  * Tracepoint for a task exiting:
  */
-TRACE_EVENT(sched_process_exit,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
+DEFINE_EVENT(sched_process_template, sched_process_exit,
+	     TP_PROTO(struct task_struct *p),
+	     TP_ARGS(p));
 
 /*
  * Tracepoint for a waiting task:
@@ -287,7 +255,7 @@
 		__entry->prio		= current->prio;
 	),
 
-	TP_printk("task %s:%d [%d]",
+	TP_printk("comm=%s pid=%d prio=%d",
 		  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -314,46 +282,16 @@
 		__entry->child_pid	= child->pid;
 	),
 
-	TP_printk("parent %s:%d  child %s:%d",
+	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
 		__entry->parent_comm, __entry->parent_pid,
 		__entry->child_comm, __entry->child_pid)
 );
 
 /*
- * Tracepoint for sending a signal:
- */
-TRACE_EVENT(sched_signal_send,
-
-	TP_PROTO(int sig, struct task_struct *p),
-
-	TP_ARGS(sig, p),
-
-	TP_STRUCT__entry(
-		__field(	int,	sig			)
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->sig	= sig;
-	),
-
-	TP_printk("sig: %d  task %s:%d",
-		  __entry->sig, __entry->comm, __entry->pid)
-);
-
-/*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
  */
-
-/*
- * Tracepoint for accounting wait time (time the task is runnable
- * but not actually running due to scheduler contention).
- */
-TRACE_EVENT(sched_stat_wait,
+DECLARE_EVENT_CLASS(sched_stat_template,
 
 	TP_PROTO(struct task_struct *tsk, u64 delay),
 
@@ -374,11 +312,36 @@
 		__perf_count(delay);
 	),
 
-	TP_printk("task: %s:%d wait: %Lu [ns]",
+	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
 			__entry->comm, __entry->pid,
 			(unsigned long long)__entry->delay)
 );
 
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+	     TP_PROTO(struct task_struct *tsk, u64 delay),
+	     TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+	     TP_PROTO(struct task_struct *tsk, u64 delay),
+	     TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+	     TP_PROTO(struct task_struct *tsk, u64 delay),
+	     TP_ARGS(tsk, delay));
+
 /*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
@@ -406,72 +369,12 @@
 		__perf_count(runtime);
 	),
 
-	TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]",
+	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
 			__entry->comm, __entry->pid,
 			(unsigned long long)__entry->runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
-/*
- * Tracepoint for accounting sleep time (time the task is not runnable,
- * including iowait, see below).
- */
-TRACE_EVENT(sched_stat_sleep,
-
-	TP_PROTO(struct task_struct *tsk, u64 delay),
-
-	TP_ARGS(tsk, delay),
-
-	TP_STRUCT__entry(
-		__array( char,	comm,	TASK_COMM_LEN	)
-		__field( pid_t,	pid			)
-		__field( u64,	delay			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid	= tsk->pid;
-		__entry->delay	= delay;
-	)
-	TP_perf_assign(
-		__perf_count(delay);
-	),
-
-	TP_printk("task: %s:%d sleep: %Lu [ns]",
-			__entry->comm, __entry->pid,
-			(unsigned long long)__entry->delay)
-);
-
-/*
- * Tracepoint for accounting iowait time (time the task is not runnable
- * due to waiting on IO to complete).
- */
-TRACE_EVENT(sched_stat_iowait,
-
-	TP_PROTO(struct task_struct *tsk, u64 delay),
-
-	TP_ARGS(tsk, delay),
-
-	TP_STRUCT__entry(
-		__array( char,	comm,	TASK_COMM_LEN	)
-		__field( pid_t,	pid			)
-		__field( u64,	delay			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-		__entry->pid	= tsk->pid;
-		__entry->delay	= delay;
-	)
-	TP_perf_assign(
-		__perf_count(delay);
-	),
-
-	TP_printk("task: %s:%d iowait: %Lu [ns]",
-			__entry->comm, __entry->pid,
-			(unsigned long long)__entry->delay)
-);
-
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
new file mode 100644
index 0000000..a510b75
--- /dev/null
+++ b/include/trace/events/signal.h
@@ -0,0 +1,173 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM signal
+
+#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SIGNAL_H
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#define TP_STORE_SIGINFO(__entry, info)				\
+	do {							\
+		if (info == SEND_SIG_NOINFO) {			\
+			__entry->errno	= 0;			\
+			__entry->code	= SI_USER;		\
+		} else if (info == SEND_SIG_PRIV) {		\
+			__entry->errno	= 0;			\
+			__entry->code	= SI_KERNEL;		\
+		} else {					\
+			__entry->errno	= info->si_errno;	\
+			__entry->code	= info->si_code;	\
+		}						\
+	} while (0)
+
+/**
+ * signal_generate - called when a signal is generated
+ * @sig: signal number
+ * @info: pointer to struct siginfo
+ * @task: pointer to struct task_struct
+ *
+ * Current process sends a 'sig' signal to 'task' process with
+ * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
+ * 'info' is not a pointer and you can't access its field. Instead,
+ * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV
+ * means that si_code is SI_KERNEL.
+ */
+TRACE_EVENT(signal_generate,
+
+	TP_PROTO(int sig, struct siginfo *info, struct task_struct *task),
+
+	TP_ARGS(sig, info, task),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig			)
+		__field(	int,	errno			)
+		__field(	int,	code			)
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		__entry->sig	= sig;
+		TP_STORE_SIGINFO(__entry, info);
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->pid	= task->pid;
+	),
+
+	TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d",
+		  __entry->sig, __entry->errno, __entry->code,
+		  __entry->comm, __entry->pid)
+);
+
+/**
+ * signal_deliver - called when a signal is delivered
+ * @sig: signal number
+ * @info: pointer to struct siginfo
+ * @ka: pointer to struct k_sigaction
+ *
+ * A 'sig' signal is delivered to current process with 'info' siginfo,
+ * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or
+ * SIG_DFL.
+ * Note that some signals reported by signal_generate tracepoint can be
+ * lost, ignored or modified (by debugger) before hitting this tracepoint.
+ * This means, this can show which signals are actually delivered, but
+ * matching generated signals and delivered signals may not be correct.
+ */
+TRACE_EVENT(signal_deliver,
+
+	TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka),
+
+	TP_ARGS(sig, info, ka),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	int,		errno		)
+		__field(	int,		code		)
+		__field(	unsigned long,	sa_handler	)
+		__field(	unsigned long,	sa_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->sig	= sig;
+		TP_STORE_SIGINFO(__entry, info);
+		__entry->sa_handler	= (unsigned long)ka->sa.sa_handler;
+		__entry->sa_flags	= ka->sa.sa_flags;
+	),
+
+	TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx",
+		  __entry->sig, __entry->errno, __entry->code,
+		  __entry->sa_handler, __entry->sa_flags)
+);
+
+/**
+ * signal_overflow_fail - called when signal queue is overflow
+ * @sig: signal number
+ * @group: signal to process group or not (bool)
+ * @info: pointer to struct siginfo
+ *
+ * Kernel fails to generate 'sig' signal with 'info' siginfo, because
+ * siginfo queue is overflow, and the signal is dropped.
+ * 'group' is not 0 if the signal will be sent to a process group.
+ * 'sig' is always one of RT signals.
+ */
+TRACE_EVENT(signal_overflow_fail,
+
+	TP_PROTO(int sig, int group, struct siginfo *info),
+
+	TP_ARGS(sig, group, info),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig	)
+		__field(	int,	group	)
+		__field(	int,	errno	)
+		__field(	int,	code	)
+	),
+
+	TP_fast_assign(
+		__entry->sig	= sig;
+		__entry->group	= group;
+		TP_STORE_SIGINFO(__entry, info);
+	),
+
+	TP_printk("sig=%d group=%d errno=%d code=%d",
+		  __entry->sig, __entry->group, __entry->errno, __entry->code)
+);
+
+/**
+ * signal_lose_info - called when siginfo is lost
+ * @sig: signal number
+ * @group: signal to process group or not (bool)
+ * @info: pointer to struct siginfo
+ *
+ * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo
+ * queue is overflow.
+ * 'group' is not 0 if the signal will be sent to a process group.
+ * 'sig' is always one of non-RT signals.
+ */
+TRACE_EVENT(signal_lose_info,
+
+	TP_PROTO(int sig, int group, struct siginfo *info),
+
+	TP_ARGS(sig, group, info),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig	)
+		__field(	int,	group	)
+		__field(	int,	errno	)
+		__field(	int,	code	)
+	),
+
+	TP_fast_assign(
+		__entry->sig	= sig;
+		__entry->group	= group;
+		TP_STORE_SIGINFO(__entry, info);
+	),
+
+	TP_printk("sig=%d group=%d errno=%d code=%d",
+		  __entry->sig, __entry->group, __entry->errno, __entry->code)
+);
+#endif /* _TRACE_SIGNAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 1844c48..e5ce87a 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -26,7 +26,7 @@
 		__entry->timer	= timer;
 	),
 
-	TP_printk("timer %p", __entry->timer)
+	TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -54,7 +54,7 @@
 		__entry->now		= jiffies;
 	),
 
-	TP_printk("timer %p: func %pf, expires %lu, timeout %ld",
+	TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]",
 		  __entry->timer, __entry->function, __entry->expires,
 		  (long)__entry->expires - __entry->now)
 );
@@ -81,7 +81,7 @@
 		__entry->now		= jiffies;
 	),
 
-	TP_printk("timer %p: now %lu", __entry->timer, __entry->now)
+	TP_printk("timer=%p now=%lu", __entry->timer, __entry->now)
 );
 
 /**
@@ -108,7 +108,7 @@
 		__entry->timer	= timer;
 	),
 
-	TP_printk("timer %p", __entry->timer)
+	TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -129,7 +129,7 @@
 		__entry->timer	= timer;
 	),
 
-	TP_printk("timer %p", __entry->timer)
+	TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -140,24 +140,24 @@
  */
 TRACE_EVENT(hrtimer_init,
 
-	TP_PROTO(struct hrtimer *timer, clockid_t clockid,
+	TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid,
 		 enum hrtimer_mode mode),
 
-	TP_ARGS(timer, clockid, mode),
+	TP_ARGS(hrtimer, clockid, mode),
 
 	TP_STRUCT__entry(
-		__field( void *,		timer		)
+		__field( void *,		hrtimer		)
 		__field( clockid_t,		clockid		)
 		__field( enum hrtimer_mode,	mode		)
 	),
 
 	TP_fast_assign(
-		__entry->timer		= timer;
+		__entry->hrtimer	= hrtimer;
 		__entry->clockid	= clockid;
 		__entry->mode		= mode;
 	),
 
-	TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer,
+	TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer,
 		  __entry->clockid == CLOCK_REALTIME ?
 			"CLOCK_REALTIME" : "CLOCK_MONOTONIC",
 		  __entry->mode == HRTIMER_MODE_ABS ?
@@ -170,26 +170,26 @@
  */
 TRACE_EVENT(hrtimer_start,
 
-	TP_PROTO(struct hrtimer *timer),
+	TP_PROTO(struct hrtimer *hrtimer),
 
-	TP_ARGS(timer),
+	TP_ARGS(hrtimer),
 
 	TP_STRUCT__entry(
-		__field( void *,	timer		)
+		__field( void *,	hrtimer		)
 		__field( void *,	function	)
 		__field( s64,		expires		)
 		__field( s64,		softexpires	)
 	),
 
 	TP_fast_assign(
-		__entry->timer		= timer;
-		__entry->function	= timer->function;
-		__entry->expires	= hrtimer_get_expires(timer).tv64;
-		__entry->softexpires	= hrtimer_get_softexpires(timer).tv64;
+		__entry->hrtimer	= hrtimer;
+		__entry->function	= hrtimer->function;
+		__entry->expires	= hrtimer_get_expires(hrtimer).tv64;
+		__entry->softexpires	= hrtimer_get_softexpires(hrtimer).tv64;
 	),
 
-	TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu",
-		  __entry->timer, __entry->function,
+	TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu",
+		  __entry->hrtimer, __entry->function,
 		  (unsigned long long)ktime_to_ns((ktime_t) {
 				  .tv64 = __entry->expires }),
 		  (unsigned long long)ktime_to_ns((ktime_t) {
@@ -206,23 +206,22 @@
  */
 TRACE_EVENT(hrtimer_expire_entry,
 
-	TP_PROTO(struct hrtimer *timer, ktime_t *now),
+	TP_PROTO(struct hrtimer *hrtimer, ktime_t *now),
 
-	TP_ARGS(timer, now),
+	TP_ARGS(hrtimer, now),
 
 	TP_STRUCT__entry(
-		__field( void *,	timer	)
+		__field( void *,	hrtimer	)
 		__field( s64,		now	)
 	),
 
 	TP_fast_assign(
-		__entry->timer	= timer;
-		__entry->now	= now->tv64;
+		__entry->hrtimer	= hrtimer;
+		__entry->now		= now->tv64;
 	),
 
-	TP_printk("hrtimer %p, now %llu", __entry->timer,
-		  (unsigned long long)ktime_to_ns((ktime_t) {
-				  .tv64 = __entry->now }))
+	TP_printk("hrtimer=%p now=%llu", __entry->hrtimer,
+		  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
  );
 
 /**
@@ -234,40 +233,40 @@
  */
 TRACE_EVENT(hrtimer_expire_exit,
 
-	TP_PROTO(struct hrtimer *timer),
+	TP_PROTO(struct hrtimer *hrtimer),
 
-	TP_ARGS(timer),
+	TP_ARGS(hrtimer),
 
 	TP_STRUCT__entry(
-		__field( void *,	timer	)
+		__field( void *,	hrtimer	)
 	),
 
 	TP_fast_assign(
-		__entry->timer	= timer;
+		__entry->hrtimer	= hrtimer;
 	),
 
-	TP_printk("hrtimer %p", __entry->timer)
+	TP_printk("hrtimer=%p", __entry->hrtimer)
 );
 
 /**
  * hrtimer_cancel - called when the hrtimer is canceled
- * @timer:	pointer to struct hrtimer
+ * @hrtimer:	pointer to struct hrtimer
  */
 TRACE_EVENT(hrtimer_cancel,
 
-	TP_PROTO(struct hrtimer *timer),
+	TP_PROTO(struct hrtimer *hrtimer),
 
-	TP_ARGS(timer),
+	TP_ARGS(hrtimer),
 
 	TP_STRUCT__entry(
-		__field( void *,	timer	)
+		__field( void *,	hrtimer	)
 	),
 
 	TP_fast_assign(
-		__entry->timer	= timer;
+		__entry->hrtimer	= hrtimer;
 	),
 
-	TP_printk("hrtimer %p", __entry->timer)
+	TP_printk("hrtimer=%p", __entry->hrtimer)
 );
 
 /**
@@ -302,7 +301,7 @@
 		__entry->interval_usec	= value->it_interval.tv_usec;
 	),
 
-	TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu",
+	TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu",
 		  __entry->which, __entry->expires,
 		  __entry->value_sec, __entry->value_usec,
 		  __entry->interval_sec, __entry->interval_usec)
@@ -332,7 +331,7 @@
 		__entry->pid	= pid_nr(pid);
 	),
 
-	    TP_printk("which %d, pid %d, now %lu", __entry->which,
+	    TP_printk("which=%d pid=%d now=%lu", __entry->which,
 		      (int) __entry->pid, __entry->now)
 );
 
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index e4612db..d6c9744 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -8,7 +8,7 @@
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
 
-TRACE_EVENT(workqueue_insertion,
+DECLARE_EVENT_CLASS(workqueue,
 
 	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
@@ -30,26 +30,18 @@
 		__entry->thread_pid, __entry->func)
 );
 
-TRACE_EVENT(workqueue_execution,
+DEFINE_EVENT(workqueue, workqueue_insertion,
 
 	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
-	TP_ARGS(wq_thread, work),
+	TP_ARGS(wq_thread, work)
+);
 
-	TP_STRUCT__entry(
-		__array(char,		thread_comm,	TASK_COMM_LEN)
-		__field(pid_t,		thread_pid)
-		__field(work_func_t,	func)
-	),
+DEFINE_EVENT(workqueue, workqueue_execution,
 
-	TP_fast_assign(
-		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
-		__entry->thread_pid	= wq_thread->pid;
-		__entry->func		= work->func;
-	),
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
-	TP_printk("thread=%s:%d func=%pf", __entry->thread_comm,
-		__entry->thread_pid, __entry->func)
+	TP_ARGS(wq_thread, work)
 );
 
 /* Trace the creation of one workqueue thread on a cpu */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index dacb8ef..d1b3de9 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,6 +18,26 @@
 
 #include <linux/ftrace_event.h>
 
+/*
+ * DECLARE_EVENT_CLASS can be used to add a generic function
+ * handlers for events. That is, if all events have the same
+ * parameters and just have distinct trace points.
+ * Each tracepoint can be defined with DEFINE_EVENT and that
+ * will map the DECLARE_EVENT_CLASS to the tracepoint.
+ *
+ * TRACE_EVENT is a one to one mapping between tracepoint and template.
+ */
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
+	DECLARE_EVENT_CLASS(name,			       \
+			     PARAMS(proto),		       \
+			     PARAMS(args),		       \
+			     PARAMS(tstruct),		       \
+			     PARAMS(assign),		       \
+			     PARAMS(print));		       \
+	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
+
+
 #undef __field
 #define __field(type, item)		type	item;
 
@@ -36,15 +56,21 @@
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
-	struct ftrace_raw_##name {				\
-		struct trace_entry	ent;			\
-		tstruct						\
-		char			__data[0];		\
-	};							\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)	\
+	struct ftrace_raw_##name {					\
+		struct trace_entry	ent;				\
+		tstruct							\
+		char			__data[0];			\
+	};
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)	\
 	static struct ftrace_event_call event_##name
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef __cpparg
 #define __cpparg(arg...) arg
 
@@ -89,12 +115,19 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	struct ftrace_data_offsets_##call {				\
 		tstruct;						\
 	};
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -120,9 +153,10 @@
 #undef __field
 #define __field(type, item)					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	\
 			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
+			       (unsigned int)sizeof(field.item),	\
+			       (unsigned int)is_signed_type(type));	\
 	if (!ret)							\
 		return 0;
 
@@ -132,19 +166,21 @@
 #undef __array
 #define __array(type, item, len)						\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	\
 			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
+			       (unsigned int)sizeof(field.item),	\
+			       (unsigned int)is_signed_type(type));	\
 	if (!ret)							\
 		return 0;
 
 #undef __dynamic_array
 #define __dynamic_array(type, item, len)				       \
 	ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\
-			       "offset:%u;\tsize:%u;\n",		       \
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	       \
 			       (unsigned int)offsetof(typeof(field),	       \
 					__data_loc_##item),		       \
-			       (unsigned int)sizeof(field.__data_loc_##item)); \
+			       (unsigned int)sizeof(field.__data_loc_##item), \
+			       (unsigned int)is_signed_type(type));	\
 	if (!ret)							       \
 		return 0;
 
@@ -167,17 +203,50 @@
 #undef TP_perf_assign
 #define TP_perf_assign(args...)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int								\
-ftrace_format_##call(struct ftrace_event_call *unused,			\
-		      struct trace_seq *s)				\
+ftrace_format_setup_##call(struct ftrace_event_call *unused,		\
+			   struct trace_seq *s)				\
 {									\
 	struct ftrace_raw_##call field __attribute__((unused));		\
 	int ret = 0;							\
 									\
 	tstruct;							\
 									\
+	return ret;							\
+}									\
+									\
+static int								\
+ftrace_format_##call(struct ftrace_event_call *unused,			\
+		     struct trace_seq *s)				\
+{									\
+	int ret = 0;							\
+									\
+	ret = ftrace_format_setup_##call(unused, s);			\
+	if (!ret)							\
+		return ret;						\
+									\
+	ret = trace_seq_printf(s, "\nprint fmt: " print);		\
+									\
+	return ret;							\
+}
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)		\
+static int								\
+ftrace_format_##name(struct ftrace_event_call *unused,			\
+		      struct trace_seq *s)				\
+{									\
+	int ret = 0;							\
+									\
+	ret = ftrace_format_setup_##template(unused, s);		\
+	if (!ret)							\
+		return ret;						\
+									\
 	trace_seq_printf(s, "\nprint fmt: " print);			\
 									\
 	return ret;							\
@@ -252,13 +321,55 @@
 		ftrace_print_symbols_seq(p, value, symbols);		\
 	})
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
+static enum print_line_t						\
+ftrace_raw_output_id_##call(int event_id, const char *name,		\
+			    struct trace_iterator *iter, int flags)	\
+{									\
+	struct trace_seq *s = &iter->seq;				\
+	struct ftrace_raw_##call *field;				\
+	struct trace_entry *entry;					\
+	struct trace_seq *p;						\
+	int ret;							\
+									\
+	entry = iter->ent;						\
+									\
+	if (entry->type != event_id) {					\
+		WARN_ON_ONCE(1);					\
+		return TRACE_TYPE_UNHANDLED;				\
+	}								\
+									\
+	field = (typeof(field))entry;					\
+									\
+	p = &get_cpu_var(ftrace_event_seq);				\
+	trace_seq_init(p);						\
+	ret = trace_seq_printf(s, "%s: ", name);			\
+	if (ret)							\
+		ret = trace_seq_printf(s, print);			\
+	put_cpu();							\
+	if (!ret)							\
+		return TRACE_TYPE_PARTIAL_LINE;				\
+									\
+	return TRACE_TYPE_HANDLED;					\
+}
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)			\
+static enum print_line_t						\
+ftrace_raw_output_##name(struct trace_iterator *iter, int flags)	\
+{									\
+	return ftrace_raw_output_id_##template(event_##name.id,		\
+					       #name, iter, flags);	\
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
 static enum print_line_t						\
 ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 {									\
 	struct trace_seq *s = &iter->seq;				\
-	struct ftrace_raw_##call *field;				\
+	struct ftrace_raw_##template *field;				\
 	struct trace_entry *entry;					\
 	struct trace_seq *p;						\
 	int ret;							\
@@ -274,14 +385,16 @@
 									\
 	p = &get_cpu_var(ftrace_event_seq);				\
 	trace_seq_init(p);						\
-	ret = trace_seq_printf(s, #call ": " print);			\
+	ret = trace_seq_printf(s, "%s: ", #call);			\
+	if (ret)							\
+		ret = trace_seq_printf(s, print);			\
 	put_cpu();							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
 	return TRACE_TYPE_HANDLED;					\
 }
-	
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #undef __field_ext
@@ -315,8 +428,8 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int								\
 ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 {									\
@@ -332,6 +445,13 @@
 	return ret;							\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -358,10 +478,10 @@
 	__data_size += (len) * sizeof(type);
 
 #undef __string
-#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static inline int ftrace_get_offsets_##call(				\
 	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
 {									\
@@ -373,6 +493,13 @@
 	return __data_size;						\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #ifdef CONFIG_EVENT_PROFILE
@@ -394,21 +521,28 @@
  *
  */
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args)			\
 									\
-static void ftrace_profile_##call(proto);				\
+static void ftrace_profile_##name(proto);				\
 									\
-static int ftrace_profile_enable_##call(void)				\
+static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\
 {									\
-	return register_trace_##call(ftrace_profile_##call);		\
+	return register_trace_##name(ftrace_profile_##name);		\
 }									\
 									\
-static void ftrace_profile_disable_##call(void)				\
+static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 {									\
-	unregister_trace_##call(ftrace_profile_##call);			\
+	unregister_trace_##name(ftrace_profile_##name);			\
 }
 
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #endif
@@ -423,7 +557,7 @@
  *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
  * }
  *
- * static int ftrace_reg_event_<call>(void)
+ * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused)
  * {
  *	int ret;
  *
@@ -434,7 +568,7 @@
  *	return ret;
  * }
  *
- * static void ftrace_unreg_event_<call>(void)
+ * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
  * {
  *	unregister_trace_<call>(ftrace_event_<call>);
  * }
@@ -469,7 +603,7 @@
  *	trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc);
  * }
  *
- * static int ftrace_raw_reg_event_<call>(void)
+ * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
  * {
  *	int ret;
  *
@@ -480,7 +614,7 @@
  *	return ret;
  * }
  *
- * static void ftrace_unreg_event_<call>(void)
+ * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
  * {
  *	unregister_trace_<call>(ftrace_raw_event_<call>);
  * }
@@ -489,7 +623,7 @@
  *	.trace			= ftrace_raw_output_<call>, <-- stage 2
  * };
  *
- * static int ftrace_raw_init_event_<call>(void)
+ * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
  * {
  *	int id;
  *
@@ -547,15 +681,13 @@
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), src);
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 									\
-static struct ftrace_event_call event_##call;				\
-									\
-static void ftrace_raw_event_##call(proto)				\
+static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
+				       proto)				\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	struct ftrace_event_call *event_call = &event_##call;		\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	struct ring_buffer *buffer;					\
@@ -569,7 +701,7 @@
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 									\
 	event = trace_current_buffer_lock_reserve(&buffer,		\
-				 event_##call.id,			\
+				 event_call->id,			\
 				 sizeof(*entry) + __data_size,		\
 				 irq_flags, pc);			\
 	if (!event)							\
@@ -584,9 +716,17 @@
 	if (!filter_current_check_discard(buffer, event_call, entry, event)) \
 		trace_nowake_buffer_unlock_commit(buffer,		\
 						  event, irq_flags, pc); \
+}
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
+									\
+static void ftrace_raw_event_##call(proto)				\
+{									\
+	ftrace_raw_event_id_##template(&event_##call, args);		\
 }									\
 									\
-static int ftrace_raw_reg_event_##call(void *ptr)			\
+static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
 {									\
 	int ret;							\
 									\
@@ -597,7 +737,7 @@
 	return ret;							\
 }									\
 									\
-static void ftrace_raw_unreg_event_##call(void *ptr)			\
+static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
 {									\
 	unregister_trace_##call(ftrace_raw_event_##call);		\
 }									\
@@ -606,7 +746,7 @@
 	.trace			= ftrace_raw_output_##call,		\
 };									\
 									\
-static int ftrace_raw_init_event_##call(void)				\
+static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
 {									\
 	int id;								\
 									\
@@ -616,7 +756,36 @@
 	event_##call.id = id;						\
 	INIT_LIST_HEAD(&event_##call.fields);				\
 	return 0;							\
-}									\
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
+									\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.event			= &ftrace_event_type_##call,		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.regfunc		= ftrace_raw_reg_event_##call,		\
+	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.show_format		= ftrace_format_##template,		\
+	.define_fields		= ftrace_define_fields_##template,	\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
 									\
 static struct ftrace_event_call __used					\
 __attribute__((__aligned__(4)))						\
@@ -628,7 +797,7 @@
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.show_format		= ftrace_format_##call,			\
-	.define_fields		= ftrace_define_fields_##call,		\
+	.define_fields		= ftrace_define_fields_##template,	\
 	_TRACE_PROFILE_INIT(call)					\
 }
 
@@ -646,6 +815,7 @@
  *	struct ftrace_event_call *event_call = &event_<call>;
  *	extern void perf_tp_event(int, u64, u64, void *, int);
  *	struct ftrace_raw_##call *entry;
+ *	struct perf_trace_buf *trace_buf;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
  *	struct trace_entry *ent;
@@ -670,14 +840,25 @@
  *	__cpu = smp_processor_id();
  *
  *	if (in_nmi())
- *		raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *		trace_buf = rcu_dereference(perf_trace_buf_nmi);
  *	else
- *		raw_data = rcu_dereference(trace_profile_buf);
+ *		trace_buf = rcu_dereference(perf_trace_buf);
  *
- *	if (!raw_data)
+ *	if (!trace_buf)
  *		goto end;
  *
- *	raw_data = per_cpu_ptr(raw_data, __cpu);
+ *	trace_buf = per_cpu_ptr(trace_buf, __cpu);
+ *
+ * 	// Avoid recursion from perf that could mess up the buffer
+ * 	if (trace_buf->recursion++)
+ *		goto end_recursion;
+ *
+ * 	raw_data = trace_buf->buf;
+ *
+ *	// Make recursion update visible before entering perf_tp_event
+ *	// so that we protect from perf recursions.
+ *
+ *	barrier();
  *
  *	//zero dead bytes from alignment to avoid stack leak to userspace:
  *	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
@@ -704,21 +885,26 @@
 #undef __perf_count
 #define __perf_count(c) __count = (c)
 
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-static void ftrace_profile_##call(proto)				\
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
+static void								\
+ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
+			    proto)					\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	struct ftrace_event_call *event_call = &event_##call;		\
-	extern void perf_tp_event(int, u64, u64, void *, int);	\
+	extern int perf_swevent_get_recursion_context(void);		\
+	extern void perf_swevent_put_recursion_context(int rctx);	\
+	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
 	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
+	char *trace_buf;						\
 	char *raw_data;							\
 	int __cpu;							\
+	int rctx;							\
 	int pc;								\
 									\
 	pc = preempt_count();						\
@@ -733,17 +919,22 @@
 		return;							\
 									\
 	local_irq_save(irq_flags);					\
+									\
+	rctx = perf_swevent_get_recursion_context();			\
+	if (rctx < 0)							\
+		goto end_recursion;					\
+									\
 	__cpu = smp_processor_id();					\
 									\
 	if (in_nmi())							\
-		raw_data = rcu_dereference(trace_profile_buf_nmi);		\
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);	\
 	else								\
-		raw_data = rcu_dereference(trace_profile_buf);		\
+		trace_buf = rcu_dereference(perf_trace_buf);		\
 									\
-	if (!raw_data)							\
+	if (!trace_buf)							\
 		goto end;						\
 									\
-	raw_data = per_cpu_ptr(raw_data, __cpu);			\
+	raw_data = per_cpu_ptr(trace_buf, __cpu);			\
 									\
 	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;		\
 	entry = (struct ftrace_raw_##call *)raw_data;			\
@@ -759,10 +950,25 @@
 			     __entry_size);				\
 									\
 end:									\
+	perf_swevent_put_recursion_context(rctx);			\
+end_recursion:								\
 	local_irq_restore(irq_flags);					\
 									\
 }
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)		\
+static void ftrace_profile_##call(proto)			\
+{								\
+	struct ftrace_event_call *event_call = &event_##call;	\
+								\
+	ftrace_profile_templ_##template(event_call, args);	\
+}
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 #endif /* CONFIG_EVENT_PROFILE */
 
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index e972f0a..961fda3 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -12,21 +12,19 @@
  * A syscall entry in the ftrace syscalls array.
  *
  * @name: name of the syscall
+ * @syscall_nr: number of the syscall
  * @nb_args: number of parameters it takes
  * @types: list of types as strings
  * @args: list of args as strings (args[i] matches types[i])
- * @enter_id: associated ftrace enter event id
- * @exit_id: associated ftrace exit event id
  * @enter_event: associated syscall_enter trace event
  * @exit_event: associated syscall_exit trace event
  */
 struct syscall_metadata {
 	const char	*name;
+	int		syscall_nr;
 	int		nb_args;
 	const char	**types;
 	const char	**args;
-	int		enter_id;
-	int		exit_id;
 
 	struct ftrace_event_call *enter_event;
 	struct ftrace_event_call *exit_event;
@@ -34,29 +32,28 @@
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 extern unsigned long arch_syscall_addr(int nr);
-extern int syscall_name_to_nr(char *name);
-void set_syscall_enter_id(int num, int id);
-void set_syscall_exit_id(int num, int id);
-extern struct trace_event event_syscall_enter;
-extern struct trace_event event_syscall_exit;
-extern int reg_event_syscall_enter(void *ptr);
-extern void unreg_event_syscall_enter(void *ptr);
-extern int reg_event_syscall_exit(void *ptr);
-extern void unreg_event_syscall_exit(void *ptr);
+extern int init_syscall_trace(struct ftrace_event_call *call);
+
 extern int syscall_enter_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
 extern int syscall_exit_format(struct ftrace_event_call *call,
 				struct trace_seq *s);
 extern int syscall_enter_define_fields(struct ftrace_event_call *call);
 extern int syscall_exit_define_fields(struct ftrace_event_call *call);
+extern int reg_event_syscall_enter(struct ftrace_event_call *call);
+extern void unreg_event_syscall_enter(struct ftrace_event_call *call);
+extern int reg_event_syscall_exit(struct ftrace_event_call *call);
+extern void unreg_event_syscall_exit(struct ftrace_event_call *call);
+extern int
+ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
 enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
 enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
 #ifdef CONFIG_EVENT_PROFILE
-int reg_prof_syscall_enter(char *name);
-void unreg_prof_syscall_enter(char *name);
-int reg_prof_syscall_exit(char *name);
-void unreg_prof_syscall_exit(char *name);
+int prof_sysenter_enable(struct ftrace_event_call *call);
+void prof_sysenter_disable(struct ftrace_event_call *call);
+int prof_sysexit_enable(struct ftrace_event_call *call);
+void prof_sysexit_disable(struct ftrace_event_call *call);
 
 #endif
 
diff --git a/kernel/Makefile b/kernel/Makefile
index dcf6789..982c50e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -21,6 +21,7 @@
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -97,6 +98,7 @@
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
 obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac..3f45e3c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -978,6 +979,10 @@
 	proc_exit_connector(tsk);
 
 	/*
+	 * FIXME: do that only when needed, using sched_exit tracepoint
+	 */
+	flush_ptrace_hw_breakpoint(tsk);
+	/*
 	 * Flush inherited counters to the parent - before the parent
 	 * gets woken up by child-exit notifications.
 	 */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 0000000..cf5ee16
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,423 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) IBM Corporation, 2009
+ * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Thanks to Ingo Molnar for his many suggestions.
+ *
+ * Authors: Alan Stern <stern@rowland.harvard.edu>
+ *          K.Prasad <prasad@linux.vnet.ibm.com>
+ *          Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ * This file contains the arch-independent routines.
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <linux/hw_breakpoint.h>
+
+/*
+ * Constraints data
+ */
+
+/* Number of pinned cpu breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+
+/* Number of pinned task breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+
+/* Number of non-pinned cpu/task breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+
+/* Gather the number of total pinned and un-pinned bp in a cpuset */
+struct bp_busy_slots {
+	unsigned int pinned;
+	unsigned int flexible;
+};
+
+/* Serialize accesses to the above constraints */
+static DEFINE_MUTEX(nr_bp_mutex);
+
+/*
+ * Report the maximum number of pinned breakpoints a task
+ * have in this cpu
+ */
+static unsigned int max_task_bp_pinned(int cpu)
+{
+	int i;
+	unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+
+	for (i = HBP_NUM -1; i >= 0; i--) {
+		if (tsk_pinned[i] > 0)
+			return i + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Report the number of pinned/un-pinned breakpoints we have in
+ * a given cpu (cpu > -1) or in all of them (cpu = -1).
+ */
+static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+{
+	if (cpu >= 0) {
+		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		slots->pinned += max_task_bp_pinned(cpu);
+		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+
+		return;
+	}
+
+	for_each_online_cpu(cpu) {
+		unsigned int nr;
+
+		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		nr += max_task_bp_pinned(cpu);
+
+		if (nr > slots->pinned)
+			slots->pinned = nr;
+
+		nr = per_cpu(nr_bp_flexible, cpu);
+
+		if (nr > slots->flexible)
+			slots->flexible = nr;
+	}
+}
+
+/*
+ * Add a pinned breakpoint for the given task in our constraint table
+ */
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+{
+	int count = 0;
+	struct perf_event *bp;
+	struct perf_event_context *ctx = tsk->perf_event_ctxp;
+	unsigned int *tsk_pinned;
+	struct list_head *list;
+	unsigned long flags;
+
+	if (WARN_ONCE(!ctx, "No perf context for this task"))
+		return;
+
+	list = &ctx->event_list;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	/*
+	 * The current breakpoint counter is not included in the list
+	 * at the open() callback time
+	 */
+	list_for_each_entry(bp, list, event_entry) {
+		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+			count++;
+	}
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
+		return;
+
+	tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	if (enable) {
+		tsk_pinned[count]++;
+		if (count > 0)
+			tsk_pinned[count-1]--;
+	} else {
+		tsk_pinned[count]--;
+		if (count > 0)
+			tsk_pinned[count-1]++;
+	}
+}
+
+/*
+ * Add/remove the given breakpoint in our constraint table
+ */
+static void toggle_bp_slot(struct perf_event *bp, bool enable)
+{
+	int cpu = bp->cpu;
+	struct task_struct *tsk = bp->ctx->task;
+
+	/* Pinned counter task profiling */
+	if (tsk) {
+		if (cpu >= 0) {
+			toggle_bp_task_slot(tsk, cpu, enable);
+			return;
+		}
+
+		for_each_online_cpu(cpu)
+			toggle_bp_task_slot(tsk, cpu, enable);
+		return;
+	}
+
+	/* Pinned counter cpu profiling */
+	if (enable)
+		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+	else
+		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+}
+
+/*
+ * Contraints to check before allowing this new breakpoint counter:
+ *
+ *  == Non-pinned counter == (Considered as pinned for now)
+ *
+ *   - If attached to a single cpu, check:
+ *
+ *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
+ *           + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ *
+ *       -> If there are already non-pinned counters in this cpu, it means
+ *          there is already a free slot for them.
+ *          Otherwise, we check that the maximum number of per task
+ *          breakpoints (for this cpu) plus the number of per cpu breakpoint
+ *          (for this cpu) doesn't cover every registers.
+ *
+ *   - If attached to every cpus, check:
+ *
+ *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
+ *           + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ *
+ *       -> This is roughly the same, except we check the number of per cpu
+ *          bp for every cpu and we keep the max one. Same for the per tasks
+ *          breakpoints.
+ *
+ *
+ * == Pinned counter ==
+ *
+ *   - If attached to a single cpu, check:
+ *
+ *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
+ *            + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ *
+ *       -> Same checks as before. But now the nr_bp_flexible, if any, must keep
+ *          one register at least (or they will never be fed).
+ *
+ *   - If attached to every cpus, check:
+ *
+ *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
+ *            + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ */
+int reserve_bp_slot(struct perf_event *bp)
+{
+	struct bp_busy_slots slots = {0};
+	int ret = 0;
+
+	mutex_lock(&nr_bp_mutex);
+
+	fetch_bp_busy_slots(&slots, bp->cpu);
+
+	/* Flexible counters need to keep at least one slot */
+	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
+		ret = -ENOSPC;
+		goto end;
+	}
+
+	toggle_bp_slot(bp, true);
+
+end:
+	mutex_unlock(&nr_bp_mutex);
+
+	return ret;
+}
+
+void release_bp_slot(struct perf_event *bp)
+{
+	mutex_lock(&nr_bp_mutex);
+
+	toggle_bp_slot(bp, false);
+
+	mutex_unlock(&nr_bp_mutex);
+}
+
+
+int __register_perf_hw_breakpoint(struct perf_event *bp)
+{
+	int ret;
+
+	ret = reserve_bp_slot(bp);
+	if (ret)
+		return ret;
+
+	/*
+	 * Ptrace breakpoints can be temporary perf events only
+	 * meant to reserve a slot. In this case, it is created disabled and
+	 * we don't want to check the params right now (as we put a null addr)
+	 * But perf tools create events as disabled and we want to check
+	 * the params for them.
+	 * This is a quick hack that will be removed soon, once we remove
+	 * the tmp breakpoints from ptrace
+	 */
+	if (!bp->attr.disabled || bp->callback == perf_bp_event)
+		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+
+	return ret;
+}
+
+int register_perf_hw_breakpoint(struct perf_event *bp)
+{
+	bp->callback = perf_bp_event;
+
+	return __register_perf_hw_breakpoint(bp);
+}
+
+/**
+ * register_user_hw_breakpoint - register a hardware breakpoint for user space
+ * @attr: breakpoint attributes
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ */
+struct perf_event *
+register_user_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk)
+{
+	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+}
+EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
+
+/**
+ * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
+ * @bp: the breakpoint structure to modify
+ * @attr: new breakpoint attributes
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ */
+struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk)
+{
+	/*
+	 * FIXME: do it without unregistering
+	 * - We don't want to lose our slot
+	 * - If the new bp is incorrect, don't lose the older one
+	 */
+	unregister_hw_breakpoint(bp);
+
+	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+}
+EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
+
+/**
+ * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
+ * @bp: the breakpoint structure to unregister
+ */
+void unregister_hw_breakpoint(struct perf_event *bp)
+{
+	if (!bp)
+		return;
+	perf_event_release_kernel(bp);
+}
+EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
+
+/**
+ * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
+ * @attr: breakpoint attributes
+ * @triggered: callback to trigger when we hit the breakpoint
+ *
+ * @return a set of per_cpu pointers to perf events
+ */
+struct perf_event **
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered)
+{
+	struct perf_event **cpu_events, **pevent, *bp;
+	long err;
+	int cpu;
+
+	cpu_events = alloc_percpu(typeof(*cpu_events));
+	if (!cpu_events)
+		return ERR_PTR(-ENOMEM);
+
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
+
+		*pevent = bp;
+
+		if (IS_ERR(bp)) {
+			err = PTR_ERR(bp);
+			goto fail;
+		}
+	}
+
+	return cpu_events;
+
+fail:
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		if (IS_ERR(*pevent))
+			break;
+		unregister_hw_breakpoint(*pevent);
+	}
+	free_percpu(cpu_events);
+	/* return the error if any */
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
+
+/**
+ * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
+ * @cpu_events: the per cpu set of events to unregister
+ */
+void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
+{
+	int cpu;
+	struct perf_event **pevent;
+
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		unregister_hw_breakpoint(*pevent);
+	}
+	free_percpu(cpu_events);
+}
+EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
+
+static struct notifier_block hw_breakpoint_exceptions_nb = {
+	.notifier_call = hw_breakpoint_exceptions_notify,
+	/* we need to be notified first */
+	.priority = 0x7fffffff
+};
+
+static int __init init_hw_breakpoint(void)
+{
+	return register_die_notifier(&hw_breakpoint_exceptions_nb);
+}
+core_initcall(init_hw_breakpoint);
+
+
+struct pmu perf_ops_bp = {
+	.enable		= arch_install_hw_breakpoint,
+	.disable	= arch_uninstall_hw_breakpoint,
+	.read		= hw_breakpoint_pmu_read,
+	.unthrottle	= hw_breakpoint_pmu_unthrottle
+};
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8b6b8b6..8e5288a 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -181,6 +181,7 @@
 	}
 	return module_kallsyms_lookup_name(name);
 }
+EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
 
 int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 				      unsigned long),
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1494e85..e5342a3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -90,6 +90,9 @@
  */
 static struct kprobe_blackpoint kprobe_blacklist[] = {
 	{"preempt_schedule",},
+	{"native_get_debugreg",},
+	{"irq_entries_start",},
+	{"common_interrupt",},
 	{NULL}    /* Terminator */
 };
 
@@ -673,6 +676,40 @@
 	return (kprobe_opcode_t *)(((char *)addr) + p->offset);
 }
 
+/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+{
+	struct kprobe *old_p, *list_p;
+
+	old_p = get_kprobe(p->addr);
+	if (unlikely(!old_p))
+		return NULL;
+
+	if (p != old_p) {
+		list_for_each_entry_rcu(list_p, &old_p->list, list)
+			if (list_p == p)
+			/* kprobe p is a valid probe */
+				goto valid;
+		return NULL;
+	}
+valid:
+	return old_p;
+}
+
+/* Return error if the kprobe is being re-registered */
+static inline int check_kprobe_rereg(struct kprobe *p)
+{
+	int ret = 0;
+	struct kprobe *old_p;
+
+	mutex_lock(&kprobe_mutex);
+	old_p = __get_valid_kprobe(p);
+	if (old_p)
+		ret = -EINVAL;
+	mutex_unlock(&kprobe_mutex);
+	return ret;
+}
+
 int __kprobes register_kprobe(struct kprobe *p)
 {
 	int ret = 0;
@@ -685,6 +722,10 @@
 		return -EINVAL;
 	p->addr = addr;
 
+	ret = check_kprobe_rereg(p);
+	if (ret)
+		return ret;
+
 	preempt_disable();
 	if (!kernel_text_address((unsigned long) p->addr) ||
 	    in_kprobes_functions((unsigned long) p->addr)) {
@@ -754,26 +795,6 @@
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
-/* Check passed kprobe is valid and return kprobe in kprobe_table. */
-static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
-{
-	struct kprobe *old_p, *list_p;
-
-	old_p = get_kprobe(p->addr);
-	if (unlikely(!old_p))
-		return NULL;
-
-	if (p != old_p) {
-		list_for_each_entry_rcu(list_p, &old_p->list, list)
-			if (list_p == p)
-			/* kprobe p is a valid probe */
-				goto valid;
-		return NULL;
-	}
-valid:
-	return old_p;
-}
-
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
@@ -1141,6 +1162,13 @@
 	arch_remove_kprobe(p);
 }
 
+void __kprobes dump_kprobe(struct kprobe *kp)
+{
+	printk(KERN_WARNING "Dumping kprobe:\n");
+	printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
+	       kp->symbol_name, kp->addr, kp->offset);
+}
+
 /* Module notifier call back, checking kprobes on the module */
 static int __kprobes kprobes_module_callback(struct notifier_block *nb,
 					     unsigned long val, void *data)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9af5672..f5dcd36 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -49,7 +49,7 @@
 #include "lockdep_internals.h"
 
 #define CREATE_TRACE_POINTS
-#include <trace/events/lockdep.h>
+#include <trace/events/lock.h>
 
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 61d5aa5..acd24e7 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -558,7 +558,7 @@
 
 static ATOMIC_NOTIFIER_HEAD(die_chain);
 
-int notrace notify_die(enum die_val val, const char *str,
+int notrace __kprobes notify_die(enum die_val val, const char *str,
 	       struct pt_regs *regs, long err, int trap, int sig)
 {
 	struct die_args args = {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643..6b7ddba 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,6 +28,8 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
+#include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -244,6 +246,49 @@
 	put_ctx(ctx);
 }
 
+static inline u64 perf_clock(void)
+{
+	return cpu_clock(smp_processor_id());
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_event_context *ctx)
+{
+	u64 now = perf_clock();
+
+	ctx->time += now - ctx->timestamp;
+	ctx->timestamp = now;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for a event.
+ */
+static void update_event_times(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	u64 run_end;
+
+	if (event->state < PERF_EVENT_STATE_INACTIVE ||
+	    event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
+		return;
+
+	if (ctx->is_active)
+		run_end = ctx->time;
+	else
+		run_end = event->tstamp_stopped;
+
+	event->total_time_enabled = run_end - event->tstamp_enabled;
+
+	if (event->state == PERF_EVENT_STATE_INACTIVE)
+		run_end = event->tstamp_stopped;
+	else
+		run_end = ctx->time;
+
+	event->total_time_running = run_end - event->tstamp_running;
+}
+
 /*
  * Add a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -292,6 +337,18 @@
 	if (event->group_leader != event)
 		event->group_leader->nr_siblings--;
 
+	update_event_times(event);
+
+	/*
+	 * If event was in error state, then keep it
+	 * that way, otherwise bogus counts will be
+	 * returned on read(). The only way to get out
+	 * of error state is by explicit re-enabling
+	 * of the event
+	 */
+	if (event->state > PERF_EVENT_STATE_OFF)
+		event->state = PERF_EVENT_STATE_OFF;
+
 	/*
 	 * If this was a group event with sibling events then
 	 * upgrade the siblings to singleton events by adding them
@@ -445,50 +502,11 @@
 	 * can remove the event safely, if the call above did not
 	 * succeed.
 	 */
-	if (!list_empty(&event->group_entry)) {
+	if (!list_empty(&event->group_entry))
 		list_del_event(event, ctx);
-	}
 	spin_unlock_irq(&ctx->lock);
 }
 
-static inline u64 perf_clock(void)
-{
-	return cpu_clock(smp_processor_id());
-}
-
-/*
- * Update the record of the current time in a context.
- */
-static void update_context_time(struct perf_event_context *ctx)
-{
-	u64 now = perf_clock();
-
-	ctx->time += now - ctx->timestamp;
-	ctx->timestamp = now;
-}
-
-/*
- * Update the total_time_enabled and total_time_running fields for a event.
- */
-static void update_event_times(struct perf_event *event)
-{
-	struct perf_event_context *ctx = event->ctx;
-	u64 run_end;
-
-	if (event->state < PERF_EVENT_STATE_INACTIVE ||
-	    event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
-		return;
-
-	event->total_time_enabled = ctx->time - event->tstamp_enabled;
-
-	if (event->state == PERF_EVENT_STATE_INACTIVE)
-		run_end = event->tstamp_stopped;
-	else
-		run_end = ctx->time;
-
-	event->total_time_running = run_end - event->tstamp_running;
-}
-
 /*
  * Update total_time_enabled and total_time_running for all events in a group.
  */
@@ -1031,10 +1049,10 @@
 	update_context_time(ctx);
 
 	perf_disable();
-	if (ctx->nr_active)
+	if (ctx->nr_active) {
 		list_for_each_entry(event, &ctx->group_list, group_entry)
 			group_sched_out(event, cpuctx, ctx);
-
+	}
 	perf_enable();
  out:
 	spin_unlock(&ctx->lock);
@@ -1059,8 +1077,6 @@
 		&& !ctx1->pin_count && !ctx2->pin_count;
 }
 
-static void __perf_event_read(void *event);
-
 static void __perf_event_sync_stat(struct perf_event *event,
 				     struct perf_event *next_event)
 {
@@ -1078,8 +1094,8 @@
 	 */
 	switch (event->state) {
 	case PERF_EVENT_STATE_ACTIVE:
-		__perf_event_read(event);
-		break;
+		event->pmu->read(event);
+		/* fall-through */
 
 	case PERF_EVENT_STATE_INACTIVE:
 		update_event_times(event);
@@ -1118,6 +1134,8 @@
 	if (!ctx->nr_stat)
 		return;
 
+	update_context_time(ctx);
+
 	event = list_first_entry(&ctx->event_list,
 				   struct perf_event, event_entry);
 
@@ -1161,8 +1179,6 @@
 	if (likely(!ctx || !cpuctx->task_ctx))
 		return;
 
-	update_context_time(ctx);
-
 	rcu_read_lock();
 	parent = rcu_dereference(ctx->parent_ctx);
 	next_ctx = next->perf_event_ctxp;
@@ -1515,7 +1531,6 @@
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
-	unsigned long flags;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -1527,12 +1542,12 @@
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	local_irq_save(flags);
-	if (ctx->is_active)
-		update_context_time(ctx);
-	event->pmu->read(event);
+	spin_lock(&ctx->lock);
+	update_context_time(ctx);
 	update_event_times(event);
-	local_irq_restore(flags);
+	spin_unlock(&ctx->lock);
+
+	event->pmu->read(event);
 }
 
 static u64 perf_event_read(struct perf_event *event)
@@ -1545,7 +1560,13 @@
 		smp_call_function_single(event->oncpu,
 					 __perf_event_read, event, 1);
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+		struct perf_event_context *ctx = event->ctx;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ctx->lock, flags);
+		update_context_time(ctx);
 		update_event_times(event);
+		spin_unlock_irqrestore(&ctx->lock, flags);
 	}
 
 	return atomic64_read(&event->count);
@@ -1658,6 +1679,8 @@
 	return ERR_PTR(err);
 }
 
+static void perf_event_free_filter(struct perf_event *event);
+
 static void free_event_rcu(struct rcu_head *head)
 {
 	struct perf_event *event;
@@ -1665,6 +1688,7 @@
 	event = container_of(head, struct perf_event, rcu_head);
 	if (event->ns)
 		put_pid_ns(event->ns);
+	perf_event_free_filter(event);
 	kfree(event);
 }
 
@@ -1696,16 +1720,10 @@
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
 
-/*
- * Called when the last reference to the file is gone.
- */
-static int perf_release(struct inode *inode, struct file *file)
+int perf_event_release_kernel(struct perf_event *event)
 {
-	struct perf_event *event = file->private_data;
 	struct perf_event_context *ctx = event->ctx;
 
-	file->private_data = NULL;
-
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_event_remove_from_context(event);
@@ -1720,6 +1738,19 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static int perf_release(struct inode *inode, struct file *file)
+{
+	struct perf_event *event = file->private_data;
+
+	file->private_data = NULL;
+
+	return perf_event_release_kernel(event);
+}
 
 static int perf_event_read_size(struct perf_event *event)
 {
@@ -1746,91 +1777,94 @@
 	return size;
 }
 
-static u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
 	u64 total = 0;
 
+	*enabled = 0;
+	*running = 0;
+
+	mutex_lock(&event->child_mutex);
 	total += perf_event_read(event);
-	list_for_each_entry(child, &event->child_list, child_list)
+	*enabled += event->total_time_enabled +
+			atomic64_read(&event->child_total_time_enabled);
+	*running += event->total_time_running +
+			atomic64_read(&event->child_total_time_running);
+
+	list_for_each_entry(child, &event->child_list, child_list) {
 		total += perf_event_read(child);
+		*enabled += child->total_time_enabled;
+		*running += child->total_time_running;
+	}
+	mutex_unlock(&event->child_mutex);
 
 	return total;
 }
-
-static int perf_event_read_entry(struct perf_event *event,
-				   u64 read_format, char __user *buf)
-{
-	int n = 0, count = 0;
-	u64 values[2];
-
-	values[n++] = perf_event_read_value(event);
-	if (read_format & PERF_FORMAT_ID)
-		values[n++] = primary_event_id(event);
-
-	count = n * sizeof(u64);
-
-	if (copy_to_user(buf, values, count))
-		return -EFAULT;
-
-	return count;
-}
+EXPORT_SYMBOL_GPL(perf_event_read_value);
 
 static int perf_event_read_group(struct perf_event *event,
 				   u64 read_format, char __user *buf)
 {
 	struct perf_event *leader = event->group_leader, *sub;
-	int n = 0, size = 0, err = -EFAULT;
-	u64 values[3];
+	int n = 0, size = 0, ret = -EFAULT;
+	struct perf_event_context *ctx = leader->ctx;
+	u64 values[5];
+	u64 count, enabled, running;
+
+	mutex_lock(&ctx->mutex);
+	count = perf_event_read_value(leader, &enabled, &running);
 
 	values[n++] = 1 + leader->nr_siblings;
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = leader->total_time_enabled +
-			atomic64_read(&leader->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = leader->total_time_running +
-			atomic64_read(&leader->child_total_time_running);
-	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = enabled;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = running;
+	values[n++] = count;
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_event_id(leader);
 
 	size = n * sizeof(u64);
 
 	if (copy_to_user(buf, values, size))
-		return -EFAULT;
+		goto unlock;
 
-	err = perf_event_read_entry(leader, read_format, buf + size);
-	if (err < 0)
-		return err;
-
-	size += err;
+	ret = size;
 
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		err = perf_event_read_entry(sub, read_format,
-				buf + size);
-		if (err < 0)
-			return err;
+		n = 0;
 
-		size += err;
+		values[n++] = perf_event_read_value(sub, &enabled, &running);
+		if (read_format & PERF_FORMAT_ID)
+			values[n++] = primary_event_id(sub);
+
+		size = n * sizeof(u64);
+
+		if (copy_to_user(buf + ret, values, size)) {
+			ret = -EFAULT;
+			goto unlock;
+		}
+
+		ret += size;
 	}
+unlock:
+	mutex_unlock(&ctx->mutex);
 
-	return size;
+	return ret;
 }
 
 static int perf_event_read_one(struct perf_event *event,
 				 u64 read_format, char __user *buf)
 {
+	u64 enabled, running;
 	u64 values[4];
 	int n = 0;
 
-	values[n++] = perf_event_read_value(event);
-	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = event->total_time_enabled +
-			atomic64_read(&event->child_total_time_enabled);
-	}
-	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = event->total_time_running +
-			atomic64_read(&event->child_total_time_running);
-	}
+	values[n++] = perf_event_read_value(event, &enabled, &running);
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = enabled;
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = running;
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(event);
 
@@ -1861,12 +1895,10 @@
 		return -ENOSPC;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
-	mutex_lock(&event->child_mutex);
 	if (read_format & PERF_FORMAT_GROUP)
 		ret = perf_event_read_group(event, read_format, buf);
 	else
 		ret = perf_event_read_one(event, read_format, buf);
-	mutex_unlock(&event->child_mutex);
 
 	return ret;
 }
@@ -1974,7 +2006,8 @@
 	return ret;
 }
 
-int perf_event_set_output(struct perf_event *event, int output_fd);
+static int perf_event_set_output(struct perf_event *event, int output_fd);
+static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -2002,6 +2035,9 @@
 	case PERF_EVENT_IOC_SET_OUTPUT:
 		return perf_event_set_output(event, arg);
 
+	case PERF_EVENT_IOC_SET_FILTER:
+		return perf_event_set_filter(event, (void __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
@@ -2174,6 +2210,7 @@
 	perf_mmap_free_page((unsigned long)data->user_page);
 	for (i = 0; i < data->nr_pages; i++)
 		perf_mmap_free_page((unsigned long)data->data_pages[i]);
+	kfree(data);
 }
 
 #else
@@ -2214,6 +2251,7 @@
 		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
 
 	vfree(base);
+	kfree(data);
 }
 
 static void perf_mmap_data_free(struct perf_mmap_data *data)
@@ -2307,7 +2345,7 @@
 	}
 
 	if (!data->watermark)
-		data->watermark = max_t(long, PAGE_SIZE, max_size / 2);
+		data->watermark = max_size / 2;
 
 
 	rcu_assign_pointer(event->data, data);
@@ -2319,7 +2357,6 @@
 
 	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
 	perf_mmap_data_free(data);
-	kfree(data);
 }
 
 static void perf_mmap_data_release(struct perf_event *event)
@@ -2666,20 +2703,21 @@
 static void perf_output_lock(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int cpu;
+	int cur, cpu = get_cpu();
 
 	handle->locked = 0;
 
-	local_irq_save(handle->flags);
-	cpu = smp_processor_id();
+	for (;;) {
+		cur = atomic_cmpxchg(&data->lock, -1, cpu);
+		if (cur == -1) {
+			handle->locked = 1;
+			break;
+		}
+		if (cur == cpu)
+			break;
 
-	if (in_nmi() && atomic_read(&data->lock) == cpu)
-		return;
-
-	while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
 		cpu_relax();
-
-	handle->locked = 1;
+	}
 }
 
 static void perf_output_unlock(struct perf_output_handle *handle)
@@ -2725,7 +2763,7 @@
 	if (atomic_xchg(&data->wakeup, 0))
 		perf_output_wakeup(handle);
 out:
-	local_irq_restore(handle->flags);
+	put_cpu();
 }
 
 void perf_output_copy(struct perf_output_handle *handle,
@@ -3236,15 +3274,10 @@
 {
 	struct perf_event *event;
 
-	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-		return;
-
-	rcu_read_lock();
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (perf_event_task_match(event))
 			perf_event_task_output(event, task_event);
 	}
-	rcu_read_unlock();
 }
 
 static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3252,11 +3285,11 @@
 	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx = task_event->task_ctx;
 
+	rcu_read_lock();
 	cpuctx = &get_cpu_var(perf_cpu_context);
 	perf_event_task_ctx(&cpuctx->ctx, task_event);
 	put_cpu_var(perf_cpu_context);
 
-	rcu_read_lock();
 	if (!ctx)
 		ctx = rcu_dereference(task_event->task->perf_event_ctxp);
 	if (ctx)
@@ -3348,15 +3381,10 @@
 {
 	struct perf_event *event;
 
-	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-		return;
-
-	rcu_read_lock();
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (perf_event_comm_match(event))
 			perf_event_comm_output(event, comm_event);
 	}
-	rcu_read_unlock();
 }
 
 static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3367,7 +3395,7 @@
 	char comm[TASK_COMM_LEN];
 
 	memset(comm, 0, sizeof(comm));
-	strncpy(comm, comm_event->task->comm, sizeof(comm));
+	strlcpy(comm, comm_event->task->comm, sizeof(comm));
 	size = ALIGN(strlen(comm)+1, sizeof(u64));
 
 	comm_event->comm = comm;
@@ -3375,11 +3403,11 @@
 
 	comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
 
+	rcu_read_lock();
 	cpuctx = &get_cpu_var(perf_cpu_context);
 	perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 	put_cpu_var(perf_cpu_context);
 
-	rcu_read_lock();
 	/*
 	 * doesn't really matter which of the child contexts the
 	 * events ends up in.
@@ -3472,15 +3500,10 @@
 {
 	struct perf_event *event;
 
-	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-		return;
-
-	rcu_read_lock();
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (perf_event_mmap_match(event, mmap_event))
 			perf_event_mmap_output(event, mmap_event);
 	}
-	rcu_read_unlock();
 }
 
 static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3536,11 +3559,11 @@
 
 	mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
+	rcu_read_lock();
 	cpuctx = &get_cpu_var(perf_cpu_context);
 	perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
 	put_cpu_var(perf_cpu_context);
 
-	rcu_read_lock();
 	/*
 	 * doesn't really matter which of the child contexts the
 	 * events ends up in.
@@ -3679,7 +3702,11 @@
 			perf_event_disable(event);
 	}
 
-	perf_event_output(event, nmi, data, regs);
+	if (event->overflow_handler)
+		event->overflow_handler(event, nmi, data, regs);
+	else
+		perf_event_output(event, nmi, data, regs);
+
 	return ret;
 }
 
@@ -3724,16 +3751,16 @@
 	return nr;
 }
 
-static void perf_swevent_overflow(struct perf_event *event,
+static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
 				    int nmi, struct perf_sample_data *data,
 				    struct pt_regs *regs)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int throttle = 0;
-	u64 overflow;
 
 	data->period = event->hw.last_period;
-	overflow = perf_swevent_set_period(event);
+	if (!overflow)
+		overflow = perf_swevent_set_period(event);
 
 	if (hwc->interrupts == MAX_INTERRUPTS)
 		return;
@@ -3766,14 +3793,19 @@
 
 	atomic64_add(nr, &event->count);
 
-	if (!hwc->sample_period)
-		return;
-
 	if (!regs)
 		return;
 
-	if (!atomic64_add_negative(nr, &hwc->period_left))
-		perf_swevent_overflow(event, nmi, data, regs);
+	if (!hwc->sample_period)
+		return;
+
+	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
+		return perf_swevent_overflow(event, 1, nmi, data, regs);
+
+	if (atomic64_add_negative(nr, &hwc->period_left))
+		return;
+
+	perf_swevent_overflow(event, 0, nmi, data, regs);
 }
 
 static int perf_swevent_is_counting(struct perf_event *event)
@@ -3806,25 +3838,44 @@
 	return 1;
 }
 
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data);
+
+static int perf_exclude_event(struct perf_event *event,
+			      struct pt_regs *regs)
+{
+	if (regs) {
+		if (event->attr.exclude_user && user_mode(regs))
+			return 1;
+
+		if (event->attr.exclude_kernel && !user_mode(regs))
+			return 1;
+	}
+
+	return 0;
+}
+
 static int perf_swevent_match(struct perf_event *event,
 				enum perf_type_id type,
-				u32 event_id, struct pt_regs *regs)
+				u32 event_id,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
 {
 	if (!perf_swevent_is_counting(event))
 		return 0;
 
 	if (event->attr.type != type)
 		return 0;
+
 	if (event->attr.config != event_id)
 		return 0;
 
-	if (regs) {
-		if (event->attr.exclude_user && user_mode(regs))
-			return 0;
+	if (perf_exclude_event(event, regs))
+		return 0;
 
-		if (event->attr.exclude_kernel && !user_mode(regs))
-			return 0;
-	}
+	if (event->attr.type == PERF_TYPE_TRACEPOINT &&
+	    !perf_tp_event_match(event, data))
+		return 0;
 
 	return 1;
 }
@@ -3837,49 +3888,59 @@
 {
 	struct perf_event *event;
 
-	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
-		return;
-
-	rcu_read_lock();
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-		if (perf_swevent_match(event, type, event_id, regs))
+		if (perf_swevent_match(event, type, event_id, data, regs))
 			perf_swevent_add(event, nr, nmi, data, regs);
 	}
-	rcu_read_unlock();
 }
 
-static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx)
+int perf_swevent_get_recursion_context(void)
 {
+	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+	int rctx;
+
 	if (in_nmi())
-		return &cpuctx->recursion[3];
+		rctx = 3;
+	else if (in_irq())
+		rctx = 2;
+	else if (in_softirq())
+		rctx = 1;
+	else
+		rctx = 0;
 
-	if (in_irq())
-		return &cpuctx->recursion[2];
+	if (cpuctx->recursion[rctx]) {
+		put_cpu_var(perf_cpu_context);
+		return -1;
+	}
 
-	if (in_softirq())
-		return &cpuctx->recursion[1];
+	cpuctx->recursion[rctx]++;
+	barrier();
 
-	return &cpuctx->recursion[0];
+	return rctx;
 }
+EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
+
+void perf_swevent_put_recursion_context(int rctx)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	barrier();
+	cpuctx->recursion[rctx]--;
+	put_cpu_var(perf_cpu_context);
+}
+EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
 static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 				    u64 nr, int nmi,
 				    struct perf_sample_data *data,
 				    struct pt_regs *regs)
 {
-	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
-	int *recursion = perf_swevent_recursion_context(cpuctx);
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 
-	if (*recursion)
-		goto out;
-
-	(*recursion)++;
-	barrier();
-
+	cpuctx = &__get_cpu_var(perf_cpu_context);
+	rcu_read_lock();
 	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
 				 nr, nmi, data, regs);
-	rcu_read_lock();
 	/*
 	 * doesn't really matter which of the child contexts the
 	 * events ends up in.
@@ -3888,23 +3949,24 @@
 	if (ctx)
 		perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
 	rcu_read_unlock();
-
-	barrier();
-	(*recursion)--;
-
-out:
-	put_cpu_var(perf_cpu_context);
 }
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 			    struct pt_regs *regs, u64 addr)
 {
-	struct perf_sample_data data = {
-		.addr = addr,
-	};
+	struct perf_sample_data data;
+	int rctx;
 
-	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi,
-				&data, regs);
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
+		return;
+
+	data.addr = addr;
+	data.raw  = NULL;
+
+	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
+
+	perf_swevent_put_recursion_context(rctx);
 }
 
 static void perf_swevent_read(struct perf_event *event)
@@ -3949,6 +4011,7 @@
 	event->pmu->read(event);
 
 	data.addr = 0;
+	data.period = event->hw.last_period;
 	regs = get_irq_regs();
 	/*
 	 * In case we exclude kernel IPs or are somehow not in interrupt
@@ -4108,6 +4171,7 @@
 };
 
 #ifdef CONFIG_EVENT_PROFILE
+
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 			  int entry_size)
 {
@@ -4126,13 +4190,21 @@
 	if (!regs)
 		regs = task_pt_regs(current);
 
+	/* Trace events already protected against recursion */
 	do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
 				&data, regs);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 
-extern int ftrace_profile_enable(int);
-extern void ftrace_profile_disable(int);
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data)
+{
+	void *record = data->raw->data;
+
+	if (likely(!event->filter) || filter_match_preds(event->filter, record))
+		return 1;
+	return 0;
+}
 
 static void tp_perf_event_destroy(struct perf_event *event)
 {
@@ -4157,11 +4229,99 @@
 
 	return &perf_ops_generic;
 }
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+	char *filter_str;
+	int ret;
+
+	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+		return -EINVAL;
+
+	filter_str = strndup_user(arg, PAGE_SIZE);
+	if (IS_ERR(filter_str))
+		return PTR_ERR(filter_str);
+
+	ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+
+	kfree(filter_str);
+	return ret;
+}
+
+static void perf_event_free_filter(struct perf_event *event)
+{
+	ftrace_profile_free_filter(event);
+}
+
 #else
+
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data)
+{
+	return 1;
+}
+
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
 	return NULL;
 }
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+	return -ENOENT;
+}
+
+static void perf_event_free_filter(struct perf_event *event)
+{
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+	release_bp_slot(event);
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	int err;
+	/*
+	 * The breakpoint is already filled if we haven't created the counter
+	 * through perf syscall
+	 * FIXME: manage to get trigerred to NULL if it comes from syscalls
+	 */
+	if (!bp->callback)
+		err = register_perf_hw_breakpoint(bp);
+	else
+		err = __register_perf_hw_breakpoint(bp);
+	if (err)
+		return ERR_PTR(err);
+
+	bp->destroy = bp_perf_event_destroy;
+
+	return &perf_ops_bp;
+}
+
+void perf_bp_event(struct perf_event *bp, void *data)
+{
+	struct perf_sample_data sample;
+	struct pt_regs *regs = data;
+
+	sample.addr = bp->attr.bp_addr;
+
+	if (!perf_exclude_event(bp, regs))
+		perf_swevent_add(bp, 1, 1, &sample, regs);
+}
+#else
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	return NULL;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+}
 #endif
 
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -4208,6 +4368,8 @@
 	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_SW_CONTEXT_SWITCHES:
 	case PERF_COUNT_SW_CPU_MIGRATIONS:
+	case PERF_COUNT_SW_ALIGNMENT_FAULTS:
+	case PERF_COUNT_SW_EMULATION_FAULTS:
 		if (!event->parent) {
 			atomic_inc(&perf_swevent_enabled[event_id]);
 			event->destroy = sw_perf_event_destroy;
@@ -4228,6 +4390,7 @@
 		   struct perf_event_context *ctx,
 		   struct perf_event *group_leader,
 		   struct perf_event *parent_event,
+		   perf_callback_t callback,
 		   gfp_t gfpflags)
 {
 	const struct pmu *pmu;
@@ -4270,6 +4433,11 @@
 
 	event->state		= PERF_EVENT_STATE_INACTIVE;
 
+	if (!callback && parent_event)
+		callback = parent_event->callback;
+	
+	event->callback	= callback;
+
 	if (attr->disabled)
 		event->state = PERF_EVENT_STATE_OFF;
 
@@ -4304,6 +4472,11 @@
 		pmu = tp_perf_event_init(event);
 		break;
 
+	case PERF_TYPE_BREAKPOINT:
+		pmu = bp_perf_event_init(event);
+		break;
+
+
 	default:
 		break;
 	}
@@ -4416,7 +4589,7 @@
 	goto out;
 }
 
-int perf_event_set_output(struct perf_event *event, int output_fd)
+static int perf_event_set_output(struct perf_event *event, int output_fd)
 {
 	struct perf_event *output_event = NULL;
 	struct file *output_file = NULL;
@@ -4546,7 +4719,7 @@
 	}
 
 	event = perf_event_alloc(&attr, cpu, ctx, group_leader,
-				     NULL, GFP_KERNEL);
+				     NULL, NULL, GFP_KERNEL);
 	err = PTR_ERR(event);
 	if (IS_ERR(event))
 		goto err_put_context;
@@ -4594,6 +4767,60 @@
 	return err;
 }
 
+/**
+ * perf_event_create_kernel_counter
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu in which the counter is bound
+ * @pid: task to profile
+ */
+struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+				 pid_t pid, perf_callback_t callback)
+{
+	struct perf_event *event;
+	struct perf_event_context *ctx;
+	int err;
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
+
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto err_exit;
+	}
+
+	event = perf_event_alloc(attr, cpu, ctx, NULL,
+				     NULL, callback, GFP_KERNEL);
+	if (IS_ERR(event)) {
+		err = PTR_ERR(event);
+		goto err_put_context;
+	}
+
+	event->filp = NULL;
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_install_in_context(ctx, event, cpu);
+	++ctx->generation;
+	mutex_unlock(&ctx->mutex);
+
+	event->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_event_mutex);
+	list_add_tail(&event->owner_entry, &current->perf_event_list);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return event;
+
+ err_put_context:
+	put_ctx(ctx);
+ err_exit:
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
 /*
  * inherit a event from parent task to child task:
  */
@@ -4619,7 +4846,7 @@
 	child_event = perf_event_alloc(&parent_event->attr,
 					   parent_event->cpu, child_ctx,
 					   group_leader, parent_event,
-					   GFP_KERNEL);
+					   NULL, GFP_KERNEL);
 	if (IS_ERR(child_event))
 		return child_event;
 	get_ctx(child_ctx);
@@ -4637,6 +4864,8 @@
 	if (parent_event->attr.freq)
 		child_event->hw.sample_period = parent_event->hw.sample_period;
 
+	child_event->overflow_handler = parent_event->overflow_handler;
+
 	/*
 	 * Link it up in the child's context:
 	 */
@@ -4726,7 +4955,6 @@
 {
 	struct perf_event *parent_event;
 
-	update_event_times(child_event);
 	perf_event_remove_from_context(child_event);
 
 	parent_event = child_event->parent;
@@ -4778,6 +5006,7 @@
 	 * the events from it.
 	 */
 	unclone_ctx(child_ctx);
+	update_context_time(child_ctx);
 	spin_unlock_irqrestore(&child_ctx->lock, flags);
 
 	/*
diff --git a/kernel/signal.c b/kernel/signal.c
index fe08008..6b982f2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -28,7 +28,8 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
-#include <trace/events/sched.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/signal.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -856,7 +857,7 @@
 	struct sigqueue *q;
 	int override_rlimit;
 
-	trace_sched_signal_send(sig, t);
+	trace_signal_generate(sig, info, t);
 
 	assert_spin_locked(&t->sighand->siglock);
 
@@ -918,12 +919,21 @@
 			break;
 		}
 	} else if (!is_si_special(info)) {
-		if (sig >= SIGRTMIN && info->si_code != SI_USER)
-		/*
-		 * Queue overflow, abort.  We may abort if the signal was rt
-		 * and sent by user using something other than kill().
-		 */
+		if (sig >= SIGRTMIN && info->si_code != SI_USER) {
+			/*
+			 * Queue overflow, abort.  We may abort if the
+			 * signal was rt and sent by user using something
+			 * other than kill().
+			 */
+			trace_signal_overflow_fail(sig, group, info);
 			return -EAGAIN;
+		} else {
+			/*
+			 * This is a silent loss of information.  We still
+			 * send the signal, but the *info bits are lost.
+			 */
+			trace_signal_lose_info(sig, group, info);
+		}
 	}
 
 out_set:
@@ -1859,6 +1869,9 @@
 			ka = &sighand->action[signr-1];
 		}
 
+		/* Trace actually delivered signals. */
+		trace_signal_deliver(signr, info, ka);
+
 		if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
 			continue;
 		if (ka->sa.sa_handler != SIG_DFL) {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512..d006554 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@
 	  power management decisions, specifically the C-state and P-state
 	  behavior.
 
+config KSYM_TRACER
+	bool "Trace read and write access on kernel memory locations"
+	depends on HAVE_HW_BREAKPOINT
+	select TRACING
+	help
+	  This tracer helps find read and write operations on any given kernel
+	  symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+	bool "Profile all kernel memory accesses on 'watched' variables"
+	depends on KSYM_TRACER
+	help
+	  This tracer profiles kernel accesses on variables watched through the
+	  ksym tracer ftrace plugin. Depending upon the hardware, all read
+	  and write operations on kernel variables can be monitored for
+	  accesses.
+
+	  The results will be displayed in:
+	  /debugfs/tracing/profile_ksym
+
+	  Say N if unsure.
 
 config STACK_TRACER
 	bool "Trace max stack"
@@ -428,6 +449,23 @@
 
 	  If unsure, say N.
 
+config KPROBE_EVENT
+	depends on KPROBES
+	depends on X86
+	bool "Enable kprobes-based dynamic events"
+	select TRACING
+	default y
+	help
+	  This allows the user to add tracing events (similar to tracepoints) on the fly
+	  via the ftrace interface. See Documentation/trace/kprobetrace.txt
+	  for more details.
+
+	  Those events can be inserted wherever kprobes can probe, and record
+	  various register and memory values.
+
+	  This option is also required by perf-probe subcommand of perf tools. If
+	  you want to use perf tools, this option is strongly recommended.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac..cd9ecd8 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,8 @@
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a72c6e0..a1ca495 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -397,18 +397,21 @@
 	int ret;
 
 	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
-			       "offset:0;\tsize:%u;\n",
-			       (unsigned int)sizeof(field.time_stamp));
+			       "offset:0;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp),
+			       (unsigned int)is_signed_type(u64));
 
 	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
-			       "offset:%u;\tsize:%u;\n",
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), commit),
-			       (unsigned int)sizeof(field.commit));
+			       (unsigned int)sizeof(field.commit),
+			       (unsigned int)is_signed_type(long));
 
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
-			       "offset:%u;\tsize:%u;\n",
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
-			       (unsigned int)BUF_PAGE_SIZE);
+			       (unsigned int)BUF_PAGE_SIZE,
+			       (unsigned int)is_signed_type(char));
 
 	return ret;
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index acef8b4..1d7f483 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
 #include <linux/ftrace.h>
 #include <trace/boot.h>
 #include <linux/kmemtrace.h>
+#include <linux/hw_breakpoint.h>
 
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@
 	TRACE_KMEM_ALLOC,
 	TRACE_KMEM_FREE,
 	TRACE_BLK,
+	TRACE_KSYM,
 
 	__TRACE_LAST_TYPE,
 };
@@ -98,9 +100,32 @@
 struct syscall_trace_exit {
 	struct trace_entry	ent;
 	int			nr;
-	unsigned long		ret;
+	long			ret;
 };
 
+struct kprobe_trace_entry {
+	struct trace_entry	ent;
+	unsigned long		ip;
+	int			nargs;
+	unsigned long		args[];
+};
+
+#define SIZEOF_KPROBE_TRACE_ENTRY(n)			\
+	(offsetof(struct kprobe_trace_entry, args) +	\
+	(sizeof(unsigned long) * (n)))
+
+struct kretprobe_trace_entry {
+	struct trace_entry	ent;
+	unsigned long		func;
+	unsigned long		ret_ip;
+	int			nargs;
+	unsigned long		args[];
+};
+
+#define SIZEOF_KRETPROBE_TRACE_ENTRY(n)			\
+	(offsetof(struct kretprobe_trace_entry, args) +	\
+	(sizeof(unsigned long) * (n)))
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -209,6 +234,7 @@
 			  TRACE_KMEM_ALLOC);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
 			  TRACE_KMEM_FREE);	\
+		IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
 		__ftrace_bad_type();					\
 	} while (0)
 
@@ -364,6 +390,8 @@
 void unregister_tracer(struct tracer *type);
 int is_tracing_stopped(void);
 
+extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -438,6 +466,8 @@
 					 struct trace_array *tr);
 extern int trace_selftest_startup_hw_branches(struct tracer *trace,
 					      struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+					 struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
@@ -683,7 +713,6 @@
 	int			n_preds;
 	struct filter_pred	**preds;
 	char			*filter_string;
-	bool			no_reset;
 };
 
 struct event_subsystem {
@@ -703,7 +732,7 @@
 typedef int (*regex_match_func)(char *str, struct regex *r, int len);
 
 enum regex_type {
-	MATCH_FULL,
+	MATCH_FULL = 0,
 	MATCH_FRONT_ONLY,
 	MATCH_MIDDLE_ONLY,
 	MATCH_END_ONLY,
@@ -744,7 +773,8 @@
 		     struct ring_buffer *buffer,
 		     struct ring_buffer_event *event)
 {
-	if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+	if (unlikely(call->filter_active) &&
+	    !filter_match_preds(call->filter, rec)) {
 		ring_buffer_discard_commit(buffer, event);
 		return 1;
 	}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d72..c16a08f 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@
 	F_printk("type:%u call_site:%lx ptr:%p",
 		 __entry->type_id, __entry->call_site, __entry->ptr)
 );
+
+FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
+
+	TRACE_KSYM,
+
+	F_STRUCT(
+		__field(	unsigned long,	ip			  )
+		__field(	unsigned char,	type			  )
+		__array(	char	     ,	cmd,	   TASK_COMM_LEN  )
+		__field(	unsigned long,  addr			  )
+	),
+
+	F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
+		(void *)__entry->ip, (unsigned int)__entry->type,
+		(void *)__entry->addr,  __entry->cmd)
+);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 8d5c171..d9c60f8 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,17 +8,14 @@
 #include <linux/module.h>
 #include "trace.h"
 
-/*
- * We can't use a size but a type in alloc_percpu()
- * So let's create a dummy type that matches the desired size
- */
-typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
 
-char		*trace_profile_buf;
-EXPORT_SYMBOL_GPL(trace_profile_buf);
+char *perf_trace_buf;
+EXPORT_SYMBOL_GPL(perf_trace_buf);
 
-char		*trace_profile_buf_nmi;
-EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+char *perf_trace_buf_nmi;
+EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
+
+typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
 
 /* Count the events in use (per event id, not per instance) */
 static int	total_profile_count;
@@ -32,20 +29,20 @@
 		return 0;
 
 	if (!total_profile_count) {
-		buf = (char *)alloc_percpu(profile_buf_t);
+		buf = (char *)alloc_percpu(perf_trace_t);
 		if (!buf)
 			goto fail_buf;
 
-		rcu_assign_pointer(trace_profile_buf, buf);
+		rcu_assign_pointer(perf_trace_buf, buf);
 
-		buf = (char *)alloc_percpu(profile_buf_t);
+		buf = (char *)alloc_percpu(perf_trace_t);
 		if (!buf)
 			goto fail_buf_nmi;
 
-		rcu_assign_pointer(trace_profile_buf_nmi, buf);
+		rcu_assign_pointer(perf_trace_buf_nmi, buf);
 	}
 
-	ret = event->profile_enable();
+	ret = event->profile_enable(event);
 	if (!ret) {
 		total_profile_count++;
 		return 0;
@@ -53,10 +50,10 @@
 
 fail_buf_nmi:
 	if (!total_profile_count) {
-		free_percpu(trace_profile_buf_nmi);
-		free_percpu(trace_profile_buf);
-		trace_profile_buf_nmi = NULL;
-		trace_profile_buf = NULL;
+		free_percpu(perf_trace_buf_nmi);
+		free_percpu(perf_trace_buf);
+		perf_trace_buf_nmi = NULL;
+		perf_trace_buf = NULL;
 	}
 fail_buf:
 	atomic_dec(&event->profile_count);
@@ -89,14 +86,14 @@
 	if (!atomic_add_negative(-1, &event->profile_count))
 		return;
 
-	event->profile_disable();
+	event->profile_disable(event);
 
 	if (!--total_profile_count) {
-		buf = trace_profile_buf;
-		rcu_assign_pointer(trace_profile_buf, NULL);
+		buf = perf_trace_buf;
+		rcu_assign_pointer(perf_trace_buf, NULL);
 
-		nmi_buf = trace_profile_buf_nmi;
-		rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+		nmi_buf = perf_trace_buf_nmi;
+		rcu_assign_pointer(perf_trace_buf_nmi, NULL);
 
 		/*
 		 * Ensure every events in profiling have finished before
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5e9ffc3..1d18315 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -93,9 +93,7 @@
 }
 EXPORT_SYMBOL_GPL(trace_define_common_fields);
 
-#ifdef CONFIG_MODULES
-
-static void trace_destroy_fields(struct ftrace_event_call *call)
+void trace_destroy_fields(struct ftrace_event_call *call)
 {
 	struct ftrace_event_field *field, *next;
 
@@ -107,8 +105,6 @@
 	}
 }
 
-#endif /* CONFIG_MODULES */
-
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
@@ -117,14 +113,14 @@
 		if (call->enabled) {
 			call->enabled = 0;
 			tracing_stop_cmdline_record();
-			call->unregfunc(call->data);
+			call->unregfunc(call);
 		}
 		break;
 	case 1:
 		if (!call->enabled) {
 			call->enabled = 1;
 			tracing_start_cmdline_record();
-			call->regfunc(call->data);
+			call->regfunc(call);
 		}
 		break;
 	}
@@ -507,7 +503,7 @@
 #define FIELD(type, name)						\
 	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
 	#type, "common_" #name, offsetof(typeof(field), name),		\
-		sizeof(field.name)
+		sizeof(field.name), is_signed_type(type)
 
 static int trace_write_header(struct trace_seq *s)
 {
@@ -515,17 +511,17 @@
 
 	/* struct trace_entry */
 	return trace_seq_printf(s,
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-				"\n",
-				FIELD(unsigned short, type),
-				FIELD(unsigned char, flags),
-				FIELD(unsigned char, preempt_count),
-				FIELD(int, pid),
-				FIELD(int, lock_depth));
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+			"\n",
+			FIELD(unsigned short, type),
+			FIELD(unsigned char, flags),
+			FIELD(unsigned char, preempt_count),
+			FIELD(int, pid),
+			FIELD(int, lock_depth));
 }
 
 static ssize_t
@@ -937,27 +933,46 @@
 	return 0;
 }
 
-#define for_each_event(event, start, end)			\
-	for (event = start;					\
-	     (unsigned long)event < (unsigned long)end;		\
-	     event++)
+static int __trace_add_event_call(struct ftrace_event_call *call)
+{
+	struct dentry *d_events;
+	int ret;
 
-#ifdef CONFIG_MODULES
+	if (!call->name)
+		return -EINVAL;
 
-static LIST_HEAD(ftrace_module_file_list);
+	if (call->raw_init) {
+		ret = call->raw_init(call);
+		if (ret < 0) {
+			if (ret != -ENOSYS)
+				pr_warning("Could not initialize trace "
+				"events/%s\n", call->name);
+			return ret;
+		}
+	}
 
-/*
- * Modules must own their file_operations to keep up with
- * reference counting.
- */
-struct ftrace_module_file_ops {
-	struct list_head		list;
-	struct module			*mod;
-	struct file_operations		id;
-	struct file_operations		enable;
-	struct file_operations		format;
-	struct file_operations		filter;
-};
+	d_events = event_trace_events_dir();
+	if (!d_events)
+		return -ENOENT;
+
+	ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
+				&ftrace_enable_fops, &ftrace_event_filter_fops,
+				&ftrace_event_format_fops);
+	if (!ret)
+		list_add(&call->list, &ftrace_events);
+
+	return ret;
+}
+
+/* Add an additional event_call dynamically */
+int trace_add_event_call(struct ftrace_event_call *call)
+{
+	int ret;
+	mutex_lock(&event_mutex);
+	ret = __trace_add_event_call(call);
+	mutex_unlock(&event_mutex);
+	return ret;
+}
 
 static void remove_subsystem_dir(const char *name)
 {
@@ -985,6 +1000,53 @@
 	}
 }
 
+/*
+ * Must be called under locking both of event_mutex and trace_event_mutex.
+ */
+static void __trace_remove_event_call(struct ftrace_event_call *call)
+{
+	ftrace_event_enable_disable(call, 0);
+	if (call->event)
+		__unregister_ftrace_event(call->event);
+	debugfs_remove_recursive(call->dir);
+	list_del(&call->list);
+	trace_destroy_fields(call);
+	destroy_preds(call);
+	remove_subsystem_dir(call->system);
+}
+
+/* Remove an event_call */
+void trace_remove_event_call(struct ftrace_event_call *call)
+{
+	mutex_lock(&event_mutex);
+	down_write(&trace_event_mutex);
+	__trace_remove_event_call(call);
+	up_write(&trace_event_mutex);
+	mutex_unlock(&event_mutex);
+}
+
+#define for_each_event(event, start, end)			\
+	for (event = start;					\
+	     (unsigned long)event < (unsigned long)end;		\
+	     event++)
+
+#ifdef CONFIG_MODULES
+
+static LIST_HEAD(ftrace_module_file_list);
+
+/*
+ * Modules must own their file_operations to keep up with
+ * reference counting.
+ */
+struct ftrace_module_file_ops {
+	struct list_head		list;
+	struct module			*mod;
+	struct file_operations		id;
+	struct file_operations		enable;
+	struct file_operations		format;
+	struct file_operations		filter;
+};
+
 static struct ftrace_module_file_ops *
 trace_create_file_ops(struct module *mod)
 {
@@ -1042,7 +1104,7 @@
 		if (!call->name)
 			continue;
 		if (call->raw_init) {
-			ret = call->raw_init();
+			ret = call->raw_init(call);
 			if (ret < 0) {
 				if (ret != -ENOSYS)
 					pr_warning("Could not initialize trace "
@@ -1060,10 +1122,11 @@
 				return;
 		}
 		call->mod = mod;
-		list_add(&call->list, &ftrace_events);
-		event_create_dir(call, d_events,
-				 &file_ops->id, &file_ops->enable,
-				 &file_ops->filter, &file_ops->format);
+		ret = event_create_dir(call, d_events,
+				       &file_ops->id, &file_ops->enable,
+				       &file_ops->filter, &file_ops->format);
+		if (!ret)
+			list_add(&call->list, &ftrace_events);
 	}
 }
 
@@ -1077,14 +1140,7 @@
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		if (call->mod == mod) {
 			found = true;
-			ftrace_event_enable_disable(call, 0);
-			if (call->event)
-				__unregister_ftrace_event(call->event);
-			debugfs_remove_recursive(call->dir);
-			list_del(&call->list);
-			trace_destroy_fields(call);
-			destroy_preds(call);
-			remove_subsystem_dir(call->system);
+			__trace_remove_event_call(call);
 		}
 	}
 
@@ -1202,7 +1258,7 @@
 		if (!call->name)
 			continue;
 		if (call->raw_init) {
-			ret = call->raw_init();
+			ret = call->raw_init(call);
 			if (ret < 0) {
 				if (ret != -ENOSYS)
 					pr_warning("Could not initialize trace "
@@ -1210,10 +1266,12 @@
 				continue;
 			}
 		}
-		list_add(&call->list, &ftrace_events);
-		event_create_dir(call, d_events, &ftrace_event_id_fops,
-				 &ftrace_enable_fops, &ftrace_event_filter_fops,
-				 &ftrace_event_format_fops);
+		ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
+				       &ftrace_enable_fops,
+				       &ftrace_event_filter_fops,
+				       &ftrace_event_format_fops);
+		if (!ret)
+			list_add(&call->list, &ftrace_events);
 	}
 
 	while (true) {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 9267201..50504cb 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/mutex.h>
+#include <linux/perf_event.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -29,6 +30,7 @@
 {
 	OP_OR,
 	OP_AND,
+	OP_GLOB,
 	OP_NE,
 	OP_EQ,
 	OP_LT,
@@ -46,16 +48,17 @@
 };
 
 static struct filter_op filter_ops[] = {
-	{ OP_OR, "||", 1 },
-	{ OP_AND, "&&", 2 },
-	{ OP_NE, "!=", 4 },
-	{ OP_EQ, "==", 4 },
-	{ OP_LT, "<", 5 },
-	{ OP_LE, "<=", 5 },
-	{ OP_GT, ">", 5 },
-	{ OP_GE, ">=", 5 },
-	{ OP_NONE, "OP_NONE", 0 },
-	{ OP_OPEN_PAREN, "(", 0 },
+	{ OP_OR,	"||",		1 },
+	{ OP_AND,	"&&",		2 },
+	{ OP_GLOB,	"~",		4 },
+	{ OP_NE,	"!=",		4 },
+	{ OP_EQ,	"==",		4 },
+	{ OP_LT,	"<",		5 },
+	{ OP_LE,	"<=",		5 },
+	{ OP_GT,	">",		5 },
+	{ OP_GE,	">=",		5 },
+	{ OP_NONE,	"OP_NONE",	0 },
+	{ OP_OPEN_PAREN, "(",		0 },
 };
 
 enum {
@@ -329,22 +332,18 @@
 	return type;
 }
 
-static int filter_build_regex(struct filter_pred *pred)
+static void filter_build_regex(struct filter_pred *pred)
 {
 	struct regex *r = &pred->regex;
-	char *search, *dup;
-	enum regex_type type;
-	int not;
+	char *search;
+	enum regex_type type = MATCH_FULL;
+	int not = 0;
 
-	type = filter_parse_regex(r->pattern, r->len, &search, &not);
-	dup = kstrdup(search, GFP_KERNEL);
-	if (!dup)
-		return -ENOMEM;
-
-	strcpy(r->pattern, dup);
-	kfree(dup);
-
-	r->len = strlen(r->pattern);
+	if (pred->op == OP_GLOB) {
+		type = filter_parse_regex(r->pattern, r->len, &search, &not);
+		r->len = strlen(search);
+		memmove(r->pattern, search, r->len+1);
+	}
 
 	switch (type) {
 	case MATCH_FULL:
@@ -362,14 +361,11 @@
 	}
 
 	pred->not ^= not;
-
-	return 0;
 }
 
 /* return 1 if event matches, 0 otherwise (discard) */
-int filter_match_preds(struct ftrace_event_call *call, void *rec)
+int filter_match_preds(struct event_filter *filter, void *rec)
 {
-	struct event_filter *filter = call->filter;
 	int match, top = 0, val1 = 0, val2 = 0;
 	int stack[MAX_FILTER_PRED];
 	struct filter_pred *pred;
@@ -542,9 +538,8 @@
 		filter->preds[i]->fn = filter_pred_none;
 }
 
-void destroy_preds(struct ftrace_event_call *call)
+static void __free_preds(struct event_filter *filter)
 {
-	struct event_filter *filter = call->filter;
 	int i;
 
 	if (!filter)
@@ -557,21 +552,24 @@
 	kfree(filter->preds);
 	kfree(filter->filter_string);
 	kfree(filter);
-	call->filter = NULL;
 }
 
-static int init_preds(struct ftrace_event_call *call)
+void destroy_preds(struct ftrace_event_call *call)
+{
+	__free_preds(call->filter);
+	call->filter = NULL;
+	call->filter_active = 0;
+}
+
+static struct event_filter *__alloc_preds(void)
 {
 	struct event_filter *filter;
 	struct filter_pred *pred;
 	int i;
 
-	if (call->filter)
-		return 0;
-
-	filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-	if (!call->filter)
-		return -ENOMEM;
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return ERR_PTR(-ENOMEM);
 
 	filter->n_preds = 0;
 
@@ -587,12 +585,24 @@
 		filter->preds[i] = pred;
 	}
 
-	return 0;
+	return filter;
 
 oom:
-	destroy_preds(call);
+	__free_preds(filter);
+	return ERR_PTR(-ENOMEM);
+}
 
-	return -ENOMEM;
+static int init_preds(struct ftrace_event_call *call)
+{
+	if (call->filter)
+		return 0;
+
+	call->filter_active = 0;
+	call->filter = __alloc_preds();
+	if (IS_ERR(call->filter))
+		return PTR_ERR(call->filter);
+
+	return 0;
 }
 
 static int init_subsystem_preds(struct event_subsystem *system)
@@ -615,14 +625,7 @@
 	return 0;
 }
 
-enum {
-	FILTER_DISABLE_ALL,
-	FILTER_INIT_NO_RESET,
-	FILTER_SKIP_NO_RESET,
-};
-
-static void filter_free_subsystem_preds(struct event_subsystem *system,
-					int flag)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
 {
 	struct ftrace_event_call *call;
 
@@ -633,14 +636,6 @@
 		if (strcmp(call->system, system->name) != 0)
 			continue;
 
-		if (flag == FILTER_INIT_NO_RESET) {
-			call->filter->no_reset = false;
-			continue;
-		}
-
-		if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
-			continue;
-
 		filter_disable_preds(call);
 		remove_filter_string(call->filter);
 	}
@@ -648,10 +643,10 @@
 
 static int filter_add_pred_fn(struct filter_parse_state *ps,
 			      struct ftrace_event_call *call,
+			      struct event_filter *filter,
 			      struct filter_pred *pred,
 			      filter_pred_fn_t fn)
 {
-	struct event_filter *filter = call->filter;
 	int idx, err;
 
 	if (filter->n_preds == MAX_FILTER_PRED) {
@@ -666,7 +661,6 @@
 		return err;
 
 	filter->n_preds++;
-	call->filter_active = 1;
 
 	return 0;
 }
@@ -691,7 +685,10 @@
 
 static int is_legal_op(struct ftrace_event_field *field, int op)
 {
-	if (is_string_field(field) && (op != OP_EQ && op != OP_NE))
+	if (is_string_field(field) &&
+	    (op != OP_EQ && op != OP_NE && op != OP_GLOB))
+		return 0;
+	if (!is_string_field(field) && op == OP_GLOB)
 		return 0;
 
 	return 1;
@@ -742,6 +739,7 @@
 
 static int filter_add_pred(struct filter_parse_state *ps,
 			   struct ftrace_event_call *call,
+			   struct event_filter *filter,
 			   struct filter_pred *pred,
 			   bool dry_run)
 {
@@ -776,15 +774,13 @@
 	}
 
 	if (is_string_field(field)) {
-		ret = filter_build_regex(pred);
-		if (ret)
-			return ret;
+		filter_build_regex(pred);
 
 		if (field->filter_type == FILTER_STATIC_STRING) {
 			fn = filter_pred_string;
 			pred->regex.field_len = field->size;
 		} else if (field->filter_type == FILTER_DYN_STRING)
-				fn = filter_pred_strloc;
+			fn = filter_pred_strloc;
 		else {
 			fn = filter_pred_pchar;
 			pred->regex.field_len = strlen(pred->regex.pattern);
@@ -813,45 +809,7 @@
 
 add_pred_fn:
 	if (!dry_run)
-		return filter_add_pred_fn(ps, call, pred, fn);
-	return 0;
-}
-
-static int filter_add_subsystem_pred(struct filter_parse_state *ps,
-				     struct event_subsystem *system,
-				     struct filter_pred *pred,
-				     char *filter_string,
-				     bool dry_run)
-{
-	struct ftrace_event_call *call;
-	int err = 0;
-	bool fail = true;
-
-	list_for_each_entry(call, &ftrace_events, list) {
-
-		if (!call->define_fields)
-			continue;
-
-		if (strcmp(call->system, system->name))
-			continue;
-
-		if (call->filter->no_reset)
-			continue;
-
-		err = filter_add_pred(ps, call, pred, dry_run);
-		if (err)
-			call->filter->no_reset = true;
-		else
-			fail = false;
-
-		if (!dry_run)
-			replace_filter_string(call->filter, filter_string);
-	}
-
-	if (fail) {
-		parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
-		return err;
-	}
+		return filter_add_pred_fn(ps, call, filter, pred, fn);
 	return 0;
 }
 
@@ -1209,8 +1167,8 @@
 	return 0;
 }
 
-static int replace_preds(struct event_subsystem *system,
-			 struct ftrace_event_call *call,
+static int replace_preds(struct ftrace_event_call *call,
+			 struct event_filter *filter,
 			 struct filter_parse_state *ps,
 			 char *filter_string,
 			 bool dry_run)
@@ -1257,11 +1215,7 @@
 add_pred:
 		if (!pred)
 			return -ENOMEM;
-		if (call)
-			err = filter_add_pred(ps, call, pred, false);
-		else
-			err = filter_add_subsystem_pred(ps, system, pred,
-						filter_string, dry_run);
+		err = filter_add_pred(ps, call, filter, pred, dry_run);
 		filter_free_pred(pred);
 		if (err)
 			return err;
@@ -1272,10 +1226,50 @@
 	return 0;
 }
 
+static int replace_system_preds(struct event_subsystem *system,
+				struct filter_parse_state *ps,
+				char *filter_string)
+{
+	struct ftrace_event_call *call;
+	bool fail = true;
+	int err;
+
+	list_for_each_entry(call, &ftrace_events, list) {
+		struct event_filter *filter = call->filter;
+
+		if (!call->define_fields)
+			continue;
+
+		if (strcmp(call->system, system->name) != 0)
+			continue;
+
+		/* try to see if the filter can be applied */
+		err = replace_preds(call, filter, ps, filter_string, true);
+		if (err)
+			continue;
+
+		/* really apply the filter */
+		filter_disable_preds(call);
+		err = replace_preds(call, filter, ps, filter_string, false);
+		if (err)
+			filter_disable_preds(call);
+		else {
+			call->filter_active = 1;
+			replace_filter_string(filter, filter_string);
+		}
+		fail = false;
+	}
+
+	if (fail) {
+		parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 {
 	int err;
-
 	struct filter_parse_state *ps;
 
 	mutex_lock(&event_mutex);
@@ -1287,8 +1281,7 @@
 	if (!strcmp(strstrip(filter_string), "0")) {
 		filter_disable_preds(call);
 		remove_filter_string(call->filter);
-		mutex_unlock(&event_mutex);
-		return 0;
+		goto out_unlock;
 	}
 
 	err = -ENOMEM;
@@ -1306,10 +1299,11 @@
 		goto out;
 	}
 
-	err = replace_preds(NULL, call, ps, filter_string, false);
+	err = replace_preds(call, call->filter, ps, filter_string, false);
 	if (err)
 		append_filter_err(ps, call->filter);
-
+	else
+		call->filter_active = 1;
 out:
 	filter_opstack_clear(ps);
 	postfix_clear(ps);
@@ -1324,7 +1318,6 @@
 				 char *filter_string)
 {
 	int err;
-
 	struct filter_parse_state *ps;
 
 	mutex_lock(&event_mutex);
@@ -1334,10 +1327,9 @@
 		goto out_unlock;
 
 	if (!strcmp(strstrip(filter_string), "0")) {
-		filter_free_subsystem_preds(system, FILTER_DISABLE_ALL);
+		filter_free_subsystem_preds(system);
 		remove_filter_string(system->filter);
-		mutex_unlock(&event_mutex);
-		return 0;
+		goto out_unlock;
 	}
 
 	err = -ENOMEM;
@@ -1354,23 +1346,9 @@
 		goto out;
 	}
 
-	filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET);
-
-	/* try to see the filter can be applied to which events */
-	err = replace_preds(system, NULL, ps, filter_string, true);
-	if (err) {
+	err = replace_system_preds(system, ps, filter_string);
+	if (err)
 		append_filter_err(ps, system->filter);
-		goto out;
-	}
-
-	filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);
-
-	/* really apply the filter to the events */
-	err = replace_preds(system, NULL, ps, filter_string, false);
-	if (err) {
-		append_filter_err(ps, system->filter);
-		filter_free_subsystem_preds(system, 2);
-	}
 
 out:
 	filter_opstack_clear(ps);
@@ -1382,3 +1360,73 @@
 	return err;
 }
 
+#ifdef CONFIG_EVENT_PROFILE
+
+void ftrace_profile_free_filter(struct perf_event *event)
+{
+	struct event_filter *filter = event->filter;
+
+	event->filter = NULL;
+	__free_preds(filter);
+}
+
+int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+			      char *filter_str)
+{
+	int err;
+	struct event_filter *filter;
+	struct filter_parse_state *ps;
+	struct ftrace_event_call *call = NULL;
+
+	mutex_lock(&event_mutex);
+
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (call->id == event_id)
+			break;
+	}
+
+	err = -EINVAL;
+	if (!call)
+		goto out_unlock;
+
+	err = -EEXIST;
+	if (event->filter)
+		goto out_unlock;
+
+	filter = __alloc_preds();
+	if (IS_ERR(filter)) {
+		err = PTR_ERR(filter);
+		goto out_unlock;
+	}
+
+	err = -ENOMEM;
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		goto free_preds;
+
+	parse_init(ps, filter_ops, filter_str);
+	err = filter_parse(ps);
+	if (err)
+		goto free_ps;
+
+	err = replace_preds(call, filter, ps, filter_str, false);
+	if (!err)
+		event->filter = filter;
+
+free_ps:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+
+free_preds:
+	if (err)
+		__free_preds(filter);
+
+out_unlock:
+	mutex_unlock(&event_mutex);
+
+	return err;
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
+
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index c74848d..dff8c84 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -66,44 +66,47 @@
 #undef __field
 #define __field(type, item)						\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:%zu;\n",		\
+			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
 			       offsetof(typeof(field), item),		\
-			       sizeof(field.item));			\
+			       sizeof(field.item), is_signed_type(type)); \
 	if (!ret)							\
 		return 0;
 
 #undef __field_desc
 #define __field_desc(type, container, item)				\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:%zu;\n",		\
+			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
 			       offsetof(typeof(field), container.item),	\
-			       sizeof(field.container.item));		\
+			       sizeof(field.container.item),		\
+			       is_signed_type(type));			\
 	if (!ret)							\
 		return 0;
 
 #undef __array
 #define __array(type, item, len)					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
-			       "offset:%zu;\tsize:%zu;\n",		\
-			       offsetof(typeof(field), item),	\
-			       sizeof(field.item));		\
+			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
+			       offsetof(typeof(field), item),		\
+			       sizeof(field.item), is_signed_type(type)); \
 	if (!ret)							\
 		return 0;
 
 #undef __array_desc
 #define __array_desc(type, container, item, len)			\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
-			       "offset:%zu;\tsize:%zu;\n",		\
+			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
 			       offsetof(typeof(field), container.item),	\
-			       sizeof(field.container.item));		\
+			       sizeof(field.container.item),		\
+			       is_signed_type(type));			\
 	if (!ret)							\
 		return 0;
 
 #undef __dynamic_array
 #define __dynamic_array(type, item)					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:0;\n",		\
-			       offsetof(typeof(field), item));		\
+			       "offset:%zu;\tsize:0;\tsigned:%u;\n",	\
+			       offsetof(typeof(field), item),		\
+			       is_signed_type(type));			\
 	if (!ret)							\
 		return 0;
 
@@ -131,7 +134,6 @@
 
 #include "trace_entries.h"
 
-
 #undef __field
 #define __field(type, item)						\
 	ret = trace_define_field(event_call, #type, #item,		\
@@ -193,6 +195,11 @@
 
 #include "trace_entries.h"
 
+static int ftrace_raw_init_event(struct ftrace_event_call *call)
+{
+	INIT_LIST_HEAD(&call->fields);
+	return 0;
+}
 
 #undef __field
 #define __field(type, item)
@@ -211,7 +218,6 @@
 
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(call, struct_name, type, tstruct, print)		\
-static int ftrace_raw_init_event_##call(void);				\
 									\
 struct ftrace_event_call __used						\
 __attribute__((__aligned__(4)))						\
@@ -219,14 +225,9 @@
 	.name			= #call,				\
 	.id			= type,					\
 	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
+	.raw_init		= ftrace_raw_init_event,		\
 	.show_format		= ftrace_format_##call,			\
 	.define_fields		= ftrace_define_fields_##call,		\
 };									\
-static int ftrace_raw_init_event_##call(void)				\
-{									\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	return 0;							\
-}									\
 
 #include "trace_entries.h"
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 0000000..aff5f80
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1523 @@
+/*
+ * Kprobes-based tracing events
+ *
+ * Created by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/kprobes.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/debugfs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/ptrace.h>
+#include <linux/perf_event.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+#define MAX_TRACE_ARGS 128
+#define MAX_ARGSTR_LEN 63
+#define MAX_EVENT_NAME_LEN 64
+#define KPROBE_EVENT_SYSTEM "kprobes"
+
+/* Reserved field names */
+#define FIELD_STRING_IP "__probe_ip"
+#define FIELD_STRING_NARGS "__probe_nargs"
+#define FIELD_STRING_RETIP "__probe_ret_ip"
+#define FIELD_STRING_FUNC "__probe_func"
+
+const char *reserved_field_names[] = {
+	"common_type",
+	"common_flags",
+	"common_preempt_count",
+	"common_pid",
+	"common_tgid",
+	"common_lock_depth",
+	FIELD_STRING_IP,
+	FIELD_STRING_NARGS,
+	FIELD_STRING_RETIP,
+	FIELD_STRING_FUNC,
+};
+
+struct fetch_func {
+	unsigned long (*func)(struct pt_regs *, void *);
+	void *data;
+};
+
+static __kprobes unsigned long call_fetch(struct fetch_func *f,
+					  struct pt_regs *regs)
+{
+	return f->func(regs, f->data);
+}
+
+/* fetch handlers */
+static __kprobes unsigned long fetch_register(struct pt_regs *regs,
+					      void *offset)
+{
+	return regs_get_register(regs, (unsigned int)((unsigned long)offset));
+}
+
+static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
+					   void *num)
+{
+	return regs_get_kernel_stack_nth(regs,
+					 (unsigned int)((unsigned long)num));
+}
+
+static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
+{
+	unsigned long retval;
+
+	if (probe_kernel_address(addr, retval))
+		return 0;
+	return retval;
+}
+
+static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
+{
+	return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
+}
+
+static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
+					      void *dummy)
+{
+	return regs_return_value(regs);
+}
+
+static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
+						   void *dummy)
+{
+	return kernel_stack_pointer(regs);
+}
+
+/* Memory fetching by symbol */
+struct symbol_cache {
+	char *symbol;
+	long offset;
+	unsigned long addr;
+};
+
+static unsigned long update_symbol_cache(struct symbol_cache *sc)
+{
+	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
+	if (sc->addr)
+		sc->addr += sc->offset;
+	return sc->addr;
+}
+
+static void free_symbol_cache(struct symbol_cache *sc)
+{
+	kfree(sc->symbol);
+	kfree(sc);
+}
+
+static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
+{
+	struct symbol_cache *sc;
+
+	if (!sym || strlen(sym) == 0)
+		return NULL;
+	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
+	if (!sc)
+		return NULL;
+
+	sc->symbol = kstrdup(sym, GFP_KERNEL);
+	if (!sc->symbol) {
+		kfree(sc);
+		return NULL;
+	}
+	sc->offset = offset;
+
+	update_symbol_cache(sc);
+	return sc;
+}
+
+static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
+{
+	struct symbol_cache *sc = data;
+
+	if (sc->addr)
+		return fetch_memory(regs, (void *)sc->addr);
+	else
+		return 0;
+}
+
+/* Special indirect memory access interface */
+struct indirect_fetch_data {
+	struct fetch_func orig;
+	long offset;
+};
+
+static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
+{
+	struct indirect_fetch_data *ind = data;
+	unsigned long addr;
+
+	addr = call_fetch(&ind->orig, regs);
+	if (addr) {
+		addr += ind->offset;
+		return fetch_memory(regs, (void *)addr);
+	} else
+		return 0;
+}
+
+static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
+{
+	if (data->orig.func == fetch_indirect)
+		free_indirect_fetch_data(data->orig.data);
+	else if (data->orig.func == fetch_symbol)
+		free_symbol_cache(data->orig.data);
+	kfree(data);
+}
+
+/**
+ * Kprobe event core functions
+ */
+
+struct probe_arg {
+	struct fetch_func	fetch;
+	const char		*name;
+};
+
+/* Flags for trace_probe */
+#define TP_FLAG_TRACE	1
+#define TP_FLAG_PROFILE	2
+
+struct trace_probe {
+	struct list_head	list;
+	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
+	unsigned long 		nhit;
+	unsigned int		flags;	/* For TP_FLAG_* */
+	const char		*symbol;	/* symbol name */
+	struct ftrace_event_call	call;
+	struct trace_event		event;
+	unsigned int		nr_args;
+	struct probe_arg	args[];
+};
+
+#define SIZEOF_TRACE_PROBE(n)			\
+	(offsetof(struct trace_probe, args) +	\
+	(sizeof(struct probe_arg) * (n)))
+
+static __kprobes int probe_is_return(struct trace_probe *tp)
+{
+	return tp->rp.handler != NULL;
+}
+
+static __kprobes const char *probe_symbol(struct trace_probe *tp)
+{
+	return tp->symbol ? tp->symbol : "unknown";
+}
+
+static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
+{
+	int ret = -EINVAL;
+
+	if (ff->func == fetch_argument)
+		ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
+	else if (ff->func == fetch_register) {
+		const char *name;
+		name = regs_query_register_name((unsigned int)((long)ff->data));
+		ret = snprintf(buf, n, "%%%s", name);
+	} else if (ff->func == fetch_stack)
+		ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
+	else if (ff->func == fetch_memory)
+		ret = snprintf(buf, n, "@0x%p", ff->data);
+	else if (ff->func == fetch_symbol) {
+		struct symbol_cache *sc = ff->data;
+		if (sc->offset)
+			ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
+					sc->offset);
+		else
+			ret = snprintf(buf, n, "@%s", sc->symbol);
+	} else if (ff->func == fetch_retvalue)
+		ret = snprintf(buf, n, "$retval");
+	else if (ff->func == fetch_stack_address)
+		ret = snprintf(buf, n, "$stack");
+	else if (ff->func == fetch_indirect) {
+		struct indirect_fetch_data *id = ff->data;
+		size_t l = 0;
+		ret = snprintf(buf, n, "%+ld(", id->offset);
+		if (ret >= n)
+			goto end;
+		l += ret;
+		ret = probe_arg_string(buf + l, n - l, &id->orig);
+		if (ret < 0)
+			goto end;
+		l += ret;
+		ret = snprintf(buf + l, n - l, ")");
+		ret += l;
+	}
+end:
+	if (ret >= n)
+		return -ENOSPC;
+	return ret;
+}
+
+static int register_probe_event(struct trace_probe *tp);
+static void unregister_probe_event(struct trace_probe *tp);
+
+static DEFINE_MUTEX(probe_lock);
+static LIST_HEAD(probe_list);
+
+static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
+static int kretprobe_dispatcher(struct kretprobe_instance *ri,
+				struct pt_regs *regs);
+
+/*
+ * Allocate new trace_probe and initialize it (including kprobes).
+ */
+static struct trace_probe *alloc_trace_probe(const char *group,
+					     const char *event,
+					     void *addr,
+					     const char *symbol,
+					     unsigned long offs,
+					     int nargs, int is_return)
+{
+	struct trace_probe *tp;
+
+	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
+	if (!tp)
+		return ERR_PTR(-ENOMEM);
+
+	if (symbol) {
+		tp->symbol = kstrdup(symbol, GFP_KERNEL);
+		if (!tp->symbol)
+			goto error;
+		tp->rp.kp.symbol_name = tp->symbol;
+		tp->rp.kp.offset = offs;
+	} else
+		tp->rp.kp.addr = addr;
+
+	if (is_return)
+		tp->rp.handler = kretprobe_dispatcher;
+	else
+		tp->rp.kp.pre_handler = kprobe_dispatcher;
+
+	if (!event)
+		goto error;
+	tp->call.name = kstrdup(event, GFP_KERNEL);
+	if (!tp->call.name)
+		goto error;
+
+	if (!group)
+		goto error;
+	tp->call.system = kstrdup(group, GFP_KERNEL);
+	if (!tp->call.system)
+		goto error;
+
+	INIT_LIST_HEAD(&tp->list);
+	return tp;
+error:
+	kfree(tp->call.name);
+	kfree(tp->symbol);
+	kfree(tp);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void free_probe_arg(struct probe_arg *arg)
+{
+	if (arg->fetch.func == fetch_symbol)
+		free_symbol_cache(arg->fetch.data);
+	else if (arg->fetch.func == fetch_indirect)
+		free_indirect_fetch_data(arg->fetch.data);
+	kfree(arg->name);
+}
+
+static void free_trace_probe(struct trace_probe *tp)
+{
+	int i;
+
+	for (i = 0; i < tp->nr_args; i++)
+		free_probe_arg(&tp->args[i]);
+
+	kfree(tp->call.system);
+	kfree(tp->call.name);
+	kfree(tp->symbol);
+	kfree(tp);
+}
+
+static struct trace_probe *find_probe_event(const char *event,
+					    const char *group)
+{
+	struct trace_probe *tp;
+
+	list_for_each_entry(tp, &probe_list, list)
+		if (strcmp(tp->call.name, event) == 0 &&
+		    strcmp(tp->call.system, group) == 0)
+			return tp;
+	return NULL;
+}
+
+/* Unregister a trace_probe and probe_event: call with locking probe_lock */
+static void unregister_trace_probe(struct trace_probe *tp)
+{
+	if (probe_is_return(tp))
+		unregister_kretprobe(&tp->rp);
+	else
+		unregister_kprobe(&tp->rp.kp);
+	list_del(&tp->list);
+	unregister_probe_event(tp);
+}
+
+/* Register a trace_probe and probe_event */
+static int register_trace_probe(struct trace_probe *tp)
+{
+	struct trace_probe *old_tp;
+	int ret;
+
+	mutex_lock(&probe_lock);
+
+	/* register as an event */
+	old_tp = find_probe_event(tp->call.name, tp->call.system);
+	if (old_tp) {
+		/* delete old event */
+		unregister_trace_probe(old_tp);
+		free_trace_probe(old_tp);
+	}
+	ret = register_probe_event(tp);
+	if (ret) {
+		pr_warning("Faild to register probe event(%d)\n", ret);
+		goto end;
+	}
+
+	tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
+	if (probe_is_return(tp))
+		ret = register_kretprobe(&tp->rp);
+	else
+		ret = register_kprobe(&tp->rp.kp);
+
+	if (ret) {
+		pr_warning("Could not insert probe(%d)\n", ret);
+		if (ret == -EILSEQ) {
+			pr_warning("Probing address(0x%p) is not an "
+				   "instruction boundary.\n",
+				   tp->rp.kp.addr);
+			ret = -EINVAL;
+		}
+		unregister_probe_event(tp);
+	} else
+		list_add_tail(&tp->list, &probe_list);
+end:
+	mutex_unlock(&probe_lock);
+	return ret;
+}
+
+/* Split symbol and offset. */
+static int split_symbol_offset(char *symbol, unsigned long *offset)
+{
+	char *tmp;
+	int ret;
+
+	if (!offset)
+		return -EINVAL;
+
+	tmp = strchr(symbol, '+');
+	if (tmp) {
+		/* skip sign because strict_strtol doesn't accept '+' */
+		ret = strict_strtoul(tmp + 1, 0, offset);
+		if (ret)
+			return ret;
+		*tmp = '\0';
+	} else
+		*offset = 0;
+	return 0;
+}
+
+#define PARAM_MAX_ARGS 16
+#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
+
+static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
+{
+	int ret = 0;
+	unsigned long param;
+
+	if (strcmp(arg, "retval") == 0) {
+		if (is_return) {
+			ff->func = fetch_retvalue;
+			ff->data = NULL;
+		} else
+			ret = -EINVAL;
+	} else if (strncmp(arg, "stack", 5) == 0) {
+		if (arg[5] == '\0') {
+			ff->func = fetch_stack_address;
+			ff->data = NULL;
+		} else if (isdigit(arg[5])) {
+			ret = strict_strtoul(arg + 5, 10, &param);
+			if (ret || param > PARAM_MAX_STACK)
+				ret = -EINVAL;
+			else {
+				ff->func = fetch_stack;
+				ff->data = (void *)param;
+			}
+		} else
+			ret = -EINVAL;
+	} else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
+		ret = strict_strtoul(arg + 3, 10, &param);
+		if (ret || param > PARAM_MAX_ARGS)
+			ret = -EINVAL;
+		else {
+			ff->func = fetch_argument;
+			ff->data = (void *)param;
+		}
+	} else
+		ret = -EINVAL;
+	return ret;
+}
+
+/* Recursive argument parser */
+static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
+{
+	int ret = 0;
+	unsigned long param;
+	long offset;
+	char *tmp;
+
+	switch (arg[0]) {
+	case '$':
+		ret = parse_probe_vars(arg + 1, ff, is_return);
+		break;
+	case '%':	/* named register */
+		ret = regs_query_register_offset(arg + 1);
+		if (ret >= 0) {
+			ff->func = fetch_register;
+			ff->data = (void *)(unsigned long)ret;
+			ret = 0;
+		}
+		break;
+	case '@':	/* memory or symbol */
+		if (isdigit(arg[1])) {
+			ret = strict_strtoul(arg + 1, 0, &param);
+			if (ret)
+				break;
+			ff->func = fetch_memory;
+			ff->data = (void *)param;
+		} else {
+			ret = split_symbol_offset(arg + 1, &offset);
+			if (ret)
+				break;
+			ff->data = alloc_symbol_cache(arg + 1, offset);
+			if (ff->data)
+				ff->func = fetch_symbol;
+			else
+				ret = -EINVAL;
+		}
+		break;
+	case '+':	/* indirect memory */
+	case '-':
+		tmp = strchr(arg, '(');
+		if (!tmp) {
+			ret = -EINVAL;
+			break;
+		}
+		*tmp = '\0';
+		ret = strict_strtol(arg + 1, 0, &offset);
+		if (ret)
+			break;
+		if (arg[0] == '-')
+			offset = -offset;
+		arg = tmp + 1;
+		tmp = strrchr(arg, ')');
+		if (tmp) {
+			struct indirect_fetch_data *id;
+			*tmp = '\0';
+			id = kzalloc(sizeof(struct indirect_fetch_data),
+				     GFP_KERNEL);
+			if (!id)
+				return -ENOMEM;
+			id->offset = offset;
+			ret = __parse_probe_arg(arg, &id->orig, is_return);
+			if (ret)
+				kfree(id);
+			else {
+				ff->func = fetch_indirect;
+				ff->data = (void *)id;
+			}
+		} else
+			ret = -EINVAL;
+		break;
+	default:
+		/* TODO: support custom handler */
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/* String length checking wrapper */
+static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
+{
+	if (strlen(arg) > MAX_ARGSTR_LEN) {
+		pr_info("Argument is too long.: %s\n",  arg);
+		return -ENOSPC;
+	}
+	return __parse_probe_arg(arg, ff, is_return);
+}
+
+/* Return 1 if name is reserved or already used by another argument */
+static int conflict_field_name(const char *name,
+			       struct probe_arg *args, int narg)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
+		if (strcmp(reserved_field_names[i], name) == 0)
+			return 1;
+	for (i = 0; i < narg; i++)
+		if (strcmp(args[i].name, name) == 0)
+			return 1;
+	return 0;
+}
+
+static int create_trace_probe(int argc, char **argv)
+{
+	/*
+	 * Argument syntax:
+	 *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
+	 *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
+	 * Fetch args:
+	 *  $argN	: fetch Nth of function argument. (N:0-)
+	 *  $retval	: fetch return value
+	 *  $stack	: fetch stack address
+	 *  $stackN	: fetch Nth of stack (N:0-)
+	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
+	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
+	 *  %REG	: fetch register REG
+	 * Indirect memory fetch:
+	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
+	 * Alias name of args:
+	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
+	 */
+	struct trace_probe *tp;
+	int i, ret = 0;
+	int is_return = 0;
+	char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
+	unsigned long offset = 0;
+	void *addr = NULL;
+	char buf[MAX_EVENT_NAME_LEN];
+
+	if (argc < 2) {
+		pr_info("Probe point is not specified.\n");
+		return -EINVAL;
+	}
+
+	if (argv[0][0] == 'p')
+		is_return = 0;
+	else if (argv[0][0] == 'r')
+		is_return = 1;
+	else {
+		pr_info("Probe definition must be started with 'p' or 'r'.\n");
+		return -EINVAL;
+	}
+
+	if (argv[0][1] == ':') {
+		event = &argv[0][2];
+		if (strchr(event, '/')) {
+			group = event;
+			event = strchr(group, '/') + 1;
+			event[-1] = '\0';
+			if (strlen(group) == 0) {
+				pr_info("Group name is not specifiled\n");
+				return -EINVAL;
+			}
+		}
+		if (strlen(event) == 0) {
+			pr_info("Event name is not specifiled\n");
+			return -EINVAL;
+		}
+	}
+
+	if (isdigit(argv[1][0])) {
+		if (is_return) {
+			pr_info("Return probe point must be a symbol.\n");
+			return -EINVAL;
+		}
+		/* an address specified */
+		ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
+		if (ret) {
+			pr_info("Failed to parse address.\n");
+			return ret;
+		}
+	} else {
+		/* a symbol specified */
+		symbol = argv[1];
+		/* TODO: support .init module functions */
+		ret = split_symbol_offset(symbol, &offset);
+		if (ret) {
+			pr_info("Failed to parse symbol.\n");
+			return ret;
+		}
+		if (offset && is_return) {
+			pr_info("Return probe must be used without offset.\n");
+			return -EINVAL;
+		}
+	}
+	argc -= 2; argv += 2;
+
+	/* setup a probe */
+	if (!group)
+		group = KPROBE_EVENT_SYSTEM;
+	if (!event) {
+		/* Make a new event name */
+		if (symbol)
+			snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
+				 is_return ? 'r' : 'p', symbol, offset);
+		else
+			snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
+				 is_return ? 'r' : 'p', addr);
+		event = buf;
+	}
+	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
+			       is_return);
+	if (IS_ERR(tp)) {
+		pr_info("Failed to allocate trace_probe.(%d)\n",
+			(int)PTR_ERR(tp));
+		return PTR_ERR(tp);
+	}
+
+	/* parse arguments */
+	ret = 0;
+	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+		/* Parse argument name */
+		arg = strchr(argv[i], '=');
+		if (arg)
+			*arg++ = '\0';
+		else
+			arg = argv[i];
+
+		if (conflict_field_name(argv[i], tp->args, i)) {
+			pr_info("Argument%d name '%s' conflicts with "
+				"another field.\n", i, argv[i]);
+			ret = -EINVAL;
+			goto error;
+		}
+
+		tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+		if (!tp->args[i].name) {
+			pr_info("Failed to allocate argument%d name '%s'.\n",
+				i, argv[i]);
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Parse fetch argument */
+		ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
+		if (ret) {
+			pr_info("Parse error at argument%d. (%d)\n", i, ret);
+			kfree(tp->args[i].name);
+			goto error;
+		}
+
+		tp->nr_args++;
+	}
+
+	ret = register_trace_probe(tp);
+	if (ret)
+		goto error;
+	return 0;
+
+error:
+	free_trace_probe(tp);
+	return ret;
+}
+
+static void cleanup_all_probes(void)
+{
+	struct trace_probe *tp;
+
+	mutex_lock(&probe_lock);
+	/* TODO: Use batch unregistration */
+	while (!list_empty(&probe_list)) {
+		tp = list_entry(probe_list.next, struct trace_probe, list);
+		unregister_trace_probe(tp);
+		free_trace_probe(tp);
+	}
+	mutex_unlock(&probe_lock);
+}
+
+
+/* Probes listing interfaces */
+static void *probes_seq_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&probe_lock);
+	return seq_list_start(&probe_list, *pos);
+}
+
+static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &probe_list, pos);
+}
+
+static void probes_seq_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&probe_lock);
+}
+
+static int probes_seq_show(struct seq_file *m, void *v)
+{
+	struct trace_probe *tp = v;
+	int i, ret;
+	char buf[MAX_ARGSTR_LEN + 1];
+
+	seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
+	seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
+
+	if (!tp->symbol)
+		seq_printf(m, " 0x%p", tp->rp.kp.addr);
+	else if (tp->rp.kp.offset)
+		seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
+	else
+		seq_printf(m, " %s", probe_symbol(tp));
+
+	for (i = 0; i < tp->nr_args; i++) {
+		ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
+		if (ret < 0) {
+			pr_warning("Argument%d decoding error(%d).\n", i, ret);
+			return ret;
+		}
+		seq_printf(m, " %s=%s", tp->args[i].name, buf);
+	}
+	seq_printf(m, "\n");
+	return 0;
+}
+
+static const struct seq_operations probes_seq_op = {
+	.start  = probes_seq_start,
+	.next   = probes_seq_next,
+	.stop   = probes_seq_stop,
+	.show   = probes_seq_show
+};
+
+static int probes_open(struct inode *inode, struct file *file)
+{
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC))
+		cleanup_all_probes();
+
+	return seq_open(file, &probes_seq_op);
+}
+
+static int command_trace_probe(const char *buf)
+{
+	char **argv;
+	int argc = 0, ret = 0;
+
+	argv = argv_split(GFP_KERNEL, buf, &argc);
+	if (!argv)
+		return -ENOMEM;
+
+	if (argc)
+		ret = create_trace_probe(argc, argv);
+
+	argv_free(argv);
+	return ret;
+}
+
+#define WRITE_BUFSIZE 128
+
+static ssize_t probes_write(struct file *file, const char __user *buffer,
+			    size_t count, loff_t *ppos)
+{
+	char *kbuf, *tmp;
+	int ret;
+	size_t done;
+	size_t size;
+
+	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	ret = done = 0;
+	while (done < count) {
+		size = count - done;
+		if (size >= WRITE_BUFSIZE)
+			size = WRITE_BUFSIZE - 1;
+		if (copy_from_user(kbuf, buffer + done, size)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		kbuf[size] = '\0';
+		tmp = strchr(kbuf, '\n');
+		if (tmp) {
+			*tmp = '\0';
+			size = tmp - kbuf + 1;
+		} else if (done + size < count) {
+			pr_warning("Line length is too long: "
+				   "Should be less than %d.", WRITE_BUFSIZE);
+			ret = -EINVAL;
+			goto out;
+		}
+		done += size;
+		/* Remove comments */
+		tmp = strchr(kbuf, '#');
+		if (tmp)
+			*tmp = '\0';
+
+		ret = command_trace_probe(kbuf);
+		if (ret)
+			goto out;
+	}
+	ret = done;
+out:
+	kfree(kbuf);
+	return ret;
+}
+
+static const struct file_operations kprobe_events_ops = {
+	.owner          = THIS_MODULE,
+	.open           = probes_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+	.write		= probes_write,
+};
+
+/* Probes profiling interfaces */
+static int probes_profile_seq_show(struct seq_file *m, void *v)
+{
+	struct trace_probe *tp = v;
+
+	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
+		   tp->rp.kp.nmissed);
+
+	return 0;
+}
+
+static const struct seq_operations profile_seq_op = {
+	.start  = probes_seq_start,
+	.next   = probes_seq_next,
+	.stop   = probes_seq_stop,
+	.show   = probes_profile_seq_show
+};
+
+static int profile_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &profile_seq_op);
+}
+
+static const struct file_operations kprobe_profile_ops = {
+	.owner          = THIS_MODULE,
+	.open           = profile_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+/* Kprobe handler */
+static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
+{
+	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+	struct kprobe_trace_entry *entry;
+	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
+	int size, i, pc;
+	unsigned long irq_flags;
+	struct ftrace_event_call *call = &tp->call;
+
+	tp->nhit++;
+
+	local_save_flags(irq_flags);
+	pc = preempt_count();
+
+	size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+
+	event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
+						  irq_flags, pc);
+	if (!event)
+		return 0;
+
+	entry = ring_buffer_event_data(event);
+	entry->nargs = tp->nr_args;
+	entry->ip = (unsigned long)kp->addr;
+	for (i = 0; i < tp->nr_args; i++)
+		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
+
+	if (!filter_current_check_discard(buffer, call, entry, event))
+		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+	return 0;
+}
+
+/* Kretprobe handler */
+static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
+					  struct pt_regs *regs)
+{
+	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+	struct kretprobe_trace_entry *entry;
+	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
+	int size, i, pc;
+	unsigned long irq_flags;
+	struct ftrace_event_call *call = &tp->call;
+
+	local_save_flags(irq_flags);
+	pc = preempt_count();
+
+	size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+
+	event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
+						  irq_flags, pc);
+	if (!event)
+		return 0;
+
+	entry = ring_buffer_event_data(event);
+	entry->nargs = tp->nr_args;
+	entry->func = (unsigned long)tp->rp.kp.addr;
+	entry->ret_ip = (unsigned long)ri->ret_addr;
+	for (i = 0; i < tp->nr_args; i++)
+		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
+
+	if (!filter_current_check_discard(buffer, call, entry, event))
+		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+
+	return 0;
+}
+
+/* Event entry printers */
+enum print_line_t
+print_kprobe_event(struct trace_iterator *iter, int flags)
+{
+	struct kprobe_trace_entry *field;
+	struct trace_seq *s = &iter->seq;
+	struct trace_event *event;
+	struct trace_probe *tp;
+	int i;
+
+	field = (struct kprobe_trace_entry *)iter->ent;
+	event = ftrace_find_event(field->ent.type);
+	tp = container_of(event, struct trace_probe, event);
+
+	if (!trace_seq_printf(s, "%s: (", tp->call.name))
+		goto partial;
+
+	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+		goto partial;
+
+	if (!trace_seq_puts(s, ")"))
+		goto partial;
+
+	for (i = 0; i < field->nargs; i++)
+		if (!trace_seq_printf(s, " %s=%lx",
+				      tp->args[i].name, field->args[i]))
+			goto partial;
+
+	if (!trace_seq_puts(s, "\n"))
+		goto partial;
+
+	return TRACE_TYPE_HANDLED;
+partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+enum print_line_t
+print_kretprobe_event(struct trace_iterator *iter, int flags)
+{
+	struct kretprobe_trace_entry *field;
+	struct trace_seq *s = &iter->seq;
+	struct trace_event *event;
+	struct trace_probe *tp;
+	int i;
+
+	field = (struct kretprobe_trace_entry *)iter->ent;
+	event = ftrace_find_event(field->ent.type);
+	tp = container_of(event, struct trace_probe, event);
+
+	if (!trace_seq_printf(s, "%s: (", tp->call.name))
+		goto partial;
+
+	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
+		goto partial;
+
+	if (!trace_seq_puts(s, " <- "))
+		goto partial;
+
+	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
+		goto partial;
+
+	if (!trace_seq_puts(s, ")"))
+		goto partial;
+
+	for (i = 0; i < field->nargs; i++)
+		if (!trace_seq_printf(s, " %s=%lx",
+				      tp->args[i].name, field->args[i]))
+			goto partial;
+
+	if (!trace_seq_puts(s, "\n"))
+		goto partial;
+
+	return TRACE_TYPE_HANDLED;
+partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int probe_event_enable(struct ftrace_event_call *call)
+{
+	struct trace_probe *tp = (struct trace_probe *)call->data;
+
+	tp->flags |= TP_FLAG_TRACE;
+	if (probe_is_return(tp))
+		return enable_kretprobe(&tp->rp);
+	else
+		return enable_kprobe(&tp->rp.kp);
+}
+
+static void probe_event_disable(struct ftrace_event_call *call)
+{
+	struct trace_probe *tp = (struct trace_probe *)call->data;
+
+	tp->flags &= ~TP_FLAG_TRACE;
+	if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
+		if (probe_is_return(tp))
+			disable_kretprobe(&tp->rp);
+		else
+			disable_kprobe(&tp->rp.kp);
+	}
+}
+
+static int probe_event_raw_init(struct ftrace_event_call *event_call)
+{
+	INIT_LIST_HEAD(&event_call->fields);
+
+	return 0;
+}
+
+#undef DEFINE_FIELD
+#define DEFINE_FIELD(type, item, name, is_signed)			\
+	do {								\
+		ret = trace_define_field(event_call, #type, name,	\
+					 offsetof(typeof(field), item),	\
+					 sizeof(field.item), is_signed, \
+					 FILTER_OTHER);			\
+		if (ret)						\
+			return ret;					\
+	} while (0)
+
+static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
+{
+	int ret, i;
+	struct kprobe_trace_entry field;
+	struct trace_probe *tp = (struct trace_probe *)event_call->data;
+
+	ret = trace_define_common_fields(event_call);
+	if (!ret)
+		return ret;
+
+	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
+	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
+	/* Set argument names as fields */
+	for (i = 0; i < tp->nr_args; i++)
+		DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
+	return 0;
+}
+
+static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
+{
+	int ret, i;
+	struct kretprobe_trace_entry field;
+	struct trace_probe *tp = (struct trace_probe *)event_call->data;
+
+	ret = trace_define_common_fields(event_call);
+	if (!ret)
+		return ret;
+
+	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
+	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
+	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
+	/* Set argument names as fields */
+	for (i = 0; i < tp->nr_args; i++)
+		DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
+	return 0;
+}
+
+static int __probe_event_show_format(struct trace_seq *s,
+				     struct trace_probe *tp, const char *fmt,
+				     const char *arg)
+{
+	int i;
+
+	/* Show format */
+	if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
+		return 0;
+
+	for (i = 0; i < tp->nr_args; i++)
+		if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
+			return 0;
+
+	if (!trace_seq_printf(s, "\", %s", arg))
+		return 0;
+
+	for (i = 0; i < tp->nr_args; i++)
+		if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
+			return 0;
+
+	return trace_seq_puts(s, "\n");
+}
+
+#undef SHOW_FIELD
+#define SHOW_FIELD(type, item, name)					\
+	do {								\
+		ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"	\
+				"offset:%u;\tsize:%u;\n", name,		\
+				(unsigned int)offsetof(typeof(field), item),\
+				(unsigned int)sizeof(type));		\
+		if (!ret)						\
+			return 0;					\
+	} while (0)
+
+static int kprobe_event_show_format(struct ftrace_event_call *call,
+				    struct trace_seq *s)
+{
+	struct kprobe_trace_entry field __attribute__((unused));
+	int ret, i;
+	struct trace_probe *tp = (struct trace_probe *)call->data;
+
+	SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
+	SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
+
+	/* Show fields */
+	for (i = 0; i < tp->nr_args; i++)
+		SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
+	trace_seq_puts(s, "\n");
+
+	return __probe_event_show_format(s, tp, "(%lx)",
+					 "REC->" FIELD_STRING_IP);
+}
+
+static int kretprobe_event_show_format(struct ftrace_event_call *call,
+				       struct trace_seq *s)
+{
+	struct kretprobe_trace_entry field __attribute__((unused));
+	int ret, i;
+	struct trace_probe *tp = (struct trace_probe *)call->data;
+
+	SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
+	SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
+	SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
+
+	/* Show fields */
+	for (i = 0; i < tp->nr_args; i++)
+		SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
+	trace_seq_puts(s, "\n");
+
+	return __probe_event_show_format(s, tp, "(%lx <- %lx)",
+					 "REC->" FIELD_STRING_FUNC
+					 ", REC->" FIELD_STRING_RETIP);
+}
+
+#ifdef CONFIG_EVENT_PROFILE
+
+/* Kprobe profile handler */
+static __kprobes int kprobe_profile_func(struct kprobe *kp,
+					 struct pt_regs *regs)
+{
+	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+	struct ftrace_event_call *call = &tp->call;
+	struct kprobe_trace_entry *entry;
+	struct trace_entry *ent;
+	int size, __size, i, pc, __cpu;
+	unsigned long irq_flags;
+	char *trace_buf;
+	char *raw_data;
+	int rctx;
+
+	pc = preempt_count();
+	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+	size = ALIGN(__size + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		     "profile buffer not large enough"))
+		return 0;
+
+	/*
+	 * Protect the non nmi buffer
+	 * This also protects the rcu read side
+	 */
+	local_irq_save(irq_flags);
+
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
+		goto end_recursion;
+
+	__cpu = smp_processor_id();
+
+	if (in_nmi())
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
+	else
+		trace_buf = rcu_dereference(perf_trace_buf);
+
+	if (!trace_buf)
+		goto end;
+
+	raw_data = per_cpu_ptr(trace_buf, __cpu);
+
+	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+	entry = (struct kprobe_trace_entry *)raw_data;
+	ent = &entry->ent;
+
+	tracing_generic_entry_update(ent, irq_flags, pc);
+	ent->type = call->id;
+	entry->nargs = tp->nr_args;
+	entry->ip = (unsigned long)kp->addr;
+	for (i = 0; i < tp->nr_args; i++)
+		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
+	perf_tp_event(call->id, entry->ip, 1, entry, size);
+
+end:
+	perf_swevent_put_recursion_context(rctx);
+end_recursion:
+	local_irq_restore(irq_flags);
+
+	return 0;
+}
+
+/* Kretprobe profile handler */
+static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
+					    struct pt_regs *regs)
+{
+	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+	struct ftrace_event_call *call = &tp->call;
+	struct kretprobe_trace_entry *entry;
+	struct trace_entry *ent;
+	int size, __size, i, pc, __cpu;
+	unsigned long irq_flags;
+	char *trace_buf;
+	char *raw_data;
+	int rctx;
+
+	pc = preempt_count();
+	__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+	size = ALIGN(__size + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		     "profile buffer not large enough"))
+		return 0;
+
+	/*
+	 * Protect the non nmi buffer
+	 * This also protects the rcu read side
+	 */
+	local_irq_save(irq_flags);
+
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
+		goto end_recursion;
+
+	__cpu = smp_processor_id();
+
+	if (in_nmi())
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
+	else
+		trace_buf = rcu_dereference(perf_trace_buf);
+
+	if (!trace_buf)
+		goto end;
+
+	raw_data = per_cpu_ptr(trace_buf, __cpu);
+
+	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+	entry = (struct kretprobe_trace_entry *)raw_data;
+	ent = &entry->ent;
+
+	tracing_generic_entry_update(ent, irq_flags, pc);
+	ent->type = call->id;
+	entry->nargs = tp->nr_args;
+	entry->func = (unsigned long)tp->rp.kp.addr;
+	entry->ret_ip = (unsigned long)ri->ret_addr;
+	for (i = 0; i < tp->nr_args; i++)
+		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
+	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
+
+end:
+	perf_swevent_put_recursion_context(rctx);
+end_recursion:
+	local_irq_restore(irq_flags);
+
+	return 0;
+}
+
+static int probe_profile_enable(struct ftrace_event_call *call)
+{
+	struct trace_probe *tp = (struct trace_probe *)call->data;
+
+	tp->flags |= TP_FLAG_PROFILE;
+
+	if (probe_is_return(tp))
+		return enable_kretprobe(&tp->rp);
+	else
+		return enable_kprobe(&tp->rp.kp);
+}
+
+static void probe_profile_disable(struct ftrace_event_call *call)
+{
+	struct trace_probe *tp = (struct trace_probe *)call->data;
+
+	tp->flags &= ~TP_FLAG_PROFILE;
+
+	if (!(tp->flags & TP_FLAG_TRACE)) {
+		if (probe_is_return(tp))
+			disable_kretprobe(&tp->rp);
+		else
+			disable_kprobe(&tp->rp.kp);
+	}
+}
+#endif	/* CONFIG_EVENT_PROFILE */
+
+
+static __kprobes
+int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
+{
+	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+
+	if (tp->flags & TP_FLAG_TRACE)
+		kprobe_trace_func(kp, regs);
+#ifdef CONFIG_EVENT_PROFILE
+	if (tp->flags & TP_FLAG_PROFILE)
+		kprobe_profile_func(kp, regs);
+#endif	/* CONFIG_EVENT_PROFILE */
+	return 0;	/* We don't tweek kernel, so just return 0 */
+}
+
+static __kprobes
+int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+
+	if (tp->flags & TP_FLAG_TRACE)
+		kretprobe_trace_func(ri, regs);
+#ifdef CONFIG_EVENT_PROFILE
+	if (tp->flags & TP_FLAG_PROFILE)
+		kretprobe_profile_func(ri, regs);
+#endif	/* CONFIG_EVENT_PROFILE */
+	return 0;	/* We don't tweek kernel, so just return 0 */
+}
+
+static int register_probe_event(struct trace_probe *tp)
+{
+	struct ftrace_event_call *call = &tp->call;
+	int ret;
+
+	/* Initialize ftrace_event_call */
+	if (probe_is_return(tp)) {
+		tp->event.trace = print_kretprobe_event;
+		call->raw_init = probe_event_raw_init;
+		call->show_format = kretprobe_event_show_format;
+		call->define_fields = kretprobe_event_define_fields;
+	} else {
+		tp->event.trace = print_kprobe_event;
+		call->raw_init = probe_event_raw_init;
+		call->show_format = kprobe_event_show_format;
+		call->define_fields = kprobe_event_define_fields;
+	}
+	call->event = &tp->event;
+	call->id = register_ftrace_event(&tp->event);
+	if (!call->id)
+		return -ENODEV;
+	call->enabled = 0;
+	call->regfunc = probe_event_enable;
+	call->unregfunc = probe_event_disable;
+
+#ifdef CONFIG_EVENT_PROFILE
+	atomic_set(&call->profile_count, -1);
+	call->profile_enable = probe_profile_enable;
+	call->profile_disable = probe_profile_disable;
+#endif
+	call->data = tp;
+	ret = trace_add_event_call(call);
+	if (ret) {
+		pr_info("Failed to register kprobe event: %s\n", call->name);
+		unregister_ftrace_event(&tp->event);
+	}
+	return ret;
+}
+
+static void unregister_probe_event(struct trace_probe *tp)
+{
+	/* tp->event is unregistered in trace_remove_event_call() */
+	trace_remove_event_call(&tp->call);
+}
+
+/* Make a debugfs interface for controling probe points */
+static __init int init_kprobe_trace(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+	if (!d_tracer)
+		return 0;
+
+	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
+				    NULL, &kprobe_events_ops);
+
+	/* Event list interface */
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'kprobe_events' entry\n");
+
+	/* Profile interface */
+	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
+				    NULL, &kprobe_profile_ops);
+
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'kprobe_profile' entry\n");
+	return 0;
+}
+fs_initcall(init_kprobe_trace);
+
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+static int kprobe_trace_selftest_target(int a1, int a2, int a3,
+					int a4, int a5, int a6)
+{
+	return a1 + a2 + a3 + a4 + a5 + a6;
+}
+
+static __init int kprobe_trace_self_tests_init(void)
+{
+	int ret;
+	int (*target)(int, int, int, int, int, int);
+
+	target = kprobe_trace_selftest_target;
+
+	pr_info("Testing kprobe tracing: ");
+
+	ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
+				  "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
+	if (WARN_ON_ONCE(ret))
+		pr_warning("error enabling function entry\n");
+
+	ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
+				  "$retval");
+	if (WARN_ON_ONCE(ret))
+		pr_warning("error enabling function return\n");
+
+	ret = target(1, 2, 3, 4, 5, 6);
+
+	cleanup_all_probes();
+
+	pr_cont("OK\n");
+	return 0;
+}
+
+late_initcall(kprobe_trace_self_tests_init);
+
+#endif
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 0000000..ddfa0fd
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,550 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+#include <linux/hw_breakpoint.h>
+#include <asm/hw_breakpoint.h>
+
+/*
+ * For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HBP_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+
+struct trace_ksym {
+	struct perf_event	**ksym_hbp;
+	struct perf_event_attr	attr;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+	unsigned long		counter;
+#endif
+	struct hlist_node	ksym_hlist;
+};
+
+static struct trace_array *ksym_trace_array;
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+static DEFINE_MUTEX(ksym_tracer_mutex);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+
+void ksym_collect_stats(unsigned long hbp_hit_addr)
+{
+	struct hlist_node *node;
+	struct trace_ksym *entry;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
+		if ((entry->attr.bp_addr == hbp_hit_addr) &&
+		    (entry->counter <= MAX_UL_INT)) {
+			entry->counter++;
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+void ksym_hbp_handler(struct perf_event *hbp, void *data)
+{
+	struct ring_buffer_event *event;
+	struct ksym_trace_entry *entry;
+	struct pt_regs *regs = data;
+	struct ring_buffer *buffer;
+	int pc;
+
+	if (!ksym_tracing_enabled)
+		return;
+
+	buffer = ksym_trace_array->buffer;
+
+	pc = preempt_count();
+
+	event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
+							sizeof(*entry), 0, pc);
+	if (!event)
+		return;
+
+	entry		= ring_buffer_event_data(event);
+	entry->ip	= instruction_pointer(regs);
+	entry->type	= hw_breakpoint_type(hbp);
+	entry->addr	= hw_breakpoint_addr(hbp);
+	strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+	ksym_collect_stats(hw_breakpoint_addr(hbp));
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+	trace_buffer_unlock_commit(buffer, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *str)
+{
+	int access = 0;
+
+	if (str[0] == 'r')
+		access |= HW_BREAKPOINT_R;
+
+	if (str[1] == 'w')
+		access |= HW_BREAKPOINT_W;
+
+	if (str[2] == 'x')
+		access |= HW_BREAKPOINT_X;
+
+	switch (access) {
+	case HW_BREAKPOINT_R:
+	case HW_BREAKPOINT_W:
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		return access;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ *    i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ *    <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+							unsigned long *addr)
+{
+	int ret;
+
+	*ksymname = strsep(&input_string, ":");
+	*addr = kallsyms_lookup_name(*ksymname);
+
+	/* Check for malformed request: (2), (1) and (5) */
+	if ((!input_string) ||
+	    (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
+	    (*addr == 0))
+		return -EINVAL;;
+
+	ret = ksym_trace_get_access_type(input_string);
+
+	return ret;
+}
+
+int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+	struct trace_ksym *entry;
+	int ret = -ENOMEM;
+
+	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+		" new requests for tracing can be accepted now.\n",
+			KSYM_TRACER_MAX);
+		return -ENOSPC;
+	}
+
+	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	hw_breakpoint_init(&entry->attr);
+
+	entry->attr.bp_type = op;
+	entry->attr.bp_addr = addr;
+	entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
+
+	ret = -EAGAIN;
+	entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
+					ksym_hbp_handler);
+
+	if (IS_ERR(entry->ksym_hbp)) {
+		ret = PTR_ERR(entry->ksym_hbp);
+		printk(KERN_INFO "ksym_tracer request failed. Try again"
+					" later!!\n");
+		goto err;
+	}
+
+	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
+	ksym_filter_entry_count++;
+
+	return 0;
+
+err:
+	kfree(entry);
+
+	return ret;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+						size_t count, loff_t *ppos)
+{
+	struct trace_ksym *entry;
+	struct hlist_node *node;
+	struct trace_seq *s;
+	ssize_t cnt = 0;
+	int ret;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+	trace_seq_init(s);
+
+	mutex_lock(&ksym_tracer_mutex);
+
+	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+		ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
+		if (entry->attr.bp_type == HW_BREAKPOINT_R)
+			ret = trace_seq_puts(s, "r--\n");
+		else if (entry->attr.bp_type == HW_BREAKPOINT_W)
+			ret = trace_seq_puts(s, "-w-\n");
+		else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
+			ret = trace_seq_puts(s, "rw-\n");
+		WARN_ON_ONCE(!ret);
+	}
+
+	cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+	mutex_unlock(&ksym_tracer_mutex);
+
+	kfree(s);
+
+	return cnt;
+}
+
+static void __ksym_trace_reset(void)
+{
+	struct trace_ksym *entry;
+	struct hlist_node *node, *node1;
+
+	mutex_lock(&ksym_tracer_mutex);
+	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+								ksym_hlist) {
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
+		ksym_filter_entry_count--;
+		hlist_del_rcu(&(entry->ksym_hlist));
+		synchronize_rcu();
+		kfree(entry);
+	}
+	mutex_unlock(&ksym_tracer_mutex);
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+					const char __user *buffer,
+						size_t count, loff_t *ppos)
+{
+	struct trace_ksym *entry;
+	struct hlist_node *node;
+	char *input_string, *ksymname = NULL;
+	unsigned long ksym_addr = 0;
+	int ret, op, changed = 0;
+
+	input_string = kzalloc(count + 1, GFP_KERNEL);
+	if (!input_string)
+		return -ENOMEM;
+
+	if (copy_from_user(input_string, buffer, count)) {
+		kfree(input_string);
+		return -EFAULT;
+	}
+	input_string[count] = '\0';
+
+	strstrip(input_string);
+
+	/*
+	 * Clear all breakpoints if:
+	 * 1: echo > ksym_trace_filter
+	 * 2: echo 0 > ksym_trace_filter
+	 * 3: echo "*:---" > ksym_trace_filter
+	 */
+	if (!input_string[0] || !strcmp(input_string, "0") ||
+	    !strcmp(input_string, "*:---")) {
+		__ksym_trace_reset();
+		kfree(input_string);
+		return count;
+	}
+
+	ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+	if (ret < 0) {
+		kfree(input_string);
+		return ret;
+	}
+
+	mutex_lock(&ksym_tracer_mutex);
+
+	ret = -EINVAL;
+	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+		if (entry->attr.bp_addr == ksym_addr) {
+			/* Check for malformed request: (6) */
+			if (entry->attr.bp_type != op)
+				changed = 1;
+			else
+				goto out;
+			break;
+		}
+	}
+	if (changed) {
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
+		entry->attr.bp_type = op;
+		ret = 0;
+		if (op > 0) {
+			entry->ksym_hbp =
+				register_wide_hw_breakpoint(&entry->attr,
+					ksym_hbp_handler);
+			if (IS_ERR(entry->ksym_hbp))
+				ret = PTR_ERR(entry->ksym_hbp);
+			else
+				goto out;
+		}
+		/* Error or "symbol:---" case: drop it */
+		ksym_filter_entry_count--;
+		hlist_del_rcu(&(entry->ksym_hlist));
+		synchronize_rcu();
+		kfree(entry);
+		goto out;
+	} else {
+		/* Check for malformed request: (4) */
+		if (op == 0)
+			goto out;
+		ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+	}
+out:
+	mutex_unlock(&ksym_tracer_mutex);
+
+	kfree(input_string);
+
+	if (!ret)
+		ret = count;
+	return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+	.open		= tracing_open_generic,
+	.read		= ksym_trace_filter_read,
+	.write		= ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+	ksym_tracing_enabled = 0;
+	__ksym_trace_reset();
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+	int cpu, ret = 0;
+
+	for_each_online_cpu(cpu)
+		tracing_reset(tr, cpu);
+	ksym_tracing_enabled = 1;
+	ksym_trace_array = tr;
+
+	return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+	seq_puts(m,
+		 "#       TASK-PID   CPU#      Symbol                    "
+		 "Type    Function\n");
+	seq_puts(m,
+		 "#          |        |          |                       "
+		 " |         |\n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+	struct trace_seq *s = &iter->seq;
+	struct ksym_trace_entry *field;
+	char str[KSYM_SYMBOL_LEN];
+	int ret;
+
+	if (entry->type != TRACE_KSYM)
+		return TRACE_TYPE_UNHANDLED;
+
+	trace_assign_type(field, entry);
+
+	ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
+				entry->pid, iter->cpu, (char *)field->addr);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	switch (field->type) {
+	case HW_BREAKPOINT_R:
+		ret = trace_seq_printf(s, " R  ");
+		break;
+	case HW_BREAKPOINT_W:
+		ret = trace_seq_printf(s, " W  ");
+		break;
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+		ret = trace_seq_printf(s, " RW ");
+		break;
+	default:
+		return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	sprint_symbol(str, field->ip);
+	ret = trace_seq_printf(s, "%s\n", str);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+	.name		= "ksym_tracer",
+	.init		= ksym_trace_init,
+	.reset		= ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_ksym,
+#endif
+	.print_header   = ksym_trace_print_header,
+	.print_line	= ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+	ksym_filter_entry_count = 0;
+
+	entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
+				    NULL, &ksym_tracing_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'ksym_trace_filter' file\n");
+
+	return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+	seq_puts(m, "  Access Type ");
+	seq_puts(m, "  Symbol                                       Counter\n");
+	seq_puts(m, "  ----------- ");
+	seq_puts(m, "  ------                                       -------\n");
+	return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+	struct hlist_node *stat = v;
+	struct trace_ksym *entry;
+	int access_type = 0;
+	char fn_name[KSYM_NAME_LEN];
+
+	entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+	access_type = entry->attr.bp_type;
+
+	switch (access_type) {
+	case HW_BREAKPOINT_R:
+		seq_puts(m, "  R           ");
+		break;
+	case HW_BREAKPOINT_W:
+		seq_puts(m, "  W           ");
+		break;
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+		seq_puts(m, "  RW          ");
+		break;
+	default:
+		seq_puts(m, "  NA          ");
+	}
+
+	if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
+		seq_printf(m, "  %-36s", fn_name);
+	else
+		seq_printf(m, "  %-36s", "<NA>");
+	seq_printf(m, " %15lu\n", entry->counter);
+
+	return 0;
+}
+
+static void *ksym_tracer_stat_start(struct tracer_stat *trace)
+{
+	return ksym_filter_head.first;
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+	struct hlist_node *stat = v;
+
+	return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+	.name = "ksym_tracer",
+	.stat_start = ksym_tracer_stat_start,
+	.stat_next = ksym_tracer_stat_next,
+	.stat_headers = ksym_tracer_stat_headers,
+	.stat_show = ksym_tracer_stat_show
+};
+
+__init static int ksym_tracer_stat_init(void)
+{
+	int ret;
+
+	ret = register_stat_tracer(&ksym_tracer_stats);
+	if (ret) {
+		printk(KERN_WARNING "Warning: could not register "
+				    "ksym tracer stats\n");
+		return 1;
+	}
+
+	return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbab..dc98309 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@
 	case TRACE_GRAPH_ENT:
 	case TRACE_GRAPH_RET:
 	case TRACE_HW_BRANCHES:
+	case TRACE_KSYM:
 		return 1;
 	}
 	return 0;
@@ -808,3 +809,57 @@
 	return ret;
 }
 #endif /* CONFIG_HW_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+static int ksym_selftest_dummy;
+
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+	unsigned long count;
+	int ret;
+
+	/* start the tracing */
+	ret = tracer_init(trace, tr);
+	if (ret) {
+		warn_failed_init_tracer(trace, ret);
+		return ret;
+	}
+
+	ksym_selftest_dummy = 0;
+	/* Register the read-write tracing request */
+
+	ret = process_new_ksym_entry("ksym_selftest_dummy",
+				     HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+					(unsigned long)(&ksym_selftest_dummy));
+
+	if (ret < 0) {
+		printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+		goto ret_path;
+	}
+	/* Perform a read and a write operation over the dummy variable to
+	 * trigger the tracer
+	 */
+	if (ksym_selftest_dummy == 0)
+		ksym_selftest_dummy++;
+
+	/* stop the tracing. */
+	tracing_stop();
+	/* check the trace buffer */
+	ret = trace_test_buffer(tr, &count);
+	trace->reset(tr);
+	tracing_start();
+
+	/* read & write operations - one each is performed on the dummy variable
+	 * triggering two entries in the trace buffer
+	 */
+	if (!ret && count != 2) {
+		printk(KERN_CONT "Ksym tracer startup test failed");
+		ret = -1;
+	}
+
+ret_path:
+	return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
+
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ddee9c5..57501d9 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -51,32 +51,6 @@
 	return syscalls_metadata[nr];
 }
 
-int syscall_name_to_nr(char *name)
-{
-	int i;
-
-	if (!syscalls_metadata)
-		return -1;
-
-	for (i = 0; i < NR_syscalls; i++) {
-		if (syscalls_metadata[i]) {
-			if (!strcmp(syscalls_metadata[i]->name, name))
-				return i;
-		}
-	}
-	return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
-	syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
-{
-	syscalls_metadata[num]->exit_id = id;
-}
-
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
 {
@@ -93,7 +67,7 @@
 	if (!entry)
 		goto end;
 
-	if (entry->enter_id != ent->type) {
+	if (entry->enter_event->id != ent->type) {
 		WARN_ON_ONCE(1);
 		goto end;
 	}
@@ -148,7 +122,7 @@
 		return TRACE_TYPE_HANDLED;
 	}
 
-	if (entry->exit_id != ent->type) {
+	if (entry->exit_event->id != ent->type) {
 		WARN_ON_ONCE(1);
 		return TRACE_TYPE_UNHANDLED;
 	}
@@ -166,24 +140,19 @@
 #define SYSCALL_FIELD(type, name)					\
 	sizeof(type) != sizeof(trace.name) ?				\
 		__bad_type_size() :					\
-		#type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
+		#type, #name, offsetof(typeof(trace), name),		\
+		sizeof(trace.name), is_signed_type(type)
 
 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
 	int i;
-	int nr;
 	int ret;
-	struct syscall_metadata *entry;
+	struct syscall_metadata *entry = call->data;
 	struct syscall_trace_enter trace;
 	int offset = offsetof(struct syscall_trace_enter, args);
 
-	nr = syscall_name_to_nr(call->data);
-	entry = syscall_nr_to_meta(nr);
-
-	if (!entry)
-		return 0;
-
-	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+			       "\tsigned:%u;\n",
 			       SYSCALL_FIELD(int, nr));
 	if (!ret)
 		return 0;
@@ -193,8 +162,10 @@
 				        entry->args[i]);
 		if (!ret)
 			return 0;
-		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
-				       sizeof(unsigned long));
+		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
+				       "\tsigned:%u;\n", offset,
+				       sizeof(unsigned long),
+				       is_signed_type(unsigned long));
 		if (!ret)
 			return 0;
 		offset += sizeof(unsigned long);
@@ -226,8 +197,10 @@
 	struct syscall_trace_exit trace;
 
 	ret = trace_seq_printf(s,
-			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+			       "\tsigned:%u;\n"
+			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+			       "\tsigned:%u;\n",
 			       SYSCALL_FIELD(int, nr),
 			       SYSCALL_FIELD(long, ret));
 	if (!ret)
@@ -239,22 +212,19 @@
 int syscall_enter_define_fields(struct ftrace_event_call *call)
 {
 	struct syscall_trace_enter trace;
-	struct syscall_metadata *meta;
+	struct syscall_metadata *meta = call->data;
 	int ret;
-	int nr;
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	nr = syscall_name_to_nr(call->data);
-	meta = syscall_nr_to_meta(nr);
-
-	if (!meta)
-		return 0;
-
 	ret = trace_define_common_fields(call);
 	if (ret)
 		return ret;
 
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	if (ret)
+		return ret;
+
 	for (i = 0; i < meta->nb_args; i++) {
 		ret = trace_define_field(call, meta->types[i],
 					 meta->args[i], offset,
@@ -275,7 +245,11 @@
 	if (ret)
 		return ret;
 
-	ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0,
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	if (ret)
+		return ret;
+
+	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
 				 FILTER_OTHER);
 
 	return ret;
@@ -302,8 +276,8 @@
 
 	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
 
-	event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
-						  size, 0, 0);
+	event = trace_current_buffer_lock_reserve(&buffer,
+			sys_data->enter_event->id, size, 0, 0);
 	if (!event)
 		return;
 
@@ -334,8 +308,8 @@
 	if (!sys_data)
 		return;
 
-	event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
-				sizeof(*entry), 0, 0);
+	event = trace_current_buffer_lock_reserve(&buffer,
+			sys_data->exit_event->id, sizeof(*entry), 0, 0);
 	if (!event)
 		return;
 
@@ -348,14 +322,12 @@
 		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
-int reg_event_syscall_enter(void *ptr)
+int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	char *name;
 
-	name = (char *)ptr;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
 	mutex_lock(&syscall_trace_lock);
@@ -372,13 +344,11 @@
 	return ret;
 }
 
-void unreg_event_syscall_enter(void *ptr)
+void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int num;
-	char *name;
 
-	name = (char *)ptr;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return;
 	mutex_lock(&syscall_trace_lock);
@@ -389,14 +359,12 @@
 	mutex_unlock(&syscall_trace_lock);
 }
 
-int reg_event_syscall_exit(void *ptr)
+int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
-	char *name;
 
-	name = (char *)ptr;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return -ENOSYS;
 	mutex_lock(&syscall_trace_lock);
@@ -413,13 +381,11 @@
 	return ret;
 }
 
-void unreg_event_syscall_exit(void *ptr)
+void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int num;
-	char *name;
 
-	name = (char *)ptr;
-	num = syscall_name_to_nr(name);
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 	if (num < 0 || num >= NR_syscalls)
 		return;
 	mutex_lock(&syscall_trace_lock);
@@ -430,13 +396,17 @@
 	mutex_unlock(&syscall_trace_lock);
 }
 
-struct trace_event event_syscall_enter = {
-	.trace			= print_syscall_enter,
-};
+int init_syscall_trace(struct ftrace_event_call *call)
+{
+	int id;
 
-struct trace_event event_syscall_exit = {
-	.trace			= print_syscall_exit,
-};
+	id = register_ftrace_event(call->event);
+	if (!id)
+		return -ENODEV;
+	call->id = id;
+	INIT_LIST_HEAD(&call->fields);
+	return 0;
+}
 
 int __init init_ftrace_syscalls(void)
 {
@@ -454,6 +424,10 @@
 	for (i = 0; i < NR_syscalls; i++) {
 		addr = arch_syscall_addr(i);
 		meta = find_syscall_meta(addr);
+		if (!meta)
+			continue;
+
+		meta->syscall_nr = i;
 		syscalls_metadata[i] = meta;
 	}
 
@@ -473,8 +447,10 @@
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_enter *rec;
 	unsigned long flags;
+	char *trace_buf;
 	char *raw_data;
 	int syscall_nr;
+	int rctx;
 	int size;
 	int cpu;
 
@@ -498,41 +474,42 @@
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
 
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
+		goto end_recursion;
+
 	cpu = smp_processor_id();
 
-	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
-	else
-		raw_data = rcu_dereference(trace_profile_buf);
+	trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, cpu);
+	raw_data = per_cpu_ptr(trace_buf, cpu);
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
 
 	rec = (struct syscall_trace_enter *) raw_data;
 	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->enter_id;
+	rec->ent.type = sys_data->enter_event->id;
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
 
 end:
+	perf_swevent_put_recursion_context(rctx);
+end_recursion:
 	local_irq_restore(flags);
 }
 
-int reg_prof_syscall_enter(char *name)
+int prof_sysenter_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return -ENOSYS;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	if (!sys_prof_refcount_enter)
@@ -548,13 +525,11 @@
 	return ret;
 }
 
-void unreg_prof_syscall_enter(char *name)
+void prof_sysenter_disable(struct ftrace_event_call *call)
 {
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	sys_prof_refcount_enter--;
@@ -570,7 +545,9 @@
 	struct syscall_trace_exit *rec;
 	unsigned long flags;
 	int syscall_nr;
+	char *trace_buf;
 	char *raw_data;
+	int rctx;
 	int size;
 	int cpu;
 
@@ -596,17 +573,19 @@
 
 	/* Protect the per cpu buffer, begin the rcu read side */
 	local_irq_save(flags);
+
+	rctx = perf_swevent_get_recursion_context();
+	if (rctx < 0)
+		goto end_recursion;
+
 	cpu = smp_processor_id();
 
-	if (in_nmi())
-		raw_data = rcu_dereference(trace_profile_buf_nmi);
-	else
-		raw_data = rcu_dereference(trace_profile_buf);
+	trace_buf = rcu_dereference(perf_trace_buf);
 
-	if (!raw_data)
+	if (!trace_buf)
 		goto end;
 
-	raw_data = per_cpu_ptr(raw_data, cpu);
+	raw_data = per_cpu_ptr(trace_buf, cpu);
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -614,24 +593,24 @@
 	rec = (struct syscall_trace_exit *)raw_data;
 
 	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->exit_id;
+	rec->ent.type = sys_data->exit_event->id;
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 
-	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
 
 end:
+	perf_swevent_put_recursion_context(rctx);
+end_recursion:
 	local_irq_restore(flags);
 }
 
-int reg_prof_syscall_exit(char *name)
+int prof_sysexit_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return -ENOSYS;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	if (!sys_prof_refcount_exit)
@@ -647,13 +626,11 @@
 	return ret;
 }
 
-void unreg_prof_syscall_exit(char *name)
+void prof_sysexit_disable(struct ftrace_event_call *call)
 {
 	int num;
 
-	num = syscall_name_to_nr(name);
-	if (num < 0 || num >= NR_syscalls)
-		return;
+	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
 	sys_prof_refcount_exit--;
diff --git a/samples/Kconfig b/samples/Kconfig
index b92bde3..e4be84a 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -40,5 +40,11 @@
 	default m
 	depends on SAMPLE_KPROBES && KRETPROBES
 
+config SAMPLE_HW_BREAKPOINT
+	tristate "Build kernel hardware breakpoint examples -- loadable module only"
+	depends on HAVE_HW_BREAKPOINT && m
+	help
+	  This builds kernel hardware breakpoint example modules.
+
 endif # SAMPLES
 
diff --git a/samples/Makefile b/samples/Makefile
index 43343a0..0f15e6d 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,4 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ tracepoints/ trace_events/
+obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ tracepoints/ trace_events/ \
+			   hw_breakpoint/
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644
index 0000000..0f5c31c
--- /dev/null
+++ b/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644
index 0000000..2952550
--- /dev/null
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,87 @@
+/*
+ * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * usage: insmod data_breakpoint.ko ksym=<ksym_name>
+ *
+ * This file is a kernel module that places a breakpoint over ksym_name kernel
+ * variable using Hardware Breakpoint register. The corresponding handler which
+ * prints a backtrace is invoked everytime a write operation is performed on
+ * that variable.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ *
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>	/* Needed by all modules */
+#include <linux/kernel.h>	/* Needed for KERN_INFO */
+#include <linux/init.h>		/* Needed for the macros */
+#include <linux/kallsyms.h>
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+
+struct perf_event **sample_hbp;
+
+static char ksym_name[KSYM_NAME_LEN] = "pid_max";
+module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
+MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
+			" write operations on the kernel symbol");
+
+static void sample_hbp_handler(struct perf_event *temp, void *data)
+{
+	printk(KERN_INFO "%s value is changed\n", ksym_name);
+	dump_stack();
+	printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
+}
+
+static int __init hw_break_module_init(void)
+{
+	int ret;
+	DEFINE_BREAKPOINT_ATTR(attr);
+
+	attr.bp_addr = kallsyms_lookup_name(ksym_name);
+	attr.bp_len = HW_BREAKPOINT_LEN_4;
+	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+
+	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
+	if (IS_ERR(sample_hbp)) {
+		ret = PTR_ERR(sample_hbp);
+		goto fail;
+	}
+
+	printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name);
+
+	return 0;
+
+fail:
+	printk(KERN_INFO "Breakpoint registration failed\n");
+
+	return ret;
+}
+
+static void __exit hw_break_module_exit(void)
+{
+	unregister_wide_hw_breakpoint(sample_hbp);
+	printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
+}
+
+module_init(hw_break_module_init);
+module_exit(hw_break_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("K.Prasad");
+MODULE_DESCRIPTION("ksym breakpoint");
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index ea9f8a5..241310e 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1852,10 +1852,17 @@
 	my $tracepointname = 0;
 	my $tracepointargs = 0;
 
-	if($prototype =~ m/TRACE_EVENT\((.*?),/) {
+	if ($prototype =~ m/TRACE_EVENT\((.*?),/) {
 		$tracepointname = $1;
 	}
-	if($prototype =~ m/TP_PROTO\((.*?)\)/) {
+	if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) {
+		$tracepointname = $1;
+	}
+	if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) {
+		$tracepointname = $2;
+	}
+	$tracepointname =~ s/^\s+//; #strip leading whitespace
+	if ($prototype =~ m/TP_PROTO\((.*?)\)/) {
 		$tracepointargs = $1;
 	}
 	if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
@@ -1920,7 +1927,9 @@
 	if ($prototype =~ /SYSCALL_DEFINE/) {
 		syscall_munge();
 	}
-	if ($prototype =~ /TRACE_EVENT/) {
+	if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ ||
+	    $prototype =~ /DEFINE_SINGLE_EVENT/)
+	{
 		tracepoint_munge($file);
 	}
 	dump_function($prototype, $file);
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 0854f11..fe08660 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -12,6 +12,7 @@
 perf*.xml
 perf*.html
 common-cmds.h
+perf.data
 tags
 TAGS
 cscope*
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
new file mode 100644
index 0000000..ae525ac
--- /dev/null
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -0,0 +1,120 @@
+perf-bench(1)
+============
+
+NAME
+----
+perf-bench - General framework for benchmark suites
+
+SYNOPSIS
+--------
+[verse]
+'perf bench' [<common options>] <subsystem> <suite> [<options>]
+
+DESCRIPTION
+-----------
+This 'perf bench' command is general framework for benchmark suites.
+
+COMMON OPTIONS
+--------------
+-f::
+--format=::
+Specify format style.
+Current available format styles are,
+
+'default'::
+Default style. This is mainly for human reading.
+---------------------
+% perf bench sched pipe                      # with no style specify
+(executing 1000000 pipe operations between two tasks)
+        Total time:5.855 sec
+                5.855061 usecs/op
+		170792 ops/sec
+---------------------
+
+'simple'::
+This simple style is friendly for automated
+processing by scripts.
+---------------------
+% perf bench --format=simple sched pipe      # specified simple
+5.988
+---------------------
+
+SUBSYSTEM
+---------
+
+'sched'::
+	Scheduler and IPC mechanisms.
+
+SUITES FOR 'sched'
+~~~~~~~~~~~~~~~~~~
+*messaging*::
+Suite for evaluating performance of scheduler and IPC mechanisms.
+Based on hackbench by Rusty Russell.
+
+Options of *pipe*
+^^^^^^^^^^^^^^^^^
+-p::
+--pipe::
+Use pipe() instead of socketpair()
+
+-t::
+--thread::
+Be multi thread instead of multi process
+
+-g::
+--group=::
+Specify number of groups
+
+-l::
+--loop=::
+Specify number of loops
+
+Example of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched messaging                 # run with default
+options (20 sender and receiver processes per group)
+(10 groups == 400 processes run)
+
+      Total time:0.308 sec
+
+% perf bench sched messaging -t -g 20        # be multi-thread,with 20 groups
+(20 sender and receiver threads per group)
+(20 groups == 800 threads run)
+
+      Total time:0.582 sec
+---------------------
+
+*pipe*::
+Suite for pipe() system call.
+Based on pipe-test-1m.c by Ingo Molnar.
+
+Options of *pipe*
+^^^^^^^^^^^^^^^^^
+-l::
+--loop=::
+Specify number of loops.
+
+Example of *pipe*
+^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched pipe
+(executing 1000000 pipe operations between two tasks)
+
+        Total time:8.091 sec
+                8.091833 usecs/op
+                123581 ops/sec
+
+% perf bench sched pipe -l 1000              # loop 1000
+(executing 1000 pipe operations between two tasks)
+
+        Total time:0.016 sec
+                16.948000 usecs/op
+                59004 ops/sec
+---------------------
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
new file mode 100644
index 0000000..01b642c
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -0,0 +1,34 @@
+perf-buildid-list(1)
+====================
+
+NAME
+----
+perf-buildid-list - List the buildids in a perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-list <options>'
+
+DESCRIPTION
+-----------
+This command displays the buildids found in a perf.data file, so that other
+tools can be used to fetch packages with matching symbol tables for use by
+perf report.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: perf.data)
+-f::
+--force::
+	Don't do ownership validation.
+-v::
+--verbose::
+	Be more verbose.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-top[1],
+linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644
index 0000000..44b0ce3
--- /dev/null
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -0,0 +1,44 @@
+perf-kmem(1)
+==============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory(slab) properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record} [<options>]
+
+DESCRIPTION
+-----------
+There's two variants of perf kmem:
+
+  'perf kmem record <command>' to record the kmem events
+  of an arbitrary workload.
+
+  'perf kmem' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+	Select the input file (default: perf.data)
+
+--stat=<caller|alloc>::
+	Select per callsite or per allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+	Sort the output (default: frag,hit,bytes)
+
+-l <num>::
+--line=<num>::
+	Print n lines only
+
+--raw-ip::
+	Print raw ip instead of symbol
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
new file mode 100644
index 0000000..9270594
--- /dev/null
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -0,0 +1,49 @@
+perf-probe(1)
+=============
+
+NAME
+----
+perf-probe - Define new dynamic tracepoints
+
+SYNOPSIS
+--------
+[verse]
+'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...]
+or
+'perf probe' [options] 'PROBE' ['PROBE' ...]
+
+
+DESCRIPTION
+-----------
+This command defines dynamic tracepoint events, by symbol and registers
+without debuginfo, or by C expressions (C line numbers, C function names,
+and C local variables) with debuginfo.
+
+
+OPTIONS
+-------
+-k::
+--vmlinux=PATH::
+	Specify vmlinux path which has debuginfo (Dwarf binary).
+
+-v::
+--verbose::
+        Be more verbose (show parsed arguments, etc).
+
+-a::
+--add::
+	Define a probe point (see PROBE SYNTAX for detail)
+
+PROBE SYNTAX
+------------
+Probe points are defined by following syntax.
+
+ "FUNC[+OFFS|:RLN|%return][@SRC]|SRC:ALN [ARG ...]"
+
+'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, 'RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. In addition, 'SRC' specifies a source file which has that function.
+It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
+'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
+
+SEE ALSO
+--------
+linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 0ff23de..fc46c0b 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -26,11 +26,19 @@
 
 -e::
 --event=::
-	Select the PMU event. Selection can be a symbolic event name
-	(use 'perf list' to list all events) or a raw PMU
-	event (eventsel+umask) in the form of rNNN where NNN is a
-	hexadecimal event descriptor.
+	Select the PMU event. Selection can be:
 
+        - a symbolic event name	(use 'perf list' to list all events)
+
+        - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+	  hexadecimal event descriptor.
+
+        - a hardware breakpoint event in the form of '\mem:addr[:access]'
+          where addr is the address in memory you want to break in.
+          Access is the memory access type (read, write, execute) it can
+          be passed as follows: '\mem:addr[:[r][w][x]]'.
+          If you want to profile read-write accesses in 0x1000, just set
+          'mem:0x1000:rw'.
 -a::
         System-wide collection.
 
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 59f0b84..9dccb18 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -24,11 +24,11 @@
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
 	file://filename entries.
--n
---show-nr-samples
+-n::
+--show-nr-samples::
 	Show the number of samples for each symbol
--T
---threads
+-T::
+--threads::
 	Show per-thread event counters
 -C::
 --comms=::
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index a791009..4b17883 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -31,9 +31,12 @@
 -w::
 --width=::
         Select the width of the SVG file (default: 1000)
--p::
+-P::
 --power-only::
         Only output the CPU power section of the diagram
+-p::
+--process::
+        Select the processes to display, by name or PID
 
 
 SEE ALSO
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt
new file mode 100644
index 0000000..c5f55f4
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace-perl.txt
@@ -0,0 +1,219 @@
+perf-trace-perl(1)
+==================
+
+NAME
+----
+perf-trace-perl - Process trace data with a Perl script
+
+SYNOPSIS
+--------
+[verse]
+'perf trace' [-s [lang]:script[.ext] ]
+
+DESCRIPTION
+-----------
+
+This perf trace option is used to process perf trace data using perf's
+built-in Perl interpreter.  It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Perl script, if any.
+
+STARTER SCRIPTS
+---------------
+
+You can avoid reading the rest of this document by running 'perf trace
+-g perl' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/perl for typical examples showing how to
+do basic things like aggregate event data, print results, etc.  Also,
+the check-perf-trace.pl script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf trace is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace.  If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_handled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above options: -c 1 says to sample every event, -a to enable
+system-wide collection, -M to multiplex the output, and -R to collect
+raw samples.
+
+The format file for the sched_wakep event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+        field:unsigned short common_type;
+        field:unsigned char common_flags;
+        field:unsigned char common_preempt_count;
+        field:int common_pid;
+        field:int common_lock_depth;
+
+        field:char comm[TASK_COMM_LEN];
+        field:pid_t pid;
+        field:int prio;
+        field:int success;
+        field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+sub sched::sched_wakeup
+{
+   my ($event_name, $context, $common_cpu, $common_secs,
+       $common_nsecs, $common_pid, $common_comm,
+       $comm, $pid, $prio, $success, $target_cpu) = @_;
+}
+----
+
+The handler function takes the form subsystem::event_name.
+
+The $common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ $event_name 	  	    the name of the event as text
+ $context		    an opaque 'cookie' used in calls back into perf
+ $common_cpu		    the cpu the event occurred on
+ $common_secs		    the secs portion of the event timestamp
+ $common_nsecs		    the nsecs portion of the event timestamp
+ $common_pid		    the pid of the current task
+ $common_comm		    the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script.  The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf trace Perl script should start by setting up a Perl module
+search path and 'use'ing a few support modules (see module
+descriptions below):
+
+----
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+ use lib "./Perf-Trace-Util/lib";
+ use Perf::Trace::Core;
+ use Perf::Trace::Context;
+ use Perf::Trace::Util;
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+ sub trace_begin
+ {
+ }
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+sub trace_end
+{
+}
+----
+
+*trace_unhandled*, if defined, is called after for any event that
+ doesn't have a handler explicitly defined for it.  The standard set
+ of common arguments are passed into it:
+
+----
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs,
+        $common_nsecs, $common_pid, $common_comm) = @_;
+}
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf trace Perl modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various Perf::Trace::* Perl modules.  To use the functions and
+variables from the given module, add the corresponding 'use
+Perf::Trace::XXX' line to your perf trace script.
+
+Perf::Trace::Core Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields.  These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+  flag_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the flag field $field_name of event $event_name
+  symbol_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the symbolic field $field_name of event $event_name
+
+Perf::Trace::Context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+Perf::Trace::Context defines a set of functions that can be used to
+access this data in the context of the current event.  Each of these
+functions expects a $context variable, which is the same as the
+$context variable passed into every event handler as the second
+argument.
+
+ common_pc($context) - returns common_preempt count for the current event
+ common_flags($context) - returns common_flags for the current event
+ common_lock_depth($context) - returns common_lock_depth for the current event
+
+Perf::Trace::Util Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Various utility functions for use with perf trace:
+
+  nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
+  nsecs_secs($nsecs) - returns whole secs portion given nsecs
+  nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
+  nsecs_str($nsecs) - returns printable string in the form secs.nsecs
+  avg($total, $n) - returns average given a sum and a total number of values
+
+SEE ALSO
+--------
+linkperf:perf-trace[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 41ed753..07065ef 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -20,6 +20,15 @@
 --dump-raw-trace=::
         Display verbose dump of the trace data.
 
+-s::
+--script=::
+        Process trace data with the given script ([lang]:script[.ext]).
+
+-g::
+--gen-script=::
+        Generate perf-trace.[ext] starter script for given language,
+        using current perf.data.
+
 SEE ALSO
 --------
-linkperf:perf-record[1]
+linkperf:perf-record[1], linkperf:perf-trace-perl[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7e190d5..23ec660 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -2,6 +2,7 @@
 all::
 
 # Define V=1 to have a more verbose compile.
+# Define V=2 to have an even more verbose compile.
 #
 # Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
 # or vsnprintf() return -1 instead of number of characters which would
@@ -145,6 +146,10 @@
 # Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
 # your external grep (e.g., if your system lacks grep, if its grep is
 # broken, or spawning external process is slower than built-in grep perf has).
+#
+# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
 
 PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
 	@$(SHELL_PATH) util/PERF-VERSION-GEN
@@ -157,20 +162,6 @@
 uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
 uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
-#
-# Add -m32 for cross-builds:
-#
-ifdef NO_64BIT
-  MBITS := -m32
-else
-  #
-  # If we're on a 64-bit kernel, use -m64:
-  #
-  ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
-    MBITS := -m64
-  endif
-endif
-
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
 #
@@ -200,8 +191,15 @@
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-prototypes
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wdeclaration-after-statement
 
-CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS)
-LDFLAGS = -lpthread -lrt -lelf -lm
+ifeq ("$(origin DEBUG)", "command line")
+  PERF_DEBUG = $(DEBUG)
+endif
+ifndef PERF_DEBUG
+  CFLAGS_OPTIMIZE = -O6
+endif
+
+CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+EXTLIBS = -lpthread -lrt -lelf -lm
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
 STRIP ?= strip
@@ -252,6 +250,9 @@
 # explicitly what architecture to check for. Fix this up for yours..
 SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
 
+ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null "$(QUIET_STDERR)" && echo y"), y)
+  CFLAGS := $(CFLAGS) -fstack-protector-all
+endif
 
 
 ### --- END CONFIGURATION SECTION ---
@@ -327,8 +328,28 @@
 LIB_H += ../../include/linux/perf_event.h
 LIB_H += ../../include/linux/rbtree.h
 LIB_H += ../../include/linux/list.h
+LIB_H += ../../include/linux/stringify.h
+LIB_H += util/include/linux/bitmap.h
+LIB_H += util/include/linux/bitops.h
+LIB_H += util/include/linux/compiler.h
+LIB_H += util/include/linux/ctype.h
+LIB_H += util/include/linux/kernel.h
 LIB_H += util/include/linux/list.h
+LIB_H += util/include/linux/module.h
+LIB_H += util/include/linux/poison.h
+LIB_H += util/include/linux/prefetch.h
+LIB_H += util/include/linux/rbtree.h
+LIB_H += util/include/linux/string.h
+LIB_H += util/include/linux/types.h
+LIB_H += util/include/asm/asm-offsets.h
+LIB_H += util/include/asm/bitops.h
+LIB_H += util/include/asm/byteorder.h
+LIB_H += util/include/asm/swab.h
+LIB_H += util/include/asm/system.h
+LIB_H += util/include/asm/uaccess.h
 LIB_H += perf.h
+LIB_H += util/debugfs.h
+LIB_H += util/event.h
 LIB_H += util/types.h
 LIB_H += util/levenshtein.h
 LIB_H += util/parse-options.h
@@ -342,15 +363,22 @@
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
 LIB_H += util/symbol.h
-LIB_H += util/module.h
 LIB_H += util/color.h
 LIB_H += util/values.h
+LIB_H += util/sort.h
+LIB_H += util/hist.h
+LIB_H += util/thread.h
+LIB_H += util/data_map.h
+LIB_H += util/probe-finder.h
+LIB_H += util/probe-event.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
 LIB_OBJS += util/config.o
 LIB_OBJS += util/ctype.o
+LIB_OBJS += util/debugfs.o
 LIB_OBJS += util/environment.o
+LIB_OBJS += util/event.o
 LIB_OBJS += util/exec_cmd.o
 LIB_OBJS += util/help.o
 LIB_OBJS += util/levenshtein.o
@@ -358,6 +386,9 @@
 LIB_OBJS += util/parse-events.o
 LIB_OBJS += util/path.o
 LIB_OBJS += util/rbtree.o
+LIB_OBJS += util/bitmap.o
+LIB_OBJS += util/hweight.o
+LIB_OBJS += util/find_next_bit.o
 LIB_OBJS += util/run-command.o
 LIB_OBJS += util/quote.o
 LIB_OBJS += util/strbuf.o
@@ -367,7 +398,6 @@
 LIB_OBJS += util/wrapper.o
 LIB_OBJS += util/sigchain.o
 LIB_OBJS += util/symbol.o
-LIB_OBJS += util/module.o
 LIB_OBJS += util/color.o
 LIB_OBJS += util/pager.o
 LIB_OBJS += util/header.o
@@ -379,11 +409,25 @@
 LIB_OBJS += util/trace-event-parse.o
 LIB_OBJS += util/trace-event-read.o
 LIB_OBJS += util/trace-event-info.o
+LIB_OBJS += util/trace-event-perl.o
 LIB_OBJS += util/svghelper.o
+LIB_OBJS += util/sort.o
+LIB_OBJS += util/hist.o
+LIB_OBJS += util/data_map.o
+LIB_OBJS += util/probe-event.o
 
 BUILTIN_OBJS += builtin-annotate.o
+
+BUILTIN_OBJS += builtin-bench.o
+
+# Benchmark modules
+BUILTIN_OBJS += bench/sched-messaging.o
+BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o
+
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-sched.o
+BUILTIN_OBJS += builtin-buildid-list.o
 BUILTIN_OBJS += builtin-list.o
 BUILTIN_OBJS += builtin-record.o
 BUILTIN_OBJS += builtin-report.o
@@ -391,9 +435,16 @@
 BUILTIN_OBJS += builtin-timechart.o
 BUILTIN_OBJS += builtin-top.o
 BUILTIN_OBJS += builtin-trace.o
+BUILTIN_OBJS += builtin-probe.o
+BUILTIN_OBJS += builtin-kmem.o
 
 PERFLIBS = $(LIB_FILE)
 
+ifeq ($(V), 2)
+	QUIET_STDERR = ">/dev/null"
+else
+	QUIET_STDERR = ">/dev/null 2>&1"
+endif
 #
 # Platform specific tweaks
 #
@@ -421,36 +472,58 @@
 	PTHREAD_LIBS =
 endif
 
-ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
-	ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
+ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+	msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
+endif
+
+	ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
 		BASIC_CFLAGS += -DLIBELF_NO_MMAP
 	endif
 else
 	msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
 endif
 
+ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+	msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231);
+	BASIC_CFLAGS += -DNO_LIBDWARF
+else
+	EXTLIBS += -lelf -ldwarf
+	LIB_OBJS += util/probe-finder.o
+endif
+
+PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
+PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+
+ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
+	BASIC_CFLAGS += -DNO_LIBPERL
+else
+	ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
+	LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o
+endif
+
 ifdef NO_DEMANGLE
 	BASIC_CFLAGS += -DNO_DEMANGLE
 else
-	has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
+	has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y")
 
 	ifeq ($(has_bfd),y)
 		EXTLIBS += -lbfd
 	else
-		has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
+		has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y")
 		ifeq ($(has_bfd_iberty),y)
 			EXTLIBS += -lbfd -liberty
 		else
-			has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
+			has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y")
 			ifeq ($(has_bfd_iberty_z),y)
 				EXTLIBS += -lbfd -liberty -lz
 			else
-				has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
+				has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y")
 				ifeq ($(has_cplus_demangle),y)
 					EXTLIBS += -liberty
 					BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
 				else
-					msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
+					msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
 					BASIC_CFLAGS += -DNO_DEMANGLE
 				endif
 			endif
@@ -787,6 +860,25 @@
 util/rbtree.o: ../../lib/rbtree.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o util/rbtree.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
+# some perf warning policies can't fit to lib/bitmap.c, eg: it warns about variable shadowing
+# from <string.h> that comes from kernel headers wrapping.
+KBITMAP_FLAGS=`echo $(ALL_CFLAGS) | sed s/-Wshadow// | sed s/-Wswitch-default// | sed s/-Wextra//`
+
+util/bitmap.o: ../../lib/bitmap.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/bitmap.o -c $(KBITMAP_FLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+util/hweight.o: ../../lib/hweight.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/hweight.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+
+scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+
 perf-%$X: %.o $(PERFLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
@@ -894,6 +986,13 @@
 install: all
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+	$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
+	$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
+	$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+	$(INSTALL) scripts/perl/Perf-Trace-Util/Makefile.PL -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util'
+	$(INSTALL) scripts/perl/Perf-Trace-Util/README -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util'
 ifdef BUILT_INS
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -979,7 +1078,7 @@
 #	$(RM) configure
 
 clean:
-	$(RM) *.o */*.o $(LIB_FILE)
+	$(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE)
 	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
 	$(RM) $(TEST_PROGRAMS)
 	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
new file mode 100644
index 0000000..f7781c6
--- /dev/null
+++ b/tools/perf/bench/bench.h
@@ -0,0 +1,17 @@
+#ifndef BENCH_H
+#define BENCH_H
+
+extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
+extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
+
+#define BENCH_FORMAT_DEFAULT_STR	"default"
+#define BENCH_FORMAT_DEFAULT		0
+#define BENCH_FORMAT_SIMPLE_STR		"simple"
+#define BENCH_FORMAT_SIMPLE		1
+
+#define BENCH_FORMAT_UNKNOWN		-1
+
+extern int bench_format;
+
+#endif
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 0000000..8977317
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,193 @@
+/*
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char	*length_str	= "1MB";
+static const char	*routine	= "default";
+static int		use_clock	= 0;
+static int		clock_fd;
+
+static const struct option options[] = {
+	OPT_STRING('l', "length", &length_str, "1MB",
+		    "Specify length of memory to copy. "
+		    "available unit: B, MB, GB (upper and lower)"),
+	OPT_STRING('r', "routine", &routine, "default",
+		    "Specify routine to copy"),
+	OPT_BOOLEAN('c', "clock", &use_clock,
+		    "Use CPU clock for measuring"),
+	OPT_END()
+};
+
+struct routine {
+	const char *name;
+	const char *desc;
+	void * (*fn)(void *dst, const void *src, size_t len);
+};
+
+struct routine routines[] = {
+	{ "default",
+	  "Default memcpy() provided by glibc",
+	  memcpy },
+	{ NULL,
+	  NULL,
+	  NULL   }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+	"perf bench mem memcpy <options>",
+	NULL
+};
+
+static struct perf_event_attr clock_attr = {
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+
+	if (clock_fd < 0 && errno == ENOSYS)
+		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+	else
+		BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+	int ret;
+	u64 clk;
+
+	ret = read(clock_fd, &clk, sizeof(u64));
+	BUG_ON(ret != sizeof(u64));
+
+	return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+	return (double)ts->tv_sec +
+		(double)ts->tv_usec / (double)1000000;
+}
+
+int bench_mem_memcpy(int argc, const char **argv,
+		     const char *prefix __used)
+{
+	int i;
+	void *dst, *src;
+	size_t length;
+	double bps = 0.0;
+	struct timeval tv_start, tv_end, tv_diff;
+	u64 clock_start, clock_end, clock_diff;
+
+	clock_start = clock_end = clock_diff = 0ULL;
+	argc = parse_options(argc, argv, options,
+			     bench_mem_memcpy_usage, 0);
+
+	tv_diff.tv_sec = 0;
+	tv_diff.tv_usec = 0;
+	length = (size_t)perf_atoll((char *)length_str);
+
+	if ((s64)length <= 0) {
+		fprintf(stderr, "Invalid length:%s\n", length_str);
+		return 1;
+	}
+
+	for (i = 0; routines[i].name; i++) {
+		if (!strcmp(routines[i].name, routine))
+			break;
+	}
+	if (!routines[i].name) {
+		printf("Unknown routine:%s\n", routine);
+		printf("Available routines...\n");
+		for (i = 0; routines[i].name; i++) {
+			printf("\t%s ... %s\n",
+			       routines[i].name, routines[i].desc);
+		}
+		return 1;
+	}
+
+	dst = zalloc(length);
+	if (!dst)
+		die("memory allocation failed - maybe length is too large?\n");
+
+	src = zalloc(length);
+	if (!src)
+		die("memory allocation failed - maybe length is too large?\n");
+
+	if (bench_format == BENCH_FORMAT_DEFAULT) {
+		printf("# Copying %s Bytes from %p to %p ...\n\n",
+		       length_str, src, dst);
+	}
+
+	if (use_clock) {
+		init_clock();
+		clock_start = get_clock();
+	} else {
+		BUG_ON(gettimeofday(&tv_start, NULL));
+	}
+
+	routines[i].fn(dst, src, length);
+
+	if (use_clock) {
+		clock_end = get_clock();
+		clock_diff = clock_end - clock_start;
+	} else {
+		BUG_ON(gettimeofday(&tv_end, NULL));
+		timersub(&tv_end, &tv_start, &tv_diff);
+		bps = (double)((double)length / timeval2double(&tv_diff));
+	}
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		if (use_clock) {
+			printf(" %14lf Clock/Byte\n",
+			       (double)clock_diff / (double)length);
+		} else {
+			if (bps < K)
+				printf(" %14lf B/Sec\n", bps);
+			else if (bps < K * K)
+				printf(" %14lfd KB/Sec\n", bps / 1024);
+			else if (bps < K * K * K)
+				printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+			else {
+				printf(" %14lf GB/Sec\n",
+				       bps / 1024 / 1024 / 1024);
+			}
+		}
+		break;
+	case BENCH_FORMAT_SIMPLE:
+		if (use_clock) {
+			printf("%14lf\n",
+			       (double)clock_diff / (double)length);
+		} else
+			printf("%lf\n", bps);
+		break;
+	default:
+		/* reaching this means there's some disaster: */
+		die("unknown format: %d\n", bench_format);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
new file mode 100644
index 0000000..605a2a9
--- /dev/null
+++ b/tools/perf/bench/sched-messaging.c
@@ -0,0 +1,336 @@
+/*
+ *
+ * builtin-bench-messaging.c
+ *
+ * messaging: Benchmark for scheduler and IPC mechanisms
+ *
+ * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../builtin.h"
+#include "bench.h"
+
+/* Test groups of 20 processes spraying to 20 receivers */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/poll.h>
+#include <limits.h>
+
+#define DATASIZE 100
+
+static int use_pipes = 0;
+static unsigned int loops = 100;
+static unsigned int thread_mode = 0;
+static unsigned int num_groups = 10;
+
+struct sender_context {
+	unsigned int num_fds;
+	int ready_out;
+	int wakefd;
+	int out_fds[0];
+};
+
+struct receiver_context {
+	unsigned int num_packets;
+	int in_fds[2];
+	int ready_out;
+	int wakefd;
+};
+
+static void barf(const char *msg)
+{
+	fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno));
+	exit(1);
+}
+
+static void fdpair(int fds[2])
+{
+	if (use_pipes) {
+		if (pipe(fds) == 0)
+			return;
+	} else {
+		if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
+			return;
+	}
+
+	barf(use_pipes ? "pipe()" : "socketpair()");
+}
+
+/* Block until we're ready to go */
+static void ready(int ready_out, int wakefd)
+{
+	char dummy;
+	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
+
+	/* Tell them we're ready. */
+	if (write(ready_out, &dummy, 1) != 1)
+		barf("CLIENT: ready write");
+
+	/* Wait for "GO" signal */
+	if (poll(&pollfd, 1, -1) != 1)
+		barf("poll");
+}
+
+/* Sender sprays loops messages down each file descriptor */
+static void *sender(struct sender_context *ctx)
+{
+	char data[DATASIZE];
+	unsigned int i, j;
+
+	ready(ctx->ready_out, ctx->wakefd);
+
+	/* Now pump to every receiver. */
+	for (i = 0; i < loops; i++) {
+		for (j = 0; j < ctx->num_fds; j++) {
+			int ret, done = 0;
+
+again:
+			ret = write(ctx->out_fds[j], data + done,
+				    sizeof(data)-done);
+			if (ret < 0)
+				barf("SENDER: write");
+			done += ret;
+			if (done < DATASIZE)
+				goto again;
+		}
+	}
+
+	return NULL;
+}
+
+
+/* One receiver per fd */
+static void *receiver(struct receiver_context* ctx)
+{
+	unsigned int i;
+
+	if (!thread_mode)
+		close(ctx->in_fds[1]);
+
+	/* Wait for start... */
+	ready(ctx->ready_out, ctx->wakefd);
+
+	/* Receive them all */
+	for (i = 0; i < ctx->num_packets; i++) {
+		char data[DATASIZE];
+		int ret, done = 0;
+
+again:
+		ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
+		if (ret < 0)
+			barf("SERVER: read");
+		done += ret;
+		if (done < DATASIZE)
+			goto again;
+	}
+
+	return NULL;
+}
+
+static pthread_t create_worker(void *ctx, void *(*func)(void *))
+{
+	pthread_attr_t attr;
+	pthread_t childid;
+	int err;
+
+	if (!thread_mode) {
+		/* process mode */
+		/* Fork the receiver. */
+		switch (fork()) {
+		case -1:
+			barf("fork()");
+			break;
+		case 0:
+			(*func) (ctx);
+			exit(0);
+			break;
+		default:
+			break;
+		}
+
+		return (pthread_t)0;
+	}
+
+	if (pthread_attr_init(&attr) != 0)
+		barf("pthread_attr_init:");
+
+#ifndef __ia64__
+	if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
+		barf("pthread_attr_setstacksize");
+#endif
+
+	err = pthread_create(&childid, &attr, func, ctx);
+	if (err != 0) {
+		fprintf(stderr, "pthread_create failed: %s (%d)\n",
+			strerror(err), err);
+		exit(-1);
+	}
+	return childid;
+}
+
+static void reap_worker(pthread_t id)
+{
+	int proc_status;
+	void *thread_status;
+
+	if (!thread_mode) {
+		/* process mode */
+		wait(&proc_status);
+		if (!WIFEXITED(proc_status))
+			exit(1);
+	} else {
+		pthread_join(id, &thread_status);
+	}
+}
+
+/* One group of senders and receivers */
+static unsigned int group(pthread_t *pth,
+		unsigned int num_fds,
+		int ready_out,
+		int wakefd)
+{
+	unsigned int i;
+	struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
+			+ num_fds * sizeof(int));
+
+	if (!snd_ctx)
+		barf("malloc()");
+
+	for (i = 0; i < num_fds; i++) {
+		int fds[2];
+		struct receiver_context *ctx = malloc(sizeof(*ctx));
+
+		if (!ctx)
+			barf("malloc()");
+
+
+		/* Create the pipe between client and server */
+		fdpair(fds);
+
+		ctx->num_packets = num_fds * loops;
+		ctx->in_fds[0] = fds[0];
+		ctx->in_fds[1] = fds[1];
+		ctx->ready_out = ready_out;
+		ctx->wakefd = wakefd;
+
+		pth[i] = create_worker(ctx, (void *)receiver);
+
+		snd_ctx->out_fds[i] = fds[1];
+		if (!thread_mode)
+			close(fds[0]);
+	}
+
+	/* Now we have all the fds, fork the senders */
+	for (i = 0; i < num_fds; i++) {
+		snd_ctx->ready_out = ready_out;
+		snd_ctx->wakefd = wakefd;
+		snd_ctx->num_fds = num_fds;
+
+		pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
+	}
+
+	/* Close the fds we have left */
+	if (!thread_mode)
+		for (i = 0; i < num_fds; i++)
+			close(snd_ctx->out_fds[i]);
+
+	/* Return number of children to reap */
+	return num_fds * 2;
+}
+
+static const struct option options[] = {
+	OPT_BOOLEAN('p', "pipe", &use_pipes,
+		    "Use pipe() instead of socketpair()"),
+	OPT_BOOLEAN('t', "thread", &thread_mode,
+		    "Be multi thread instead of multi process"),
+	OPT_INTEGER('g', "group", &num_groups,
+		    "Specify number of groups"),
+	OPT_INTEGER('l', "loop", &loops,
+		    "Specify number of loops"),
+	OPT_END()
+};
+
+static const char * const bench_sched_message_usage[] = {
+	"perf bench sched messaging <options>",
+	NULL
+};
+
+int bench_sched_messaging(int argc, const char **argv,
+		    const char *prefix __used)
+{
+	unsigned int i, total_children;
+	struct timeval start, stop, diff;
+	unsigned int num_fds = 20;
+	int readyfds[2], wakefds[2];
+	char dummy;
+	pthread_t *pth_tab;
+
+	argc = parse_options(argc, argv, options,
+			     bench_sched_message_usage, 0);
+
+	pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
+	if (!pth_tab)
+		barf("main:malloc()");
+
+	fdpair(readyfds);
+	fdpair(wakefds);
+
+	total_children = 0;
+	for (i = 0; i < num_groups; i++)
+		total_children += group(pth_tab+total_children, num_fds,
+					readyfds[1], wakefds[0]);
+
+	/* Wait for everyone to be ready */
+	for (i = 0; i < total_children; i++)
+		if (read(readyfds[0], &dummy, 1) != 1)
+			barf("Reading for readyfds");
+
+	gettimeofday(&start, NULL);
+
+	/* Kick them off */
+	if (write(wakefds[1], &dummy, 1) != 1)
+		barf("Writing to start them");
+
+	/* Reap them all */
+	for (i = 0; i < total_children; i++)
+		reap_worker(pth_tab[i]);
+
+	gettimeofday(&stop, NULL);
+
+	timersub(&stop, &start, &diff);
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# %d sender and receiver %s per group\n",
+		       num_fds, thread_mode ? "threads" : "processes");
+		printf("# %d groups == %d %s run\n\n",
+		       num_groups, num_groups * 2 * num_fds,
+		       thread_mode ? "threads" : "processes");
+		printf(" %14s: %lu.%03lu [sec]\n", "Total time",
+		       diff.tv_sec, diff.tv_usec/1000);
+		break;
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
+		break;
+	default:
+		/* reaching here is something disaster */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
new file mode 100644
index 0000000..238185f
--- /dev/null
+++ b/tools/perf/bench/sched-pipe.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * builtin-bench-pipe.c
+ *
+ * pipe: Benchmark for pipe()
+ *
+ * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
+ *  http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../builtin.h"
+#include "bench.h"
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <linux/unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#define LOOPS_DEFAULT 1000000
+static int loops = LOOPS_DEFAULT;
+
+static const struct option options[] = {
+	OPT_INTEGER('l', "loop", &loops,
+		    "Specify number of loops"),
+	OPT_END()
+};
+
+static const char * const bench_sched_pipe_usage[] = {
+	"perf bench sched pipe <options>",
+	NULL
+};
+
+int bench_sched_pipe(int argc, const char **argv,
+		     const char *prefix __used)
+{
+	int pipe_1[2], pipe_2[2];
+	int m = 0, i;
+	struct timeval start, stop, diff;
+	unsigned long long result_usec = 0;
+
+	/*
+	 * why does "ret" exist?
+	 * discarding returned value of read(), write()
+	 * causes error in building environment for perf
+	 */
+	int ret, wait_stat;
+	pid_t pid, retpid;
+
+	argc = parse_options(argc, argv, options,
+			     bench_sched_pipe_usage, 0);
+
+	assert(!pipe(pipe_1));
+	assert(!pipe(pipe_2));
+
+	pid = fork();
+	assert(pid >= 0);
+
+	gettimeofday(&start, NULL);
+
+	if (!pid) {
+		for (i = 0; i < loops; i++) {
+			ret = read(pipe_1[0], &m, sizeof(int));
+			ret = write(pipe_2[1], &m, sizeof(int));
+		}
+	} else {
+		for (i = 0; i < loops; i++) {
+			ret = write(pipe_1[1], &m, sizeof(int));
+			ret = read(pipe_2[0], &m, sizeof(int));
+		}
+	}
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	if (pid) {
+		retpid = waitpid(pid, &wait_stat, 0);
+		assert((retpid == pid) && WIFEXITED(wait_stat));
+		return 0;
+	}
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# Extecuted %d pipe operations between two tasks\n\n",
+			loops);
+
+		result_usec = diff.tv_sec * 1000000;
+		result_usec += diff.tv_usec;
+
+		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+		       diff.tv_sec, diff.tv_usec/1000);
+
+		printf(" %14lf usecs/op\n",
+		       (double)result_usec / (double)loops);
+		printf(" %14d ops/sec\n",
+		       (int)((double)loops /
+			     ((double)result_usec / (double)1000000)));
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n",
+		       diff.tv_sec, diff.tv_usec / 1000);
+		break;
+
+	default:
+		/* reaching here is something disaster */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 1ec7416..0bf2e8f 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -19,29 +19,26 @@
 #include "perf.h"
 #include "util/debug.h"
 
+#include "util/event.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 #include "util/thread.h"
+#include "util/sort.h"
+#include "util/hist.h"
+#include "util/data_map.h"
 
 static char		const *input_name = "perf.data";
 
-static char		default_sort_order[] = "comm,symbol";
-static char		*sort_order = default_sort_order;
-
 static int		force;
-static int		input;
-static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static int		full_paths;
 
 static int		print_line;
 
-static unsigned long	page_size;
-static unsigned long	mmap_window = 32;
-
-static struct rb_root	threads;
-static struct thread	*last_match;
-
+struct sym_hist {
+	u64		sum;
+	u64		ip[0];
+};
 
 struct sym_ext {
 	struct rb_node	node;
@@ -49,247 +46,38 @@
 	char		*path;
 };
 
-/*
- * histogram, sorted on item, collects counts
- */
-
-static struct rb_root hist;
-
-struct hist_entry {
-	struct rb_node	 rb_node;
-
-	struct thread	 *thread;
-	struct map	 *map;
-	struct dso	 *dso;
-	struct symbol	 *sym;
-	u64	 ip;
-	char		 level;
-
-	uint32_t	 count;
+struct sym_priv {
+	struct sym_hist	*hist;
+	struct sym_ext	*ext;
 };
 
-/*
- * configurable sorting bits
- */
-
-struct sort_entry {
-	struct list_head list;
-
-	const char *header;
-
-	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
-	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
-	size_t	(*print)(FILE *fp, struct hist_entry *);
+static struct symbol_conf symbol_conf = {
+	.priv_size	  = sizeof(struct sym_priv),
+	.try_vmlinux_path = true,
 };
 
-/* --sort pid */
+static const char *sym_hist_filter;
 
-static int64_t
-sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+static int symbol_filter(struct map *map __used, struct symbol *sym)
 {
-	return right->thread->pid - left->thread->pid;
-}
+	if (sym_hist_filter == NULL ||
+	    strcmp(sym->name, sym_hist_filter) == 0) {
+		struct sym_priv *priv = symbol__priv(sym);
+		const int size = (sizeof(*priv->hist) +
+				  (sym->end - sym->start) * sizeof(u64));
 
-static size_t
-sort__thread_print(FILE *fp, struct hist_entry *self)
-{
-	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
-}
-
-static struct sort_entry sort_thread = {
-	.header = "         Command:  Pid",
-	.cmp	= sort__thread_cmp,
-	.print	= sort__thread_print,
-};
-
-/* --sort comm */
-
-static int64_t
-sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	return right->thread->pid - left->thread->pid;
-}
-
-static int64_t
-sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
-{
-	char *comm_l = left->thread->comm;
-	char *comm_r = right->thread->comm;
-
-	if (!comm_l || !comm_r) {
-		if (!comm_l && !comm_r)
-			return 0;
-		else if (!comm_l)
-			return -1;
-		else
-			return 1;
-	}
-
-	return strcmp(comm_l, comm_r);
-}
-
-static size_t
-sort__comm_print(FILE *fp, struct hist_entry *self)
-{
-	return fprintf(fp, "%16s", self->thread->comm);
-}
-
-static struct sort_entry sort_comm = {
-	.header		= "         Command",
-	.cmp		= sort__comm_cmp,
-	.collapse	= sort__comm_collapse,
-	.print		= sort__comm_print,
-};
-
-/* --sort dso */
-
-static int64_t
-sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct dso *dso_l = left->dso;
-	struct dso *dso_r = right->dso;
-
-	if (!dso_l || !dso_r) {
-		if (!dso_l && !dso_r)
-			return 0;
-		else if (!dso_l)
-			return -1;
-		else
-			return 1;
-	}
-
-	return strcmp(dso_l->name, dso_r->name);
-}
-
-static size_t
-sort__dso_print(FILE *fp, struct hist_entry *self)
-{
-	if (self->dso)
-		return fprintf(fp, "%-25s", self->dso->name);
-
-	return fprintf(fp, "%016llx         ", (u64)self->ip);
-}
-
-static struct sort_entry sort_dso = {
-	.header = "Shared Object            ",
-	.cmp	= sort__dso_cmp,
-	.print	= sort__dso_print,
-};
-
-/* --sort symbol */
-
-static int64_t
-sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	u64 ip_l, ip_r;
-
-	if (left->sym == right->sym)
-		return 0;
-
-	ip_l = left->sym ? left->sym->start : left->ip;
-	ip_r = right->sym ? right->sym->start : right->ip;
-
-	return (int64_t)(ip_r - ip_l);
-}
-
-static size_t
-sort__sym_print(FILE *fp, struct hist_entry *self)
-{
-	size_t ret = 0;
-
-	if (verbose)
-		ret += fprintf(fp, "%#018llx  ", (u64)self->ip);
-
-	if (self->sym) {
-		ret += fprintf(fp, "[%c] %s",
-			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
-	} else {
-		ret += fprintf(fp, "%#016llx", (u64)self->ip);
-	}
-
-	return ret;
-}
-
-static struct sort_entry sort_sym = {
-	.header = "Symbol",
-	.cmp	= sort__sym_cmp,
-	.print	= sort__sym_print,
-};
-
-static int sort__need_collapse = 0;
-
-struct sort_dimension {
-	const char		*name;
-	struct sort_entry	*entry;
-	int			taken;
-};
-
-static struct sort_dimension sort_dimensions[] = {
-	{ .name = "pid",	.entry = &sort_thread,	},
-	{ .name = "comm",	.entry = &sort_comm,	},
-	{ .name = "dso",	.entry = &sort_dso,	},
-	{ .name = "symbol",	.entry = &sort_sym,	},
-};
-
-static LIST_HEAD(hist_entry__sort_list);
-
-static int sort_dimension__add(char *tok)
-{
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
-		struct sort_dimension *sd = &sort_dimensions[i];
-
-		if (sd->taken)
-			continue;
-
-		if (strncasecmp(tok, sd->name, strlen(tok)))
-			continue;
-
-		if (sd->entry->collapse)
-			sort__need_collapse = 1;
-
-		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
-		sd->taken = 1;
-
+		priv->hist = malloc(size);
+		if (priv->hist)
+			memset(priv->hist, 0, size);
 		return 0;
 	}
-
-	return -ESRCH;
-}
-
-static int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct sort_entry *se;
-	int64_t cmp = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		cmp = se->cmp(left, right);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
-static int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
-{
-	struct sort_entry *se;
-	int64_t cmp = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		int64_t (*f)(struct hist_entry *, struct hist_entry *);
-
-		f = se->collapse ?: se->cmp;
-
-		cmp = f(left, right);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
+	/*
+	 * FIXME: We should really filter it out, as we don't want to go thru symbols
+	 * we're not interested, and if a DSO ends up with no symbols, delete it too,
+	 * but right now the kernel loading routines in symbol.c bail out if no symbols
+	 * are found, fix it later.
+	 */
+	return 0;
 }
 
 /*
@@ -299,380 +87,81 @@
 {
 	unsigned int sym_size, offset;
 	struct symbol *sym = he->sym;
+	struct sym_priv *priv;
+	struct sym_hist *h;
 
 	he->count++;
 
-	if (!sym || !sym->hist)
+	if (!sym || !he->map)
+		return;
+
+	priv = symbol__priv(sym);
+	if (!priv->hist)
 		return;
 
 	sym_size = sym->end - sym->start;
 	offset = ip - sym->start;
 
+	if (verbose)
+		fprintf(stderr, "%s: ip=%Lx\n", __func__,
+			he->map->unmap_ip(he->map, ip));
+
 	if (offset >= sym_size)
 		return;
 
-	sym->hist_sum++;
-	sym->hist[offset]++;
+	h = priv->hist;
+	h->sum++;
+	h->ip[offset]++;
 
 	if (verbose >= 3)
 		printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
 			(void *)(unsigned long)he->sym->start,
 			he->sym->name,
 			(void *)(unsigned long)ip, ip - he->sym->start,
-			sym->hist[offset]);
+			h->ip[offset]);
 }
 
-static int
-hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-		struct symbol *sym, u64 ip, char level)
+static int hist_entry__add(struct addr_location *al, u64 count)
 {
-	struct rb_node **p = &hist.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *he;
-	struct hist_entry entry = {
-		.thread	= thread,
-		.map	= map,
-		.dso	= dso,
-		.sym	= sym,
-		.ip	= ip,
-		.level	= level,
-		.count	= 1,
-	};
-	int cmp;
-
-	while (*p != NULL) {
-		parent = *p;
-		he = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = hist_entry__cmp(&entry, he);
-
-		if (!cmp) {
-			hist_hit(he, ip);
-
-			return 0;
-		}
-
-		if (cmp < 0)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	he = malloc(sizeof(*he));
-	if (!he)
+	bool hit;
+	struct hist_entry *he = __hist_entry__add(al, NULL, count, &hit);
+	if (he == NULL)
 		return -ENOMEM;
-	*he = entry;
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &hist);
-
+	hist_hit(he, al->addr);
 	return 0;
 }
 
-static void hist_entry__free(struct hist_entry *he)
+static int process_sample_event(event_t *event)
 {
-	free(he);
-}
+	struct addr_location al;
 
-/*
- * collapse the histogram
- */
+	dump_printf("(IP, %d): %d: %p\n", event->header.misc,
+		    event->ip.pid, (void *)(long)event->ip.ip);
 
-static struct rb_root collapse_hists;
-
-static void collapse__insert_entry(struct hist_entry *he)
-{
-	struct rb_node **p = &collapse_hists.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *iter;
-	int64_t cmp;
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = hist_entry__collapse(iter, he);
-
-		if (!cmp) {
-			iter->count += he->count;
-			hist_entry__free(he);
-			return;
-		}
-
-		if (cmp < 0)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &collapse_hists);
-}
-
-static void collapse__resort(void)
-{
-	struct rb_node *next;
-	struct hist_entry *n;
-
-	if (!sort__need_collapse)
-		return;
-
-	next = rb_first(&hist);
-	while (next) {
-		n = rb_entry(next, struct hist_entry, rb_node);
-		next = rb_next(&n->rb_node);
-
-		rb_erase(&n->rb_node, &hist);
-		collapse__insert_entry(n);
-	}
-}
-
-/*
- * reverse the map, sort on count.
- */
-
-static struct rb_root output_hists;
-
-static void output__insert_entry(struct hist_entry *he)
-{
-	struct rb_node **p = &output_hists.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *iter;
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		if (he->count > iter->count)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &output_hists);
-}
-
-static void output__resort(void)
-{
-	struct rb_node *next;
-	struct hist_entry *n;
-	struct rb_root *tree = &hist;
-
-	if (sort__need_collapse)
-		tree = &collapse_hists;
-
-	next = rb_first(tree);
-
-	while (next) {
-		n = rb_entry(next, struct hist_entry, rb_node);
-		next = rb_next(&n->rb_node);
-
-		rb_erase(&n->rb_node, tree);
-		output__insert_entry(n);
-	}
-}
-
-static unsigned long total = 0,
-		     total_mmap = 0,
-		     total_comm = 0,
-		     total_fork = 0,
-		     total_unknown = 0;
-
-static int
-process_sample_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	char level;
-	int show = 0;
-	struct dso *dso = NULL;
-	struct thread *thread;
-	u64 ip = event->ip.ip;
-	struct map *map = NULL;
-
-	thread = threads__findnew(event->ip.pid, &threads, &last_match);
-
-	dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->header.misc,
-		event->ip.pid,
-		(void *)(long)ip);
-
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
-	if (thread == NULL) {
+	if (event__preprocess_sample(event, &al, symbol_filter) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 			event->header.type);
 		return -1;
 	}
 
-	if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
-		show = SHOW_KERNEL;
-		level = 'k';
-
-		dso = kernel_dso;
-
-		dump_printf(" ...... dso: %s\n", dso->name);
-
-	} else if (event->header.misc & PERF_RECORD_MISC_USER) {
-
-		show = SHOW_USER;
-		level = '.';
-
-		map = thread__find_map(thread, ip);
-		if (map != NULL) {
-			ip = map->map_ip(map, ip);
-			dso = map->dso;
-		} else {
-			/*
-			 * If this is outside of all known maps,
-			 * and is a negative address, try to look it
-			 * up in the kernel dso, as it might be a
-			 * vsyscall (which executes in user-mode):
-			 */
-			if ((long long)ip < 0)
-				dso = kernel_dso;
-		}
-		dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
-
-	} else {
-		show = SHOW_HV;
-		level = 'H';
-		dump_printf(" ...... dso: [hypervisor]\n");
-	}
-
-	if (show & show_mask) {
-		struct symbol *sym = NULL;
-
-		if (dso)
-			sym = dso->find_symbol(dso, ip);
-
-		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
-			fprintf(stderr,
-		"problem incrementing symbol count, skipping event\n");
-			return -1;
-		}
-	}
-	total++;
-
-	return 0;
-}
-
-static int
-process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread;
-	struct map *map = map__new(&event->mmap, NULL, 0);
-
-	thread = threads__findnew(event->mmap.pid, &threads, &last_match);
-
-	dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->mmap.pid,
-		(void *)(long)event->mmap.start,
-		(void *)(long)event->mmap.len,
-		(void *)(long)event->mmap.pgoff,
-		event->mmap.filename);
-
-	if (thread == NULL || map == NULL) {
-		dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
-		return 0;
-	}
-
-	thread__insert_map(thread, map);
-	total_mmap++;
-
-	return 0;
-}
-
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread;
-
-	thread = threads__findnew(event->comm.pid, &threads, &last_match);
-	dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->comm.comm, event->comm.pid);
-
-	if (thread == NULL ||
-	    thread__set_comm(thread, event->comm.comm)) {
-		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
-		return -1;
-	}
-	total_comm++;
-
-	return 0;
-}
-
-static int
-process_fork_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread;
-	struct thread *parent;
-
-	thread = threads__findnew(event->fork.pid, &threads, &last_match);
-	parent = threads__findnew(event->fork.ppid, &threads, &last_match);
-	dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->fork.pid, event->fork.ppid);
-
-	/*
-	 * A thread clone will have the same PID for both
-	 * parent and child.
-	 */
-	if (thread == parent)
-		return 0;
-
-	if (!thread || !parent || thread__fork(thread, parent)) {
-		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
-		return -1;
-	}
-	total_fork++;
-
-	return 0;
-}
-
-static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	switch (event->header.type) {
-	case PERF_RECORD_SAMPLE:
-		return process_sample_event(event, offset, head);
-
-	case PERF_RECORD_MMAP:
-		return process_mmap_event(event, offset, head);
-
-	case PERF_RECORD_COMM:
-		return process_comm_event(event, offset, head);
-
-	case PERF_RECORD_FORK:
-		return process_fork_event(event, offset, head);
-	/*
-	 * We dont process them right now but they are fine:
-	 */
-
-	case PERF_RECORD_THROTTLE:
-	case PERF_RECORD_UNTHROTTLE:
-		return 0;
-
-	default:
+	if (hist_entry__add(&al, 1)) {
+		fprintf(stderr, "problem incrementing symbol count, "
+				"skipping event\n");
 		return -1;
 	}
 
 	return 0;
 }
 
-static int
-parse_line(FILE *file, struct symbol *sym, u64 start, u64 len)
+static int parse_line(FILE *file, struct hist_entry *he, u64 len)
 {
+	struct symbol *sym = he->sym;
 	char *line = NULL, *tmp, *tmp2;
 	static const char *prev_line;
 	static const char *prev_color;
 	unsigned int offset;
 	size_t line_len;
+	u64 start;
 	s64 line_ip;
 	int ret;
 	char *c;
@@ -709,22 +198,26 @@
 			line_ip = -1;
 	}
 
+	start = he->map->unmap_ip(he->map, sym->start);
+
 	if (line_ip != -1) {
 		const char *path = NULL;
 		unsigned int hits = 0;
 		double percent = 0.0;
 		const char *color;
-		struct sym_ext *sym_ext = sym->priv;
+		struct sym_priv *priv = symbol__priv(sym);
+		struct sym_ext *sym_ext = priv->ext;
+		struct sym_hist *h = priv->hist;
 
 		offset = line_ip - start;
 		if (offset < len)
-			hits = sym->hist[offset];
+			hits = h->ip[offset];
 
 		if (offset < len && sym_ext) {
 			path = sym_ext[offset].path;
 			percent = sym_ext[offset].percent;
-		} else if (sym->hist_sum)
-			percent = 100.0 * hits / sym->hist_sum;
+		} else if (h->sum)
+			percent = 100.0 * hits / h->sum;
 
 		color = get_percent_color(percent);
 
@@ -777,9 +270,10 @@
 	rb_insert_color(&sym_ext->node, &root_sym_ext);
 }
 
-static void free_source_line(struct symbol *sym, int len)
+static void free_source_line(struct hist_entry *he, int len)
 {
-	struct sym_ext *sym_ext = sym->priv;
+	struct sym_priv *priv = symbol__priv(he->sym);
+	struct sym_ext *sym_ext = priv->ext;
 	int i;
 
 	if (!sym_ext)
@@ -789,26 +283,30 @@
 		free(sym_ext[i].path);
 	free(sym_ext);
 
-	sym->priv = NULL;
+	priv->ext = NULL;
 	root_sym_ext = RB_ROOT;
 }
 
 /* Get the filename:line for the colored entries */
 static void
-get_source_line(struct symbol *sym, u64 start, int len, const char *filename)
+get_source_line(struct hist_entry *he, int len, const char *filename)
 {
+	struct symbol *sym = he->sym;
+	u64 start;
 	int i;
 	char cmd[PATH_MAX * 2];
 	struct sym_ext *sym_ext;
+	struct sym_priv *priv = symbol__priv(sym);
+	struct sym_hist *h = priv->hist;
 
-	if (!sym->hist_sum)
+	if (!h->sum)
 		return;
 
-	sym->priv = calloc(len, sizeof(struct sym_ext));
-	if (!sym->priv)
+	sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
+	if (!priv->ext)
 		return;
 
-	sym_ext = sym->priv;
+	start = he->map->unmap_ip(he->map, sym->start);
 
 	for (i = 0; i < len; i++) {
 		char *path = NULL;
@@ -816,7 +314,7 @@
 		u64 offset;
 		FILE *fp;
 
-		sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum;
+		sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
 		if (sym_ext[i].percent <= 0.5)
 			continue;
 
@@ -870,33 +368,34 @@
 	}
 }
 
-static void annotate_sym(struct dso *dso, struct symbol *sym)
+static void annotate_sym(struct hist_entry *he)
 {
-	const char *filename = dso->name, *d_filename;
-	u64 start, end, len;
+	struct map *map = he->map;
+	struct dso *dso = map->dso;
+	struct symbol *sym = he->sym;
+	const char *filename = dso->long_name, *d_filename;
+	u64 len;
 	char command[PATH_MAX*2];
 	FILE *file;
 
 	if (!filename)
 		return;
-	if (sym->module)
-		filename = sym->module->path;
-	else if (dso == kernel_dso)
-		filename = vmlinux_name;
 
-	start = sym->obj_start;
-	if (!start)
-		start = sym->start;
+	if (verbose)
+		fprintf(stderr, "%s: filename=%s, sym=%s, start=%Lx, end=%Lx\n",
+			__func__, filename, sym->name,
+			map->unmap_ip(map, sym->start),
+			map->unmap_ip(map, sym->end));
+
 	if (full_paths)
 		d_filename = filename;
 	else
 		d_filename = basename(filename);
 
-	end = start + sym->end - sym->start + 1;
 	len = sym->end - sym->start;
 
 	if (print_line) {
-		get_source_line(sym, start, len, filename);
+		get_source_line(he, len, filename);
 		print_summary(filename);
 	}
 
@@ -905,10 +404,12 @@
 	printf("------------------------------------------------\n");
 
 	if (verbose >= 2)
-		printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
+		printf("annotating [%p] %30s : [%p] %30s\n",
+		       dso, dso->long_name, sym, sym->name);
 
 	sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s",
-			(u64)start, (u64)end, filename, filename);
+		map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end),
+		filename, filename);
 
 	if (verbose >= 3)
 		printf("doing: %s\n", command);
@@ -918,159 +419,78 @@
 		return;
 
 	while (!feof(file)) {
-		if (parse_line(file, sym, start, len) < 0)
+		if (parse_line(file, he, len) < 0)
 			break;
 	}
 
 	pclose(file);
 	if (print_line)
-		free_source_line(sym, len);
+		free_source_line(he, len);
 }
 
 static void find_annotations(void)
 {
 	struct rb_node *nd;
-	struct dso *dso;
-	int count = 0;
 
-	list_for_each_entry(dso, &dsos, node) {
+	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+		struct sym_priv *priv;
 
-		for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) {
-			struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+		if (he->sym == NULL)
+			continue;
 
-			if (sym->hist) {
-				annotate_sym(dso, sym);
-				count++;
-			}
-		}
+		priv = symbol__priv(he->sym);
+		if (priv->hist == NULL)
+			continue;
+
+		annotate_sym(he);
+		/*
+		 * Since we have a hist_entry per IP for the same symbol, free
+		 * he->sym->hist to signal we already processed this symbol.
+		 */
+		free(priv->hist);
+		priv->hist = NULL;
 	}
-
-	if (!count)
-		printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter);
 }
 
+static struct perf_file_handler file_handler = {
+	.process_sample_event	= process_sample_event,
+	.process_mmap_event	= event__process_mmap,
+	.process_comm_event	= event__process_comm,
+	.process_fork_event	= event__process_task,
+};
+
 static int __cmd_annotate(void)
 {
-	int ret, rc = EXIT_FAILURE;
-	unsigned long offset = 0;
-	unsigned long head = 0;
-	struct stat input_stat;
-	event_t *event;
-	uint32_t size;
-	char *buf;
+	struct perf_header *header;
+	struct thread *idle;
+	int ret;
 
-	register_idle_thread(&threads, &last_match);
+	idle = register_idle_thread();
+	register_perf_file_handler(&file_handler);
 
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		perror("failed to open file");
-		exit(-1);
-	}
+	ret = mmap_dispatch_perf_file(&header, input_name, 0, 0,
+				      &event__cwdlen, &event__cwd);
+	if (ret)
+		return ret;
 
-	ret = fstat(input, &input_stat);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
-	}
-
-	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
-		fprintf(stderr, "file: %s not owned by current user or root\n", input_name);
-		exit(-1);
-	}
-
-	if (!input_stat.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-
-	if (load_kernel() < 0) {
-		perror("failed to load kernel symbols");
-		return EXIT_FAILURE;
-	}
-
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
-
-more:
-	event = (event_t *)(buf + head);
-
-	size = event->header.size;
-	if (!size)
-		size = 8;
-
-	if (head + event->header.size >= page_size * mmap_window) {
-		unsigned long shift = page_size * (head / page_size);
-		int munmap_ret;
-
-		munmap_ret = munmap(buf, page_size * mmap_window);
-		assert(munmap_ret == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-
-	dump_printf("%p [%p]: event: %d\n",
-			(void *)(offset + head),
-			(void *)(long)event->header.size,
-			event->header.type);
-
-	if (!size || process_event(event, offset, head) < 0) {
-
-		dump_printf("%p [%p]: skipping unknown header type: %d\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->header.type);
-
-		total_unknown++;
-
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
-	}
-
-	head += size;
-
-	if (offset + head < (unsigned long)input_stat.st_size)
-		goto more;
-
-	rc = EXIT_SUCCESS;
-	close(input);
-
-	dump_printf("      IP events: %10ld\n", total);
-	dump_printf("    mmap events: %10ld\n", total_mmap);
-	dump_printf("    comm events: %10ld\n", total_comm);
-	dump_printf("    fork events: %10ld\n", total_fork);
-	dump_printf(" unknown events: %10ld\n", total_unknown);
-
-	if (dump_trace)
+	if (dump_trace) {
+		event__print_totals();
 		return 0;
+	}
 
-	if (verbose >= 3)
-		threads__fprintf(stdout, &threads);
+	if (verbose > 3)
+		threads__fprintf(stdout);
 
-	if (verbose >= 2)
+	if (verbose > 2)
 		dsos__fprintf(stdout);
 
 	collapse__resort();
-	output__resort();
+	output__resort(event__total[0]);
 
 	find_annotations();
 
-	return rc;
+	return ret;
 }
 
 static const char * const annotate_usage[] = {
@@ -1088,8 +508,9 @@
 		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
-	OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
-	OPT_BOOLEAN('m', "modules", &modules,
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
 	OPT_BOOLEAN('l', "print-line", &print_line,
 		    "print matching source lines (may be slow)"),
@@ -1115,9 +536,8 @@
 
 int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 {
-	symbol__init();
-
-	page_size = getpagesize();
+	if (symbol__init(&symbol_conf) < 0)
+		return -1;
 
 	argc = parse_options(argc, argv, options, annotate_usage, 0);
 
@@ -1134,10 +554,13 @@
 		sym_hist_filter = argv[0];
 	}
 
-	if (!sym_hist_filter)
-		usage_with_options(annotate_usage, options);
-
 	setup_pager();
 
+	if (field_sep && *field_sep == '.') {
+		fputs("'.' is the only non valid --field-separator argument\n",
+				stderr);
+		exit(129);
+	}
+
 	return __cmd_annotate();
 }
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
new file mode 100644
index 0000000..e043eb8
--- /dev/null
+++ b/tools/perf/builtin-bench.c
@@ -0,0 +1,196 @@
+/*
+ *
+ * builtin-bench.c
+ *
+ * General benchmarking subsystem provided by perf
+ *
+ * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+/*
+ *
+ * Available subsystem list:
+ *  sched ... scheduler and IPC mechanism
+ *  mem   ... memory access performance
+ *
+ */
+
+#include "perf.h"
+#include "util/util.h"
+#include "util/parse-options.h"
+#include "builtin.h"
+#include "bench/bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct bench_suite {
+	const char *name;
+	const char *summary;
+	int (*fn)(int, const char **, const char *);
+};
+
+static struct bench_suite sched_suites[] = {
+	{ "messaging",
+	  "Benchmark for scheduler and IPC mechanisms",
+	  bench_sched_messaging },
+	{ "pipe",
+	  "Flood of communication over pipe() between two processes",
+	  bench_sched_pipe      },
+	{ NULL,
+	  NULL,
+	  NULL                  }
+};
+
+static struct bench_suite mem_suites[] = {
+	{ "memcpy",
+	  "Simple memory copy in various ways",
+	  bench_mem_memcpy },
+	{ NULL,
+	  NULL,
+	  NULL             }
+};
+
+struct bench_subsys {
+	const char *name;
+	const char *summary;
+	struct bench_suite *suites;
+};
+
+static struct bench_subsys subsystems[] = {
+	{ "sched",
+	  "scheduler and IPC mechanism",
+	  sched_suites },
+	{ "mem",
+	  "memory access performance",
+	  mem_suites },
+	{ NULL,
+	  NULL,
+	  NULL       }
+};
+
+static void dump_suites(int subsys_index)
+{
+	int i;
+
+	printf("List of available suites for %s...\n\n",
+	       subsystems[subsys_index].name);
+
+	for (i = 0; subsystems[subsys_index].suites[i].name; i++)
+		printf("\t%s: %s\n",
+		       subsystems[subsys_index].suites[i].name,
+		       subsystems[subsys_index].suites[i].summary);
+
+	printf("\n");
+	return;
+}
+
+static char *bench_format_str;
+int bench_format = BENCH_FORMAT_DEFAULT;
+
+static const struct option bench_options[] = {
+	OPT_STRING('f', "format", &bench_format_str, "default",
+		    "Specify format style"),
+	OPT_END()
+};
+
+static const char * const bench_usage[] = {
+	"perf bench [<common options>] <subsystem> <suite> [<options>]",
+	NULL
+};
+
+static void print_usage(void)
+{
+	int i;
+
+	printf("Usage: \n");
+	for (i = 0; bench_usage[i]; i++)
+		printf("\t%s\n", bench_usage[i]);
+	printf("\n");
+
+	printf("List of available subsystems...\n\n");
+
+	for (i = 0; subsystems[i].name; i++)
+		printf("\t%s: %s\n",
+		       subsystems[i].name, subsystems[i].summary);
+	printf("\n");
+}
+
+static int bench_str2int(char *str)
+{
+	if (!str)
+		return BENCH_FORMAT_DEFAULT;
+
+	if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR))
+		return BENCH_FORMAT_DEFAULT;
+	else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR))
+		return BENCH_FORMAT_SIMPLE;
+
+	return BENCH_FORMAT_UNKNOWN;
+}
+
+int cmd_bench(int argc, const char **argv, const char *prefix __used)
+{
+	int i, j, status = 0;
+
+	if (argc < 2) {
+		/* No subsystem specified. */
+		print_usage();
+		goto end;
+	}
+
+	argc = parse_options(argc, argv, bench_options, bench_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	bench_format = bench_str2int(bench_format_str);
+	if (bench_format == BENCH_FORMAT_UNKNOWN) {
+		printf("Unknown format descriptor:%s\n", bench_format_str);
+		goto end;
+	}
+
+	if (argc < 1) {
+		print_usage();
+		goto end;
+	}
+
+	for (i = 0; subsystems[i].name; i++) {
+		if (strcmp(subsystems[i].name, argv[0]))
+			continue;
+
+		if (argc < 2) {
+			/* No suite specified. */
+			dump_suites(i);
+			goto end;
+		}
+
+		for (j = 0; subsystems[i].suites[j].name; j++) {
+			if (strcmp(subsystems[i].suites[j].name, argv[1]))
+				continue;
+
+			if (bench_format == BENCH_FORMAT_DEFAULT)
+				printf("# Running %s/%s benchmark...\n",
+				       subsystems[i].name,
+				       subsystems[i].suites[j].name);
+			status = subsystems[i].suites[j].fn(argc - 1,
+							    argv + 1, prefix);
+			goto end;
+		}
+
+		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+			dump_suites(i);
+			goto end;
+		}
+
+		printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
+		status = 1;
+		goto end;
+	}
+
+	printf("Unknown subsystem:%s\n", argv[0]);
+	status = 1;
+
+end:
+	return status;
+}
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
new file mode 100644
index 0000000..7dee9d19
--- /dev/null
+++ b/tools/perf/builtin-buildid-list.c
@@ -0,0 +1,116 @@
+/*
+ * builtin-buildid-list.c
+ *
+ * Builtin buildid-list command: list buildids in perf.data
+ *
+ * Copyright (C) 2009, Red Hat Inc.
+ * Copyright (C) 2009, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "builtin.h"
+#include "perf.h"
+#include "util/cache.h"
+#include "util/data_map.h"
+#include "util/debug.h"
+#include "util/header.h"
+#include "util/parse-options.h"
+#include "util/symbol.h"
+
+static char const *input_name = "perf.data";
+static int force;
+
+static const char *const buildid_list_usage[] = {
+	"perf report [<options>]",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose"),
+	OPT_END()
+};
+
+static int perf_file_section__process_buildids(struct perf_file_section *self,
+					       int feat, int fd)
+{
+	if (feat != HEADER_BUILD_ID)
+		return 0;
+
+	if (lseek(fd, self->offset, SEEK_SET) < 0) {
+		pr_warning("Failed to lseek to %Ld offset for buildids!\n",
+			   self->offset);
+		return -1;
+	}
+
+	if (perf_header__read_build_ids(fd, self->offset, self->size)) {
+		pr_warning("Failed to read buildids!\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __cmd_buildid_list(void)
+{
+	int err = -1;
+	struct perf_header *header;
+	struct perf_file_header f_header;
+	struct stat input_stat;
+	int input = open(input_name, O_RDONLY);
+
+	if (input < 0) {
+		pr_err("failed to open file: %s", input_name);
+		if (!strcmp(input_name, "perf.data"))
+			pr_err("  (try 'perf record' first)");
+		pr_err("\n");
+		goto out;
+	}
+
+	err = fstat(input, &input_stat);
+	if (err < 0) {
+		perror("failed to stat file");
+		goto out_close;
+	}
+
+	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
+		pr_err("file %s not owned by current user or root\n",
+		       input_name);
+		goto out_close;
+	}
+
+	if (!input_stat.st_size) {
+		pr_info("zero-sized file, nothing to do!\n");
+		goto out_close;
+	}
+
+	err = -1;
+	header = perf_header__new();
+	if (header == NULL)
+		goto out_close;
+
+	if (perf_file_header__read(&f_header, header, input) < 0) {
+		pr_warning("incompatible file format");
+		goto out_close;
+	}
+
+	err = perf_header__process_sections(header, input,
+				         perf_file_section__process_buildids);
+
+	if (err < 0)
+		goto out_close;
+
+	dsos__fprintf_buildid(stdout);
+out_close:
+	close(input);
+out:
+	return err;
+}
+
+int cmd_buildid_list(int argc, const char **argv, const char *prefix __used)
+{
+	argc = parse_options(argc, argv, options, buildid_list_usage, 0);
+	setup_pager();
+	return __cmd_buildid_list();
+}
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 4fb8734..9f810b1 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -61,8 +61,7 @@
 {
 	struct man_viewer_info_list *viewer;
 
-	for (viewer = man_viewer_info_list; viewer; viewer = viewer->next)
-	{
+	for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) {
 		if (!strcasecmp(name, viewer->name))
 			return viewer->info;
 	}
@@ -115,7 +114,7 @@
 	return 0;
 }
 
-static void exec_woman_emacs(const char* path, const char *page)
+static void exec_woman_emacs(const char *path, const char *page)
 {
 	if (!check_emacsclient_version()) {
 		/* This works only with emacsclient version >= 22. */
@@ -129,7 +128,7 @@
 	}
 }
 
-static void exec_man_konqueror(const char* path, const char *page)
+static void exec_man_konqueror(const char *path, const char *page)
 {
 	const char *display = getenv("DISPLAY");
 	if (display && *display) {
@@ -157,7 +156,7 @@
 	}
 }
 
-static void exec_man_man(const char* path, const char *page)
+static void exec_man_man(const char *path, const char *page)
 {
 	if (!path)
 		path = "man";
@@ -180,7 +179,7 @@
 
 	while (*p)
 		p = &((*p)->next);
-	*p = calloc(1, (sizeof(**p) + len + 1));
+	*p = zalloc(sizeof(**p) + len + 1);
 	strncpy((*p)->name, name, len);
 }
 
@@ -195,7 +194,7 @@
 				   size_t len,
 				   const char *value)
 {
-	struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1);
+	struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1);
 
 	strncpy(new->name, name, len);
 	new->info = strdup(value);
@@ -364,9 +363,8 @@
 
 	setup_man_path();
 	for (viewer = man_viewer_list; viewer; viewer = viewer->next)
-	{
 		exec_viewer(viewer->name, page); /* will return when unable */
-	}
+
 	if (fallback)
 		exec_viewer(fallback, page);
 	exec_viewer("man", page);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
new file mode 100644
index 0000000..047fef7
--- /dev/null
+++ b/tools/perf/builtin-kmem.c
@@ -0,0 +1,807 @@
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+
+#include "util/parse-options.h"
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+#include "util/data_map.h"
+
+#include <linux/rbtree.h>
+
+struct alloc_stat;
+typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
+
+static char const		*input_name = "perf.data";
+
+static struct perf_header	*header;
+static u64			sample_type;
+
+static int			alloc_flag;
+static int			caller_flag;
+
+static int			alloc_lines = -1;
+static int			caller_lines = -1;
+
+static bool			raw_ip;
+
+static char			default_sort_order[] = "frag,hit,bytes";
+
+static int			*cpunode_map;
+static int			max_cpu_num;
+
+struct alloc_stat {
+	u64	call_site;
+	u64	ptr;
+	u64	bytes_req;
+	u64	bytes_alloc;
+	u32	hit;
+	u32	pingpong;
+
+	short	alloc_cpu;
+
+	struct rb_node node;
+};
+
+static struct rb_root root_alloc_stat;
+static struct rb_root root_alloc_sorted;
+static struct rb_root root_caller_stat;
+static struct rb_root root_caller_sorted;
+
+static unsigned long total_requested, total_allocated;
+static unsigned long nr_allocs, nr_cross_allocs;
+
+struct raw_event_sample {
+	u32 size;
+	char data[0];
+};
+
+#define PATH_SYS_NODE	"/sys/devices/system/node"
+
+static void init_cpunode_map(void)
+{
+	FILE *fp;
+	int i;
+
+	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
+	if (!fp) {
+		max_cpu_num = 4096;
+		return;
+	}
+
+	if (fscanf(fp, "%d", &max_cpu_num) < 1)
+		die("Failed to read 'kernel_max' from sysfs");
+	max_cpu_num++;
+
+	cpunode_map = calloc(max_cpu_num, sizeof(int));
+	if (!cpunode_map)
+		die("calloc");
+	for (i = 0; i < max_cpu_num; i++)
+		cpunode_map[i] = -1;
+	fclose(fp);
+}
+
+static void setup_cpunode_map(void)
+{
+	struct dirent *dent1, *dent2;
+	DIR *dir1, *dir2;
+	unsigned int cpu, mem;
+	char buf[PATH_MAX];
+
+	init_cpunode_map();
+
+	dir1 = opendir(PATH_SYS_NODE);
+	if (!dir1)
+		return;
+
+	while (true) {
+		dent1 = readdir(dir1);
+		if (!dent1)
+			break;
+
+		if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+			continue;
+
+		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
+		dir2 = opendir(buf);
+		if (!dir2)
+			continue;
+		while (true) {
+			dent2 = readdir(dir2);
+			if (!dent2)
+				break;
+			if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+				continue;
+			cpunode_map[cpu] = mem;
+		}
+	}
+}
+
+static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+			      int bytes_req, int bytes_alloc, int cpu)
+{
+	struct rb_node **node = &root_alloc_stat.rb_node;
+	struct rb_node *parent = NULL;
+	struct alloc_stat *data = NULL;
+
+	while (*node) {
+		parent = *node;
+		data = rb_entry(*node, struct alloc_stat, node);
+
+		if (ptr > data->ptr)
+			node = &(*node)->rb_right;
+		else if (ptr < data->ptr)
+			node = &(*node)->rb_left;
+		else
+			break;
+	}
+
+	if (data && data->ptr == ptr) {
+		data->hit++;
+		data->bytes_req += bytes_req;
+		data->bytes_alloc += bytes_req;
+	} else {
+		data = malloc(sizeof(*data));
+		if (!data)
+			die("malloc");
+		data->ptr = ptr;
+		data->pingpong = 0;
+		data->hit = 1;
+		data->bytes_req = bytes_req;
+		data->bytes_alloc = bytes_alloc;
+
+		rb_link_node(&data->node, parent, node);
+		rb_insert_color(&data->node, &root_alloc_stat);
+	}
+	data->call_site = call_site;
+	data->alloc_cpu = cpu;
+}
+
+static void insert_caller_stat(unsigned long call_site,
+			      int bytes_req, int bytes_alloc)
+{
+	struct rb_node **node = &root_caller_stat.rb_node;
+	struct rb_node *parent = NULL;
+	struct alloc_stat *data = NULL;
+
+	while (*node) {
+		parent = *node;
+		data = rb_entry(*node, struct alloc_stat, node);
+
+		if (call_site > data->call_site)
+			node = &(*node)->rb_right;
+		else if (call_site < data->call_site)
+			node = &(*node)->rb_left;
+		else
+			break;
+	}
+
+	if (data && data->call_site == call_site) {
+		data->hit++;
+		data->bytes_req += bytes_req;
+		data->bytes_alloc += bytes_req;
+	} else {
+		data = malloc(sizeof(*data));
+		if (!data)
+			die("malloc");
+		data->call_site = call_site;
+		data->pingpong = 0;
+		data->hit = 1;
+		data->bytes_req = bytes_req;
+		data->bytes_alloc = bytes_alloc;
+
+		rb_link_node(&data->node, parent, node);
+		rb_insert_color(&data->node, &root_caller_stat);
+	}
+}
+
+static void process_alloc_event(struct raw_event_sample *raw,
+				struct event *event,
+				int cpu,
+				u64 timestamp __used,
+				struct thread *thread __used,
+				int node)
+{
+	unsigned long call_site;
+	unsigned long ptr;
+	int bytes_req;
+	int bytes_alloc;
+	int node1, node2;
+
+	ptr = raw_field_value(event, "ptr", raw->data);
+	call_site = raw_field_value(event, "call_site", raw->data);
+	bytes_req = raw_field_value(event, "bytes_req", raw->data);
+	bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
+
+	insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
+	insert_caller_stat(call_site, bytes_req, bytes_alloc);
+
+	total_requested += bytes_req;
+	total_allocated += bytes_alloc;
+
+	if (node) {
+		node1 = cpunode_map[cpu];
+		node2 = raw_field_value(event, "node", raw->data);
+		if (node1 != node2)
+			nr_cross_allocs++;
+	}
+	nr_allocs++;
+}
+
+static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
+static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+
+static struct alloc_stat *search_alloc_stat(unsigned long ptr,
+					    unsigned long call_site,
+					    struct rb_root *root,
+					    sort_fn_t sort_fn)
+{
+	struct rb_node *node = root->rb_node;
+	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
+
+	while (node) {
+		struct alloc_stat *data;
+		int cmp;
+
+		data = rb_entry(node, struct alloc_stat, node);
+
+		cmp = sort_fn(&key, data);
+		if (cmp < 0)
+			node = node->rb_left;
+		else if (cmp > 0)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+static void process_free_event(struct raw_event_sample *raw,
+			       struct event *event,
+			       int cpu,
+			       u64 timestamp __used,
+			       struct thread *thread __used)
+{
+	unsigned long ptr;
+	struct alloc_stat *s_alloc, *s_caller;
+
+	ptr = raw_field_value(event, "ptr", raw->data);
+
+	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
+	if (!s_alloc)
+		return;
+
+	if (cpu != s_alloc->alloc_cpu) {
+		s_alloc->pingpong++;
+
+		s_caller = search_alloc_stat(0, s_alloc->call_site,
+					     &root_caller_stat, callsite_cmp);
+		assert(s_caller);
+		s_caller->pingpong++;
+	}
+	s_alloc->alloc_cpu = -1;
+}
+
+static void
+process_raw_event(event_t *raw_event __used, void *more_data,
+		  int cpu, u64 timestamp, struct thread *thread)
+{
+	struct raw_event_sample *raw = more_data;
+	struct event *event;
+	int type;
+
+	type = trace_parse_common_type(raw->data);
+	event = trace_find_event(type);
+
+	if (!strcmp(event->name, "kmalloc") ||
+	    !strcmp(event->name, "kmem_cache_alloc")) {
+		process_alloc_event(raw, event, cpu, timestamp, thread, 0);
+		return;
+	}
+
+	if (!strcmp(event->name, "kmalloc_node") ||
+	    !strcmp(event->name, "kmem_cache_alloc_node")) {
+		process_alloc_event(raw, event, cpu, timestamp, thread, 1);
+		return;
+	}
+
+	if (!strcmp(event->name, "kfree") ||
+	    !strcmp(event->name, "kmem_cache_free")) {
+		process_free_event(raw, event, cpu, timestamp, thread);
+		return;
+	}
+}
+
+static int process_sample_event(event_t *event)
+{
+	u64 ip = event->ip.ip;
+	u64 timestamp = -1;
+	u32 cpu = -1;
+	u64 period = 1;
+	void *more_data = event->ip.__more_data;
+	struct thread *thread = threads__findnew(event->ip.pid);
+
+	if (sample_type & PERF_SAMPLE_TIME) {
+		timestamp = *(u64 *)more_data;
+		more_data += sizeof(u64);
+	}
+
+	if (sample_type & PERF_SAMPLE_CPU) {
+		cpu = *(u32 *)more_data;
+		more_data += sizeof(u32);
+		more_data += sizeof(u32); /* reserved */
+	}
+
+	if (sample_type & PERF_SAMPLE_PERIOD) {
+		period = *(u64 *)more_data;
+		more_data += sizeof(u64);
+	}
+
+	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
+		event->header.misc,
+		event->ip.pid, event->ip.tid,
+		(void *)(long)ip,
+		(long long)period);
+
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			 event->header.type);
+		return -1;
+	}
+
+	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	process_raw_event(event, more_data, cpu, timestamp, thread);
+
+	return 0;
+}
+
+static int sample_type_check(u64 type)
+{
+	sample_type = type;
+
+	if (!(sample_type & PERF_SAMPLE_RAW)) {
+		fprintf(stderr,
+			"No trace sample to read. Did you call perf record "
+			"without -R?");
+		return -1;
+	}
+
+	return 0;
+}
+
+static struct perf_file_handler file_handler = {
+	.process_sample_event	= process_sample_event,
+	.process_comm_event	= event__process_comm,
+	.sample_type_check	= sample_type_check,
+};
+
+static int read_events(void)
+{
+	register_idle_thread();
+	register_perf_file_handler(&file_handler);
+
+	return mmap_dispatch_perf_file(&header, input_name, 0, 0,
+				       &event__cwdlen, &event__cwd);
+}
+
+static double fragmentation(unsigned long n_req, unsigned long n_alloc)
+{
+	if (n_alloc == 0)
+		return 0.0;
+	else
+		return 100.0 - (100.0 * n_req / n_alloc);
+}
+
+static void __print_result(struct rb_root *root, int n_lines, int is_caller)
+{
+	struct rb_node *next;
+
+	printf("%.102s\n", graph_dotted_line);
+	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
+	printf(" Total_alloc/Per | Total_req/Per   | Hit   | Ping-pong | Frag\n");
+	printf("%.102s\n", graph_dotted_line);
+
+	next = rb_first(root);
+
+	while (next && n_lines--) {
+		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
+						   node);
+		struct symbol *sym = NULL;
+		char buf[BUFSIZ];
+		u64 addr;
+
+		if (is_caller) {
+			addr = data->call_site;
+			if (!raw_ip)
+				sym = thread__find_function(kthread, addr, NULL);
+		} else
+			addr = data->ptr;
+
+		if (sym != NULL)
+			snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
+				 addr - sym->start);
+		else
+			snprintf(buf, sizeof(buf), "%#Lx", addr);
+		printf(" %-34s |", buf);
+
+		printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
+		       (unsigned long long)data->bytes_alloc,
+		       (unsigned long)data->bytes_alloc / data->hit,
+		       (unsigned long long)data->bytes_req,
+		       (unsigned long)data->bytes_req / data->hit,
+		       (unsigned long)data->hit,
+		       (unsigned long)data->pingpong,
+		       fragmentation(data->bytes_req, data->bytes_alloc));
+
+		next = rb_next(next);
+	}
+
+	if (n_lines == -1)
+		printf(" ...                                | ...             | ...             | ...    | ...      | ...   \n");
+
+	printf("%.102s\n", graph_dotted_line);
+}
+
+static void print_summary(void)
+{
+	printf("\nSUMMARY\n=======\n");
+	printf("Total bytes requested: %lu\n", total_requested);
+	printf("Total bytes allocated: %lu\n", total_allocated);
+	printf("Total bytes wasted on internal fragmentation: %lu\n",
+	       total_allocated - total_requested);
+	printf("Internal fragmentation: %f%%\n",
+	       fragmentation(total_requested, total_allocated));
+	printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
+}
+
+static void print_result(void)
+{
+	if (caller_flag)
+		__print_result(&root_caller_sorted, caller_lines, 1);
+	if (alloc_flag)
+		__print_result(&root_alloc_sorted, alloc_lines, 0);
+	print_summary();
+}
+
+struct sort_dimension {
+	const char		name[20];
+	sort_fn_t		cmp;
+	struct list_head	list;
+};
+
+static LIST_HEAD(caller_sort);
+static LIST_HEAD(alloc_sort);
+
+static void sort_insert(struct rb_root *root, struct alloc_stat *data,
+			struct list_head *sort_list)
+{
+	struct rb_node **new = &(root->rb_node);
+	struct rb_node *parent = NULL;
+	struct sort_dimension *sort;
+
+	while (*new) {
+		struct alloc_stat *this;
+		int cmp = 0;
+
+		this = rb_entry(*new, struct alloc_stat, node);
+		parent = *new;
+
+		list_for_each_entry(sort, sort_list, list) {
+			cmp = sort->cmp(data, this);
+			if (cmp)
+				break;
+		}
+
+		if (cmp > 0)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+}
+
+static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
+			  struct list_head *sort_list)
+{
+	struct rb_node *node;
+	struct alloc_stat *data;
+
+	for (;;) {
+		node = rb_first(root);
+		if (!node)
+			break;
+
+		rb_erase(node, root);
+		data = rb_entry(node, struct alloc_stat, node);
+		sort_insert(root_sorted, data, sort_list);
+	}
+}
+
+static void sort_result(void)
+{
+	__sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
+	__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
+}
+
+static int __cmd_kmem(void)
+{
+	setup_pager();
+	read_events();
+	sort_result();
+	print_result();
+
+	return 0;
+}
+
+static const char * const kmem_usage[] = {
+	"perf kmem [<options>] {record}",
+	NULL
+};
+
+static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+	if (l->ptr < r->ptr)
+		return -1;
+	else if (l->ptr > r->ptr)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension ptr_sort_dimension = {
+	.name	= "ptr",
+	.cmp	= ptr_cmp,
+};
+
+static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+	if (l->call_site < r->call_site)
+		return -1;
+	else if (l->call_site > r->call_site)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension callsite_sort_dimension = {
+	.name	= "callsite",
+	.cmp	= callsite_cmp,
+};
+
+static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+	if (l->hit < r->hit)
+		return -1;
+	else if (l->hit > r->hit)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension hit_sort_dimension = {
+	.name	= "hit",
+	.cmp	= hit_cmp,
+};
+
+static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+	if (l->bytes_alloc < r->bytes_alloc)
+		return -1;
+	else if (l->bytes_alloc > r->bytes_alloc)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension bytes_sort_dimension = {
+	.name	= "bytes",
+	.cmp	= bytes_cmp,
+};
+
+static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+	double x, y;
+
+	x = fragmentation(l->bytes_req, l->bytes_alloc);
+	y = fragmentation(r->bytes_req, r->bytes_alloc);
+
+	if (x < y)
+		return -1;
+	else if (x > y)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension frag_sort_dimension = {
+	.name	= "frag",
+	.cmp	= frag_cmp,
+};
+
+static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+	if (l->pingpong < r->pingpong)
+		return -1;
+	else if (l->pingpong > r->pingpong)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension pingpong_sort_dimension = {
+	.name	= "pingpong",
+	.cmp	= pingpong_cmp,
+};
+
+static struct sort_dimension *avail_sorts[] = {
+	&ptr_sort_dimension,
+	&callsite_sort_dimension,
+	&hit_sort_dimension,
+	&bytes_sort_dimension,
+	&frag_sort_dimension,
+	&pingpong_sort_dimension,
+};
+
+#define NUM_AVAIL_SORTS	\
+	(int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
+
+static int sort_dimension__add(const char *tok, struct list_head *list)
+{
+	struct sort_dimension *sort;
+	int i;
+
+	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
+		if (!strcmp(avail_sorts[i]->name, tok)) {
+			sort = malloc(sizeof(*sort));
+			if (!sort)
+				die("malloc");
+			memcpy(sort, avail_sorts[i], sizeof(*sort));
+			list_add_tail(&sort->list, list);
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+static int setup_sorting(struct list_head *sort_list, const char *arg)
+{
+	char *tok;
+	char *str = strdup(arg);
+
+	if (!str)
+		die("strdup");
+
+	while (true) {
+		tok = strsep(&str, ",");
+		if (!tok)
+			break;
+		if (sort_dimension__add(tok, sort_list) < 0) {
+			error("Unknown --sort key: '%s'", tok);
+			return -1;
+		}
+	}
+
+	free(str);
+	return 0;
+}
+
+static int parse_sort_opt(const struct option *opt __used,
+			  const char *arg, int unset __used)
+{
+	if (!arg)
+		return -1;
+
+	if (caller_flag > alloc_flag)
+		return setup_sorting(&caller_sort, arg);
+	else
+		return setup_sorting(&alloc_sort, arg);
+
+	return 0;
+}
+
+static int parse_stat_opt(const struct option *opt __used,
+			  const char *arg, int unset __used)
+{
+	if (!arg)
+		return -1;
+
+	if (strcmp(arg, "alloc") == 0)
+		alloc_flag = (caller_flag + 1);
+	else if (strcmp(arg, "caller") == 0)
+		caller_flag = (alloc_flag + 1);
+	else
+		return -1;
+	return 0;
+}
+
+static int parse_line_opt(const struct option *opt __used,
+			  const char *arg, int unset __used)
+{
+	int lines;
+
+	if (!arg)
+		return -1;
+
+	lines = strtoul(arg, NULL, 10);
+
+	if (caller_flag > alloc_flag)
+		caller_lines = lines;
+	else
+		alloc_lines = lines;
+
+	return 0;
+}
+
+static const struct option kmem_options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		   "input file name"),
+	OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
+		     "stat selector, Pass 'alloc' or 'caller'.",
+		     parse_stat_opt),
+	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
+		     "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
+		     parse_sort_opt),
+	OPT_CALLBACK('l', "line", NULL, "num",
+		     "show n lins",
+		     parse_line_opt),
+	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
+	OPT_END()
+};
+
+static const char *record_args[] = {
+	"record",
+	"-a",
+	"-R",
+	"-M",
+	"-f",
+	"-c", "1",
+	"-e", "kmem:kmalloc",
+	"-e", "kmem:kmalloc_node",
+	"-e", "kmem:kfree",
+	"-e", "kmem:kmem_cache_alloc",
+	"-e", "kmem:kmem_cache_alloc_node",
+	"-e", "kmem:kmem_cache_free",
+};
+
+static int __cmd_record(int argc, const char **argv)
+{
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+
+	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[i] = strdup(record_args[i]);
+
+	for (j = 1; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	return cmd_record(i, rec_argv, NULL);
+}
+
+int cmd_kmem(int argc, const char **argv, const char *prefix __used)
+{
+	symbol__init(0);
+
+	argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
+
+	if (argc && !strncmp(argv[0], "rec", 3))
+		return __cmd_record(argc, argv);
+	else if (argc)
+		usage_with_options(kmem_usage, kmem_options);
+
+	if (list_empty(&caller_sort))
+		setup_sorting(&caller_sort, default_sort_order);
+	if (list_empty(&alloc_sort))
+		setup_sorting(&alloc_sort, default_sort_order);
+
+	setup_cpunode_map();
+
+	return __cmd_kmem();
+}
+
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
new file mode 100644
index 0000000..a58e11b
--- /dev/null
+++ b/tools/perf/builtin-probe.c
@@ -0,0 +1,242 @@
+/*
+ * builtin-probe.c
+ *
+ * Builtin probe command: Set up probe events by C expression
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#define _GNU_SOURCE
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+#undef _GNU_SOURCE
+#include "perf.h"
+#include "builtin.h"
+#include "util/util.h"
+#include "util/event.h"
+#include "util/debug.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"	/* For debugfs_path */
+#include "util/probe-finder.h"
+#include "util/probe-event.h"
+
+/* Default vmlinux search paths */
+#define NR_SEARCH_PATH 3
+const char *default_search_path[NR_SEARCH_PATH] = {
+"/lib/modules/%s/build/vmlinux",		/* Custom build kernel */
+"/usr/lib/debug/lib/modules/%s/vmlinux",	/* Red Hat debuginfo */
+"/boot/vmlinux-debug-%s",			/* Ubuntu */
+};
+
+#define MAX_PATH_LEN 256
+#define MAX_PROBES 128
+
+/* Session management structure */
+static struct {
+	char *vmlinux;
+	char *release;
+	int need_dwarf;
+	int nr_probe;
+	struct probe_point probes[MAX_PROBES];
+} session;
+
+static bool listing;
+
+/* Parse an event definition. Note that any error must die. */
+static void parse_probe_event(const char *str)
+{
+	struct probe_point *pp = &session.probes[session.nr_probe];
+
+	pr_debug("probe-definition(%d): %s\n", session.nr_probe, str);
+	if (++session.nr_probe == MAX_PROBES)
+		die("Too many probes (> %d) are specified.", MAX_PROBES);
+
+	/* Parse perf-probe event into probe_point */
+	session.need_dwarf = parse_perf_probe_event(str, pp);
+
+	pr_debug("%d arguments\n", pp->nr_args);
+}
+
+static int opt_add_probe_event(const struct option *opt __used,
+			      const char *str, int unset __used)
+{
+	if (str)
+		parse_probe_event(str);
+	return 0;
+}
+
+#ifndef NO_LIBDWARF
+static int open_default_vmlinux(void)
+{
+	struct utsname uts;
+	char fname[MAX_PATH_LEN];
+	int fd, ret, i;
+
+	ret = uname(&uts);
+	if (ret) {
+		pr_debug("uname() failed.\n");
+		return -errno;
+	}
+	session.release = uts.release;
+	for (i = 0; i < NR_SEARCH_PATH; i++) {
+		ret = snprintf(fname, MAX_PATH_LEN,
+			       default_search_path[i], session.release);
+		if (ret >= MAX_PATH_LEN || ret < 0) {
+			pr_debug("Filename(%d,%s) is too long.\n", i,
+				uts.release);
+			errno = E2BIG;
+			return -E2BIG;
+		}
+		pr_debug("try to open %s\n", fname);
+		fd = open(fname, O_RDONLY);
+		if (fd >= 0)
+			break;
+	}
+	return fd;
+}
+#endif
+
+static const char * const probe_usage[] = {
+	"perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
+	"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
+	"perf probe --list",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show parsed arguments, etc)"),
+#ifndef NO_LIBDWARF
+	OPT_STRING('k', "vmlinux", &session.vmlinux, "file",
+		"vmlinux/module pathname"),
+#endif
+	OPT_BOOLEAN('l', "list", &listing, "list up current probes"),
+	OPT_CALLBACK('a', "add", NULL,
+#ifdef NO_LIBDWARF
+		"FUNC[+OFFS|%return] [ARG ...]",
+#else
+		"FUNC[+OFFS|%return|:RLN][@SRC]|SRC:ALN [ARG ...]",
+#endif
+		"probe point definition, where\n"
+		"\t\tGRP:\tGroup name (optional)\n"
+		"\t\tNAME:\tEvent name\n"
+		"\t\tFUNC:\tFunction name\n"
+		"\t\tOFFS:\tOffset from function entry (in byte)\n"
+		"\t\t%return:\tPut the probe at function return\n"
+#ifdef NO_LIBDWARF
+		"\t\tARG:\tProbe argument (only \n"
+#else
+		"\t\tSRC:\tSource code path\n"
+		"\t\tRLN:\tRelative line number from function entry.\n"
+		"\t\tALN:\tAbsolute line number in file.\n"
+		"\t\tARG:\tProbe argument (local variable name or\n"
+#endif
+		"\t\t\tkprobe-tracer argument format.)\n",
+		opt_add_probe_event),
+	OPT_END()
+};
+
+int cmd_probe(int argc, const char **argv, const char *prefix __used)
+{
+	int i, j, ret;
+#ifndef NO_LIBDWARF
+	int fd;
+#endif
+	struct probe_point *pp;
+
+	argc = parse_options(argc, argv, options, probe_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	for (i = 0; i < argc; i++)
+		parse_probe_event(argv[i]);
+
+	if ((session.nr_probe == 0 && !listing) ||
+	    (session.nr_probe != 0 && listing))
+		usage_with_options(probe_usage, options);
+
+	if (listing) {
+		show_perf_probe_events();
+		return 0;
+	}
+
+	if (session.need_dwarf)
+#ifdef NO_LIBDWARF
+		die("Debuginfo-analysis is not supported");
+#else	/* !NO_LIBDWARF */
+		pr_debug("Some probes require debuginfo.\n");
+
+	if (session.vmlinux)
+		fd = open(session.vmlinux, O_RDONLY);
+	else
+		fd = open_default_vmlinux();
+	if (fd < 0) {
+		if (session.need_dwarf)
+			die("Could not open vmlinux/module file.");
+
+		pr_warning("Could not open vmlinux/module file."
+			   " Try to use symbols.\n");
+		goto end_dwarf;
+	}
+
+	/* Searching probe points */
+	for (j = 0; j < session.nr_probe; j++) {
+		pp = &session.probes[j];
+		if (pp->found)
+			continue;
+
+		lseek(fd, SEEK_SET, 0);
+		ret = find_probepoint(fd, pp);
+		if (ret < 0) {
+			if (session.need_dwarf)
+				die("Could not analyze debuginfo.");
+
+			pr_warning("An error occurred in debuginfo analysis. Try to use symbols.\n");
+			break;
+		}
+		if (ret == 0)	/* No error but failed to find probe point. */
+			die("No probe point found.");
+	}
+	close(fd);
+
+end_dwarf:
+#endif /* !NO_LIBDWARF */
+
+	/* Synthesize probes without dwarf */
+	for (j = 0; j < session.nr_probe; j++) {
+		pp = &session.probes[j];
+		if (pp->found)	/* This probe is already found. */
+			continue;
+
+		ret = synthesize_trace_kprobe_event(pp);
+		if (ret == -E2BIG)
+			die("probe point definition becomes too long.");
+		else if (ret < 0)
+			die("Failed to synthesize a probe point.");
+	}
+
+	/* Settng up probe points */
+	add_trace_kprobe_events(session.probes, session.nr_probe);
+	return 0;
+}
+
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a4be453..0e519c6 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -17,55 +17,52 @@
 #include "util/header.h"
 #include "util/event.h"
 #include "util/debug.h"
-#include "util/trace-event.h"
+#include "util/symbol.h"
 
 #include <unistd.h>
 #include <sched.h>
 
-#define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
-#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
-
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
-static long			default_interval		= 100000;
+static long			default_interval		=      0;
 
-static int			nr_cpus				= 0;
+static int			nr_cpus				=      0;
 static unsigned int		page_size;
-static unsigned int		mmap_pages			= 128;
-static int			freq				= 0;
+static unsigned int		mmap_pages			=    128;
+static int			freq				=   1000;
 static int			output;
 static const char		*output_name			= "perf.data";
-static int			group				= 0;
-static unsigned int		realtime_prio			= 0;
-static int			raw_samples			= 0;
-static int			system_wide			= 0;
-static int			profile_cpu			= -1;
-static pid_t			target_pid			= -1;
-static pid_t			child_pid			= -1;
-static int			inherit				= 1;
-static int			force				= 0;
-static int			append_file			= 0;
-static int			call_graph			= 0;
-static int			inherit_stat			= 0;
-static int			no_samples			= 0;
-static int			sample_address			= 0;
-static int			multiplex			= 0;
-static int			multiplex_fd			= -1;
+static int			group				=      0;
+static unsigned int		realtime_prio			=      0;
+static int			raw_samples			=      0;
+static int			system_wide			=      0;
+static int			profile_cpu			=     -1;
+static pid_t			target_pid			=     -1;
+static pid_t			child_pid			=     -1;
+static int			inherit				=      1;
+static int			force				=      0;
+static int			append_file			=      0;
+static int			call_graph			=      0;
+static int			inherit_stat			=      0;
+static int			no_samples			=      0;
+static int			sample_address			=      0;
+static int			multiplex			=      0;
+static int			multiplex_fd			=     -1;
 
-static long			samples;
+static long			samples				=      0;
 static struct timeval		last_read;
 static struct timeval		this_read;
 
-static u64			bytes_written;
+static u64			bytes_written			=      0;
 
 static struct pollfd		event_array[MAX_NR_CPUS * MAX_COUNTERS];
 
-static int			nr_poll;
-static int			nr_cpu;
+static int			nr_poll				=      0;
+static int			nr_cpu				=      0;
 
-static int			file_new = 1;
+static int			file_new			=      1;
 
-struct perf_header		*header;
+struct perf_header		*header				=   NULL;
 
 struct mmap_data {
 	int			counter;
@@ -113,6 +110,24 @@
 	}
 }
 
+static void write_event(event_t *buf, size_t size)
+{
+	/*
+	* Add it to the list of DSOs, so that when we finish this
+	 * record session we can pick the available build-ids.
+	 */
+	if (buf->header.type == PERF_RECORD_MMAP)
+		dsos__findnew(buf->mmap.filename);
+
+	write_output(buf, size);
+}
+
+static int process_synthesized_event(event_t *event)
+{
+	write_event(event, event->header.size);
+	return 0;
+}
+
 static void mmap_read(struct mmap_data *md)
 {
 	unsigned int head = mmap_read_head(md);
@@ -161,14 +176,14 @@
 		size = md->mask + 1 - (old & md->mask);
 		old += size;
 
-		write_output(buf, size);
+		write_event(buf, size);
 	}
 
 	buf = &data[old & md->mask];
 	size = head - old;
 	old += size;
 
-	write_output(buf, size);
+	write_event(buf, size);
 
 	md->prev = old;
 	mmap_write_tail(md, old);
@@ -195,168 +210,6 @@
 	kill(getpid(), signr);
 }
 
-static pid_t pid_synthesize_comm_event(pid_t pid, int full)
-{
-	struct comm_event comm_ev;
-	char filename[PATH_MAX];
-	char bf[BUFSIZ];
-	FILE *fp;
-	size_t size = 0;
-	DIR *tasks;
-	struct dirent dirent, *next;
-	pid_t tgid = 0;
-
-	snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
-
-	fp = fopen(filename, "r");
-	if (fp == NULL) {
-		/*
-		 * We raced with a task exiting - just return:
-		 */
-		if (verbose)
-			fprintf(stderr, "couldn't open %s\n", filename);
-		return 0;
-	}
-
-	memset(&comm_ev, 0, sizeof(comm_ev));
-	while (!comm_ev.comm[0] || !comm_ev.pid) {
-		if (fgets(bf, sizeof(bf), fp) == NULL)
-			goto out_failure;
-
-		if (memcmp(bf, "Name:", 5) == 0) {
-			char *name = bf + 5;
-			while (*name && isspace(*name))
-				++name;
-			size = strlen(name) - 1;
-			memcpy(comm_ev.comm, name, size++);
-		} else if (memcmp(bf, "Tgid:", 5) == 0) {
-			char *tgids = bf + 5;
-			while (*tgids && isspace(*tgids))
-				++tgids;
-			tgid = comm_ev.pid = atoi(tgids);
-		}
-	}
-
-	comm_ev.header.type = PERF_RECORD_COMM;
-	size = ALIGN(size, sizeof(u64));
-	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
-
-	if (!full) {
-		comm_ev.tid = pid;
-
-		write_output(&comm_ev, comm_ev.header.size);
-		goto out_fclose;
-	}
-
-	snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
-
-	tasks = opendir(filename);
-	while (!readdir_r(tasks, &dirent, &next) && next) {
-		char *end;
-		pid = strtol(dirent.d_name, &end, 10);
-		if (*end)
-			continue;
-
-		comm_ev.tid = pid;
-
-		write_output(&comm_ev, comm_ev.header.size);
-	}
-	closedir(tasks);
-
-out_fclose:
-	fclose(fp);
-	return tgid;
-
-out_failure:
-	fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
-		filename);
-	exit(EXIT_FAILURE);
-}
-
-static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
-{
-	char filename[PATH_MAX];
-	FILE *fp;
-
-	snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
-
-	fp = fopen(filename, "r");
-	if (fp == NULL) {
-		/*
-		 * We raced with a task exiting - just return:
-		 */
-		if (verbose)
-			fprintf(stderr, "couldn't open %s\n", filename);
-		return;
-	}
-	while (1) {
-		char bf[BUFSIZ], *pbf = bf;
-		struct mmap_event mmap_ev = {
-			.header = { .type = PERF_RECORD_MMAP },
-		};
-		int n;
-		size_t size;
-		if (fgets(bf, sizeof(bf), fp) == NULL)
-			break;
-
-		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = hex2u64(pbf, &mmap_ev.start);
-		if (n < 0)
-			continue;
-		pbf += n + 1;
-		n = hex2u64(pbf, &mmap_ev.len);
-		if (n < 0)
-			continue;
-		pbf += n + 3;
-		if (*pbf == 'x') { /* vm_exec */
-			char *execname = strchr(bf, '/');
-
-			/* Catch VDSO */
-			if (execname == NULL)
-				execname = strstr(bf, "[vdso]");
-
-			if (execname == NULL)
-				continue;
-
-			size = strlen(execname);
-			execname[size - 1] = '\0'; /* Remove \n */
-			memcpy(mmap_ev.filename, execname, size);
-			size = ALIGN(size, sizeof(u64));
-			mmap_ev.len -= mmap_ev.start;
-			mmap_ev.header.size = (sizeof(mmap_ev) -
-					       (sizeof(mmap_ev.filename) - size));
-			mmap_ev.pid = tgid;
-			mmap_ev.tid = pid;
-
-			write_output(&mmap_ev, mmap_ev.header.size);
-		}
-	}
-
-	fclose(fp);
-}
-
-static void synthesize_all(void)
-{
-	DIR *proc;
-	struct dirent dirent, *next;
-
-	proc = opendir("/proc");
-
-	while (!readdir_r(proc, &dirent, &next) && next) {
-		char *end;
-		pid_t pid, tgid;
-
-		pid = strtol(dirent.d_name, &end, 10);
-		if (*end) /* only interested in proper numerical dirents */
-			continue;
-
-		tgid = pid_synthesize_comm_event(pid, 1);
-		pid_synthesize_mmap_samples(pid, tgid);
-	}
-
-	closedir(proc);
-}
-
 static int group_fd;
 
 static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
@@ -367,7 +220,11 @@
 		h_attr = header->attr[nr];
 	} else {
 		h_attr = perf_header_attr__new(a);
-		perf_header__add_attr(header, h_attr);
+		if (h_attr != NULL)
+			if (perf_header__add_attr(header, h_attr) < 0) {
+				perf_header_attr__delete(h_attr);
+				h_attr = NULL;
+			}
 	}
 
 	return h_attr;
@@ -375,9 +232,11 @@
 
 static void create_counter(int counter, int cpu, pid_t pid)
 {
+	char *filter = filters[counter];
 	struct perf_event_attr *attr = attrs + counter;
 	struct perf_header_attr *h_attr;
 	int track = !counter; /* only the first counter needs these */
+	int ret;
 	struct {
 		u64 count;
 		u64 time_enabled;
@@ -448,11 +307,19 @@
 		printf("\n");
 		error("perfcounter syscall returned with %d (%s)\n",
 			fd[nr_cpu][counter], strerror(err));
+
+#if defined(__i386__) || defined(__x86_64__)
+		if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
+			die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n");
+#endif
+
 		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 		exit(-1);
 	}
 
 	h_attr = get_header_attr(attr, counter);
+	if (h_attr == NULL)
+		die("nomem\n");
 
 	if (!file_new) {
 		if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
@@ -466,7 +333,10 @@
 		exit(-1);
 	}
 
-	perf_header_attr__add_id(h_attr, read_data.id);
+	if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
+		pr_warning("Not enough memory to add id\n");
+		exit(-1);
+	}
 
 	assert(fd[nr_cpu][counter] >= 0);
 	fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
@@ -480,7 +350,6 @@
 		multiplex_fd = fd[nr_cpu][counter];
 
 	if (multiplex && fd[nr_cpu][counter] != multiplex_fd) {
-		int ret;
 
 		ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd);
 		assert(ret != -1);
@@ -500,6 +369,16 @@
 		}
 	}
 
+	if (filter != NULL) {
+		ret = ioctl(fd[nr_cpu][counter],
+			    PERF_EVENT_IOC_SET_FILTER, filter);
+		if (ret) {
+			error("failed to set filter with %d (%s)\n", errno,
+			      strerror(errno));
+			exit(-1);
+		}
+	}
+
 	ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE);
 }
 
@@ -518,7 +397,7 @@
 {
 	header->data_size += bytes_written;
 
-	perf_header__write(header, output);
+	perf_header__write(header, output, true);
 }
 
 static int __cmd_record(int argc, const char **argv)
@@ -527,7 +406,7 @@
 	struct stat st;
 	pid_t pid = 0;
 	int flags;
-	int ret;
+	int err;
 	unsigned long waking = 0;
 
 	page_size = sysconf(_SC_PAGE_SIZE);
@@ -561,22 +440,29 @@
 		exit(-1);
 	}
 
-	if (!file_new)
-		header = perf_header__read(output);
-	else
-		header = perf_header__new();
+	header = perf_header__new();
+	if (header == NULL) {
+		pr_err("Not enough memory for reading perf file header\n");
+		return -1;
+	}
 
+	if (!file_new) {
+		err = perf_header__read(header, output);
+		if (err < 0)
+			return err;
+	}
 
 	if (raw_samples) {
-		read_tracing_data(attrs, nr_counters);
+		perf_header__set_feat(header, HEADER_TRACE_INFO);
 	} else {
 		for (i = 0; i < nr_counters; i++) {
 			if (attrs[i].sample_type & PERF_SAMPLE_RAW) {
-				read_tracing_data(attrs, nr_counters);
+				perf_header__set_feat(header, HEADER_TRACE_INFO);
 				break;
 			}
 		}
 	}
+
 	atexit(atexit_header);
 
 	if (!system_wide) {
@@ -594,25 +480,36 @@
 		}
 	}
 
-	if (file_new)
-		perf_header__write(header, output);
+	if (file_new) {
+		err = perf_header__write(header, output, false);
+		if (err < 0)
+			return err;
+	}
 
-	if (!system_wide) {
-		pid_t tgid = pid_synthesize_comm_event(pid, 0);
-		pid_synthesize_mmap_samples(pid, tgid);
-	} else
-		synthesize_all();
+	if (!system_wide)
+		event__synthesize_thread(pid, process_synthesized_event);
+	else
+		event__synthesize_threads(process_synthesized_event);
 
 	if (target_pid == -1 && argc) {
 		pid = fork();
 		if (pid < 0)
-			perror("failed to fork");
+			die("failed to fork");
 
 		if (!pid) {
 			if (execvp(argv[0], (char **)argv)) {
 				perror(argv[0]);
 				exit(-1);
 			}
+		} else {
+			/*
+			 * Wait a bit for the execv'ed child to appear
+			 * and be updated in /proc
+			 * FIXME: Do you know a less heuristical solution?
+			 */
+			usleep(1000);
+			event__synthesize_thread(pid,
+						 process_synthesized_event);
 		}
 
 		child_pid = pid;
@@ -623,7 +520,7 @@
 
 		param.sched_priority = realtime_prio;
 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
-			printf("Could not set realtime priority.\n");
+			pr_err("Could not set realtime priority.\n");
 			exit(-1);
 		}
 	}
@@ -641,7 +538,7 @@
 		if (hits == samples) {
 			if (done)
 				break;
-			ret = poll(event_array, nr_poll, -1);
+			err = poll(event_array, nr_poll, -1);
 			waking++;
 		}
 
@@ -677,6 +574,8 @@
 	OPT_CALLBACK('e', "event", NULL, "event",
 		     "event selector. use 'perf list' to list available events",
 		     parse_events),
+	OPT_CALLBACK(0, "filter", NULL, "filter",
+		     "event filter", parse_filter),
 	OPT_INTEGER('p', "pid", &target_pid,
 		    "record events on existing pid"),
 	OPT_INTEGER('r', "realtime", &realtime_prio,
@@ -720,6 +619,8 @@
 {
 	int counter;
 
+	symbol__init(0);
+
 	argc = parse_options(argc, argv, options, record_usage,
 		PARSE_OPT_STOP_AT_NON_OPTION);
 	if (!argc && target_pid == -1 && !system_wide)
@@ -731,6 +632,18 @@
 		attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
 	}
 
+	/*
+	 * User specified count overrides default frequency.
+	 */
+	if (default_interval)
+		freq = 0;
+	else if (freq) {
+		default_interval = freq;
+	} else {
+		fprintf(stderr, "frequency and count are zero, aborting\n");
+		exit(EXIT_FAILURE);
+	}
+
 	for (counter = 0; counter < nr_counters; counter++) {
 		if (attrs[counter].sample_period)
 			continue;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 19669c2..383c4ab 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -26,20 +26,18 @@
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 
+#include "util/data_map.h"
 #include "util/thread.h"
+#include "util/sort.h"
+#include "util/hist.h"
 
 static char		const *input_name = "perf.data";
 
-static char		default_sort_order[] = "comm,dso,symbol";
-static char		*sort_order = default_sort_order;
 static char		*dso_list_str, *comm_list_str, *sym_list_str,
 			*col_width_list_str;
 static struct strlist	*dso_list, *comm_list, *sym_list;
-static char		*field_sep;
 
 static int		force;
-static int		input;
-static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static int		full_paths;
 static int		show_nr_samples;
@@ -50,374 +48,38 @@
 static char		default_pretty_printing_style[] = "normal";
 static char		*pretty_printing_style = default_pretty_printing_style;
 
-static unsigned long	page_size;
-static unsigned long	mmap_window = 32;
-
-static char		default_parent_pattern[] = "^sys_|^do_page_fault";
-static char		*parent_pattern = default_parent_pattern;
-static regex_t		parent_regex;
-
 static int		exclude_other = 1;
 
 static char		callchain_default_opt[] = "fractal,0.5";
 
-static int		callchain;
-
-static char		__cwd[PATH_MAX];
-static char		*cwd = __cwd;
-static int		cwdlen;
-
-static struct rb_root	threads;
-static struct thread	*last_match;
-
 static struct perf_header *header;
 
-static
-struct callchain_param	callchain_param = {
-	.mode	= CHAIN_GRAPH_REL,
-	.min_percent = 0.5
-};
-
 static u64		sample_type;
 
-static int repsep_fprintf(FILE *fp, const char *fmt, ...)
-{
-	int n;
-	va_list ap;
+struct symbol_conf	symbol_conf;
 
-	va_start(ap, fmt);
-	if (!field_sep)
-		n = vfprintf(fp, fmt, ap);
-	else {
-		char *bf = NULL;
-		n = vasprintf(&bf, fmt, ap);
-		if (n > 0) {
-			char *sep = bf;
-
-			while (1) {
-				sep = strchr(sep, *field_sep);
-				if (sep == NULL)
-					break;
-				*sep = '.';
-			}
-		}
-		fputs(bf, fp);
-		free(bf);
-	}
-	va_end(ap);
-	return n;
-}
-
-static unsigned int dsos__col_width,
-		    comms__col_width,
-		    threads__col_width;
-
-/*
- * histogram, sorted on item, collects counts
- */
-
-static struct rb_root hist;
-
-struct hist_entry {
-	struct rb_node		rb_node;
-
-	struct thread		*thread;
-	struct map		*map;
-	struct dso		*dso;
-	struct symbol		*sym;
-	struct symbol		*parent;
-	u64			ip;
-	char			level;
-	struct callchain_node	callchain;
-	struct rb_root		sorted_chain;
-
-	u64			count;
-};
-
-/*
- * configurable sorting bits
- */
-
-struct sort_entry {
-	struct list_head list;
-
-	const char *header;
-
-	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
-	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
-	size_t	(*print)(FILE *fp, struct hist_entry *, unsigned int width);
-	unsigned int *width;
-	bool	elide;
-};
-
-static int64_t cmp_null(void *l, void *r)
-{
-	if (!l && !r)
-		return 0;
-	else if (!l)
-		return -1;
-	else
-		return 1;
-}
-
-/* --sort pid */
-
-static int64_t
-sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	return right->thread->pid - left->thread->pid;
-}
 
 static size_t
-sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width)
+callchain__fprintf_left_margin(FILE *fp, int left_margin)
 {
-	return repsep_fprintf(fp, "%*s:%5d", width - 6,
-			      self->thread->comm ?: "", self->thread->pid);
-}
+	int i;
+	int ret;
 
-static struct sort_entry sort_thread = {
-	.header = "Command:  Pid",
-	.cmp	= sort__thread_cmp,
-	.print	= sort__thread_print,
-	.width	= &threads__col_width,
-};
+	ret = fprintf(fp, "            ");
 
-/* --sort comm */
-
-static int64_t
-sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	return right->thread->pid - left->thread->pid;
-}
-
-static int64_t
-sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
-{
-	char *comm_l = left->thread->comm;
-	char *comm_r = right->thread->comm;
-
-	if (!comm_l || !comm_r)
-		return cmp_null(comm_l, comm_r);
-
-	return strcmp(comm_l, comm_r);
-}
-
-static size_t
-sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width)
-{
-	return repsep_fprintf(fp, "%*s", width, self->thread->comm);
-}
-
-static struct sort_entry sort_comm = {
-	.header		= "Command",
-	.cmp		= sort__comm_cmp,
-	.collapse	= sort__comm_collapse,
-	.print		= sort__comm_print,
-	.width		= &comms__col_width,
-};
-
-/* --sort dso */
-
-static int64_t
-sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct dso *dso_l = left->dso;
-	struct dso *dso_r = right->dso;
-
-	if (!dso_l || !dso_r)
-		return cmp_null(dso_l, dso_r);
-
-	return strcmp(dso_l->name, dso_r->name);
-}
-
-static size_t
-sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width)
-{
-	if (self->dso)
-		return repsep_fprintf(fp, "%-*s", width, self->dso->name);
-
-	return repsep_fprintf(fp, "%*llx", width, (u64)self->ip);
-}
-
-static struct sort_entry sort_dso = {
-	.header = "Shared Object",
-	.cmp	= sort__dso_cmp,
-	.print	= sort__dso_print,
-	.width	= &dsos__col_width,
-};
-
-/* --sort symbol */
-
-static int64_t
-sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	u64 ip_l, ip_r;
-
-	if (left->sym == right->sym)
-		return 0;
-
-	ip_l = left->sym ? left->sym->start : left->ip;
-	ip_r = right->sym ? right->sym->start : right->ip;
-
-	return (int64_t)(ip_r - ip_l);
-}
-
-static size_t
-sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
-{
-	size_t ret = 0;
-
-	if (verbose)
-		ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
-				      dso__symtab_origin(self->dso));
-
-	ret += repsep_fprintf(fp, "[%c] ", self->level);
-	if (self->sym) {
-		ret += repsep_fprintf(fp, "%s", self->sym->name);
-
-		if (self->sym->module)
-			ret += repsep_fprintf(fp, "\t[%s]",
-					     self->sym->module->name);
-	} else {
-		ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip);
-	}
+	for (i = 0; i < left_margin; i++)
+		ret += fprintf(fp, " ");
 
 	return ret;
 }
 
-static struct sort_entry sort_sym = {
-	.header = "Symbol",
-	.cmp	= sort__sym_cmp,
-	.print	= sort__sym_print,
-};
-
-/* --sort parent */
-
-static int64_t
-sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct symbol *sym_l = left->parent;
-	struct symbol *sym_r = right->parent;
-
-	if (!sym_l || !sym_r)
-		return cmp_null(sym_l, sym_r);
-
-	return strcmp(sym_l->name, sym_r->name);
-}
-
-static size_t
-sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width)
-{
-	return repsep_fprintf(fp, "%-*s", width,
-			      self->parent ? self->parent->name : "[other]");
-}
-
-static unsigned int parent_symbol__col_width;
-
-static struct sort_entry sort_parent = {
-	.header = "Parent symbol",
-	.cmp	= sort__parent_cmp,
-	.print	= sort__parent_print,
-	.width	= &parent_symbol__col_width,
-};
-
-static int sort__need_collapse = 0;
-static int sort__has_parent = 0;
-
-struct sort_dimension {
-	const char		*name;
-	struct sort_entry	*entry;
-	int			taken;
-};
-
-static struct sort_dimension sort_dimensions[] = {
-	{ .name = "pid",	.entry = &sort_thread,	},
-	{ .name = "comm",	.entry = &sort_comm,	},
-	{ .name = "dso",	.entry = &sort_dso,	},
-	{ .name = "symbol",	.entry = &sort_sym,	},
-	{ .name = "parent",	.entry = &sort_parent,	},
-};
-
-static LIST_HEAD(hist_entry__sort_list);
-
-static int sort_dimension__add(const char *tok)
-{
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
-		struct sort_dimension *sd = &sort_dimensions[i];
-
-		if (sd->taken)
-			continue;
-
-		if (strncasecmp(tok, sd->name, strlen(tok)))
-			continue;
-
-		if (sd->entry->collapse)
-			sort__need_collapse = 1;
-
-		if (sd->entry == &sort_parent) {
-			int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
-			if (ret) {
-				char err[BUFSIZ];
-
-				regerror(ret, &parent_regex, err, sizeof(err));
-				fprintf(stderr, "Invalid regex: %s\n%s",
-					parent_pattern, err);
-				exit(-1);
-			}
-			sort__has_parent = 1;
-		}
-
-		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
-		sd->taken = 1;
-
-		return 0;
-	}
-
-	return -ESRCH;
-}
-
-static int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct sort_entry *se;
-	int64_t cmp = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		cmp = se->cmp(left, right);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
-static int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
-{
-	struct sort_entry *se;
-	int64_t cmp = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		int64_t (*f)(struct hist_entry *, struct hist_entry *);
-
-		f = se->collapse ?: se->cmp;
-
-		cmp = f(left, right);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
-static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask)
+static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
+					  int left_margin)
 {
 	int i;
 	size_t ret = 0;
 
-	ret += fprintf(fp, "%s", "                ");
+	ret += callchain__fprintf_left_margin(fp, left_margin);
 
 	for (i = 0; i < depth; i++)
 		if (depth_mask & (1 << i))
@@ -432,12 +94,12 @@
 static size_t
 ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
 		       int depth_mask, int count, u64 total_samples,
-		       int hits)
+		       int hits, int left_margin)
 {
 	int i;
 	size_t ret = 0;
 
-	ret += fprintf(fp, "%s", "                ");
+	ret += callchain__fprintf_left_margin(fp, left_margin);
 	for (i = 0; i < depth; i++) {
 		if (depth_mask & (1 << i))
 			ret += fprintf(fp, "|");
@@ -475,8 +137,9 @@
 }
 
 static size_t
-callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
-			u64 total_samples, int depth, int depth_mask)
+__callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
+			   u64 total_samples, int depth, int depth_mask,
+			   int left_margin)
 {
 	struct rb_node *node, *next;
 	struct callchain_node *child;
@@ -517,7 +180,8 @@
 		 * But we keep the older depth mask for the line seperator
 		 * to keep the level link until we reach the last child
 		 */
-		ret += ipchain__fprintf_graph_line(fp, depth, depth_mask);
+		ret += ipchain__fprintf_graph_line(fp, depth, depth_mask,
+						   left_margin);
 		i = 0;
 		list_for_each_entry(chain, &child->val, list) {
 			if (chain->ip >= PERF_CONTEXT_MAX)
@@ -525,11 +189,13 @@
 			ret += ipchain__fprintf_graph(fp, chain, depth,
 						      new_depth_mask, i++,
 						      new_total,
-						      cumul);
+						      cumul,
+						      left_margin);
 		}
-		ret += callchain__fprintf_graph(fp, child, new_total,
-						depth + 1,
-						new_depth_mask | (1 << depth));
+		ret += __callchain__fprintf_graph(fp, child, new_total,
+						  depth + 1,
+						  new_depth_mask | (1 << depth),
+						  left_margin);
 		node = next;
 	}
 
@@ -543,12 +209,51 @@
 
 		ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
 					      new_depth_mask, 0, new_total,
-					      remaining);
+					      remaining, left_margin);
 	}
 
 	return ret;
 }
 
+
+static size_t
+callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
+			 u64 total_samples, int left_margin)
+{
+	struct callchain_list *chain;
+	bool printed = false;
+	int i = 0;
+	int ret = 0;
+
+	list_for_each_entry(chain, &self->val, list) {
+		if (chain->ip >= PERF_CONTEXT_MAX)
+			continue;
+
+		if (!i++ && sort__first_dimension == SORT_SYM)
+			continue;
+
+		if (!printed) {
+			ret += callchain__fprintf_left_margin(fp, left_margin);
+			ret += fprintf(fp, "|\n");
+			ret += callchain__fprintf_left_margin(fp, left_margin);
+			ret += fprintf(fp, "---");
+
+			left_margin += 3;
+			printed = true;
+		} else
+			ret += callchain__fprintf_left_margin(fp, left_margin);
+
+		if (chain->sym)
+			ret += fprintf(fp, " %s\n", chain->sym->name);
+		else
+			ret += fprintf(fp, " %p\n", (void *)(long)chain->ip);
+	}
+
+	ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1, left_margin);
+
+	return ret;
+}
+
 static size_t
 callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
 			u64 total_samples)
@@ -577,7 +282,7 @@
 
 static size_t
 hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
-			      u64 total_samples)
+			      u64 total_samples, int left_margin)
 {
 	struct rb_node *rb_node;
 	struct callchain_node *chain;
@@ -597,8 +302,8 @@
 			break;
 		case CHAIN_GRAPH_ABS: /* Falldown */
 		case CHAIN_GRAPH_REL:
-			ret += callchain__fprintf_graph(fp, chain,
-							total_samples, 1, 1);
+			ret += callchain__fprintf_graph(fp, chain, total_samples,
+							left_margin);
 		case CHAIN_NONE:
 		default:
 			break;
@@ -610,7 +315,6 @@
 	return ret;
 }
 
-
 static size_t
 hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
 {
@@ -644,8 +348,19 @@
 
 	ret += fprintf(fp, "\n");
 
-	if (callchain)
-		hist_entry_callchain__fprintf(fp, self, total_samples);
+	if (callchain) {
+		int left_margin = 0;
+
+		if (sort__first_dimension == SORT_COMM) {
+			se = list_first_entry(&hist_entry__sort_list, typeof(*se),
+						list);
+			left_margin = se->width ? *se->width : 0;
+			left_margin -= thread__comm_len(self->thread);
+		}
+
+		hist_entry_callchain__fprintf(fp, self, total_samples,
+					      left_margin);
+	}
 
 	return ret;
 }
@@ -693,63 +408,6 @@
 	return 0;
 }
 
-
-static struct symbol *
-resolve_symbol(struct thread *thread, struct map **mapp,
-	       struct dso **dsop, u64 *ipp)
-{
-	struct dso *dso = dsop ? *dsop : NULL;
-	struct map *map = mapp ? *mapp : NULL;
-	u64 ip = *ipp;
-
-	if (!thread)
-		return NULL;
-
-	if (dso)
-		goto got_dso;
-
-	if (map)
-		goto got_map;
-
-	map = thread__find_map(thread, ip);
-	if (map != NULL) {
-		/*
-		 * We have to do this here as we may have a dso
-		 * with no symbol hit that has a name longer than
-		 * the ones with symbols sampled.
-		 */
-		if (!sort_dso.elide && !map->dso->slen_calculated)
-			dso__calc_col_width(map->dso);
-
-		if (mapp)
-			*mapp = map;
-got_map:
-		ip = map->map_ip(map, ip);
-
-		dso = map->dso;
-	} else {
-		/*
-		 * If this is outside of all known maps,
-		 * and is a negative address, try to look it
-		 * up in the kernel dso, as it might be a
-		 * vsyscall (which executes in user-mode):
-		 */
-		if ((long long)ip < 0)
-		dso = kernel_dso;
-	}
-	dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
-	dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip);
-	*ipp  = ip;
-
-	if (dsop)
-		*dsop = dso;
-
-	if (!dso)
-		return NULL;
-got_dso:
-	return dso->find_symbol(dso, ip);
-}
-
 static int call__match(struct symbol *sym)
 {
 	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
@@ -758,11 +416,11 @@
 	return 0;
 }
 
-static struct symbol **
-resolve_callchain(struct thread *thread, struct map *map __used,
-		    struct ip_callchain *chain, struct hist_entry *entry)
+static struct symbol **resolve_callchain(struct thread *thread,
+					 struct ip_callchain *chain,
+					 struct symbol **parent)
 {
-	u64 context = PERF_CONTEXT_MAX;
+	u8 cpumode = PERF_RECORD_MISC_USER;
 	struct symbol **syms = NULL;
 	unsigned int i;
 
@@ -776,34 +434,31 @@
 
 	for (i = 0; i < chain->nr; i++) {
 		u64 ip = chain->ips[i];
-		struct dso *dso = NULL;
-		struct symbol *sym;
+		struct addr_location al;
 
 		if (ip >= PERF_CONTEXT_MAX) {
-			context = ip;
+			switch (ip) {
+			case PERF_CONTEXT_HV:
+				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
+			case PERF_CONTEXT_KERNEL:
+				cpumode = PERF_RECORD_MISC_KERNEL;	break;
+			case PERF_CONTEXT_USER:
+				cpumode = PERF_RECORD_MISC_USER;	break;
+			default:
+				break;
+			}
 			continue;
 		}
 
-		switch (context) {
-		case PERF_CONTEXT_HV:
-			dso = hypervisor_dso;
-			break;
-		case PERF_CONTEXT_KERNEL:
-			dso = kernel_dso;
-			break;
-		default:
-			break;
-		}
-
-		sym = resolve_symbol(thread, NULL, &dso, &ip);
-
-		if (sym) {
-			if (sort__has_parent && call__match(sym) &&
-			    !entry->parent)
-				entry->parent = sym;
+		thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+					   ip, &al, NULL);
+		if (al.sym != NULL) {
+			if (sort__has_parent && !*parent &&
+			    call__match(al.sym))
+				*parent = al.sym;
 			if (!callchain)
 				break;
-			syms[i] = sym;
+			syms[i] = al.sym;
 		}
 	}
 
@@ -814,178 +469,33 @@
  * collect histogram counts
  */
 
-static int
-hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-		struct symbol *sym, u64 ip, struct ip_callchain *chain,
-		char level, u64 count)
+static int hist_entry__add(struct addr_location *al,
+			   struct ip_callchain *chain, u64 count)
 {
-	struct rb_node **p = &hist.rb_node;
-	struct rb_node *parent = NULL;
+	struct symbol **syms = NULL, *parent = NULL;
+	bool hit;
 	struct hist_entry *he;
-	struct symbol **syms = NULL;
-	struct hist_entry entry = {
-		.thread	= thread,
-		.map	= map,
-		.dso	= dso,
-		.sym	= sym,
-		.ip	= ip,
-		.level	= level,
-		.count	= count,
-		.parent = NULL,
-		.sorted_chain = RB_ROOT
-	};
-	int cmp;
 
 	if ((sort__has_parent || callchain) && chain)
-		syms = resolve_callchain(thread, map, chain, &entry);
+		syms = resolve_callchain(al->thread, chain, &parent);
 
-	while (*p != NULL) {
-		parent = *p;
-		he = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = hist_entry__cmp(&entry, he);
-
-		if (!cmp) {
-			he->count += count;
-			if (callchain) {
-				append_chain(&he->callchain, chain, syms);
-				free(syms);
-			}
-			return 0;
-		}
-
-		if (cmp < 0)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	he = malloc(sizeof(*he));
-	if (!he)
+	he = __hist_entry__add(al, parent, count, &hit);
+	if (he == NULL)
 		return -ENOMEM;
-	*he = entry;
+
+	if (hit)
+		he->count += count;
+
 	if (callchain) {
-		callchain_init(&he->callchain);
+		if (!hit)
+			callchain_init(&he->callchain);
 		append_chain(&he->callchain, chain, syms);
 		free(syms);
 	}
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &hist);
 
 	return 0;
 }
 
-static void hist_entry__free(struct hist_entry *he)
-{
-	free(he);
-}
-
-/*
- * collapse the histogram
- */
-
-static struct rb_root collapse_hists;
-
-static void collapse__insert_entry(struct hist_entry *he)
-{
-	struct rb_node **p = &collapse_hists.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *iter;
-	int64_t cmp;
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = hist_entry__collapse(iter, he);
-
-		if (!cmp) {
-			iter->count += he->count;
-			hist_entry__free(he);
-			return;
-		}
-
-		if (cmp < 0)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &collapse_hists);
-}
-
-static void collapse__resort(void)
-{
-	struct rb_node *next;
-	struct hist_entry *n;
-
-	if (!sort__need_collapse)
-		return;
-
-	next = rb_first(&hist);
-	while (next) {
-		n = rb_entry(next, struct hist_entry, rb_node);
-		next = rb_next(&n->rb_node);
-
-		rb_erase(&n->rb_node, &hist);
-		collapse__insert_entry(n);
-	}
-}
-
-/*
- * reverse the map, sort on count.
- */
-
-static struct rb_root output_hists;
-
-static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits)
-{
-	struct rb_node **p = &output_hists.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *iter;
-
-	if (callchain)
-		callchain_param.sort(&he->sorted_chain, &he->callchain,
-				      min_callchain_hits, &callchain_param);
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		if (he->count > iter->count)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &output_hists);
-}
-
-static void output__resort(u64 total_samples)
-{
-	struct rb_node *next;
-	struct hist_entry *n;
-	struct rb_root *tree = &hist;
-	u64 min_callchain_hits;
-
-	min_callchain_hits = total_samples * (callchain_param.min_percent / 100);
-
-	if (sort__need_collapse)
-		tree = &collapse_hists;
-
-	next = rb_first(tree);
-
-	while (next) {
-		n = rb_entry(next, struct hist_entry, rb_node);
-		next = rb_next(&n->rb_node);
-
-		rb_erase(&n->rb_node, tree);
-		output__insert_entry(n, min_callchain_hits);
-	}
-}
-
 static size_t output__fprintf(FILE *fp, u64 total_samples)
 {
 	struct hist_entry *pos;
@@ -1080,13 +590,6 @@
 	return ret;
 }
 
-static unsigned long total = 0,
-		     total_mmap = 0,
-		     total_comm = 0,
-		     total_fork = 0,
-		     total_unknown = 0,
-		     total_lost = 0;
-
 static int validate_chain(struct ip_callchain *chain, event_t *event)
 {
 	unsigned int chain_size;
@@ -1100,30 +603,22 @@
 	return 0;
 }
 
-static int
-process_sample_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_sample_event(event_t *event)
 {
-	char level;
-	int show = 0;
-	struct dso *dso = NULL;
-	struct thread *thread;
 	u64 ip = event->ip.ip;
 	u64 period = 1;
-	struct map *map = NULL;
 	void *more_data = event->ip.__more_data;
 	struct ip_callchain *chain = NULL;
 	int cpumode;
-
-	thread = threads__findnew(event->ip.pid, &threads, &last_match);
+	struct addr_location al;
+	struct thread *thread = threads__findnew(event->ip.pid);
 
 	if (sample_type & PERF_SAMPLE_PERIOD) {
 		period = *(u64 *)more_data;
 		more_data += sizeof(u64);
 	}
 
-	dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
+	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 		event->header.misc,
 		event->ip.pid, event->ip.tid,
 		(void *)(long)ip,
@@ -1137,7 +632,8 @@
 		dump_printf("... chain: nr:%Lu\n", chain->nr);
 
 		if (validate_chain(chain, event) < 0) {
-			eprintf("call-chain problem with event, skipping it.\n");
+			pr_debug("call-chain problem with event, "
+				 "skipping it.\n");
 			return 0;
 		}
 
@@ -1147,163 +643,64 @@
 		}
 	}
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
 	if (thread == NULL) {
-		eprintf("problem processing %d event, skipping it.\n",
+		pr_debug("problem processing %d event, skipping it.\n",
 			event->header.type);
 		return -1;
 	}
 
+	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
 	if (comm_list && !strlist__has_entry(comm_list, thread->comm))
 		return 0;
 
 	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	if (cpumode == PERF_RECORD_MISC_KERNEL) {
-		show = SHOW_KERNEL;
-		level = 'k';
+	thread__find_addr_location(thread, cpumode,
+				   MAP__FUNCTION, ip, &al, NULL);
+	/*
+	 * We have to do this here as we may have a dso with no symbol hit that
+	 * has a name longer than the ones with symbols sampled.
+	 */
+	if (al.map && !sort_dso.elide && !al.map->dso->slen_calculated)
+		dso__calc_col_width(al.map->dso);
 
-		dso = kernel_dso;
-
-		dump_printf(" ...... dso: %s\n", dso->name);
-
-	} else if (cpumode == PERF_RECORD_MISC_USER) {
-
-		show = SHOW_USER;
-		level = '.';
-
-	} else {
-		show = SHOW_HV;
-		level = 'H';
-
-		dso = hypervisor_dso;
-
-		dump_printf(" ...... dso: [hypervisor]\n");
-	}
-
-	if (show & show_mask) {
-		struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
-
-		if (dso_list && (!dso || !dso->name ||
-				 !strlist__has_entry(dso_list, dso->name)))
-			return 0;
-
-		if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
-			return 0;
-
-		if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
-			eprintf("problem incrementing symbol count, skipping event\n");
-			return -1;
-		}
-	}
-	total += period;
-
-	return 0;
-}
-
-static int
-process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread;
-	struct map *map = map__new(&event->mmap, cwd, cwdlen);
-
-	thread = threads__findnew(event->mmap.pid, &threads, &last_match);
-
-	dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->mmap.pid,
-		event->mmap.tid,
-		(void *)(long)event->mmap.start,
-		(void *)(long)event->mmap.len,
-		(void *)(long)event->mmap.pgoff,
-		event->mmap.filename);
-
-	if (thread == NULL || map == NULL) {
-		dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
+	if (dso_list &&
+	    (!al.map || !al.map->dso ||
+	     !(strlist__has_entry(dso_list, al.map->dso->short_name) ||
+	       (al.map->dso->short_name != al.map->dso->long_name &&
+		strlist__has_entry(dso_list, al.map->dso->long_name)))))
 		return 0;
+
+	if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name))
+		return 0;
+
+	if (hist_entry__add(&al, chain, period)) {
+		pr_debug("problem incrementing symbol count, skipping event\n");
+		return -1;
 	}
 
-	thread__insert_map(thread, map);
-	total_mmap++;
+	event__stats.total += period;
 
 	return 0;
 }
 
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_comm_event(event_t *event)
 {
-	struct thread *thread;
+	struct thread *thread = threads__findnew(event->comm.pid);
 
-	thread = threads__findnew(event->comm.pid, &threads, &last_match);
-
-	dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->comm.comm, event->comm.pid);
+	dump_printf(": %s:%d\n", event->comm.comm, event->comm.pid);
 
 	if (thread == NULL ||
 	    thread__set_comm_adjust(thread, event->comm.comm)) {
 		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
 		return -1;
 	}
-	total_comm++;
 
 	return 0;
 }
 
-static int
-process_task_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread;
-	struct thread *parent;
-
-	thread = threads__findnew(event->fork.pid, &threads, &last_match);
-	parent = threads__findnew(event->fork.ppid, &threads, &last_match);
-
-	dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->header.type == PERF_RECORD_FORK ? "FORK" : "EXIT",
-		event->fork.pid, event->fork.tid,
-		event->fork.ppid, event->fork.ptid);
-
-	/*
-	 * A thread clone will have the same PID for both
-	 * parent and child.
-	 */
-	if (thread == parent)
-		return 0;
-
-	if (event->header.type == PERF_RECORD_EXIT)
-		return 0;
-
-	if (!thread || !parent || thread__fork(thread, parent)) {
-		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
-		return -1;
-	}
-	total_fork++;
-
-	return 0;
-}
-
-static int
-process_lost_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->lost.id,
-		event->lost.lost);
-
-	total_lost += event->lost.lost;
-
-	return 0;
-}
-
-static int
-process_read_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_read_event(event_t *event)
 {
 	struct perf_event_attr *attr;
 
@@ -1319,238 +716,91 @@
 					   event->read.value);
 	}
 
-	dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->read.pid,
-			event->read.tid,
-			attr ? __event_name(attr->type, attr->config)
-			     : "FAIL",
-			event->read.value);
+	dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid,
+		    attr ? __event_name(attr->type, attr->config) : "FAIL",
+		    event->read.value);
 
 	return 0;
 }
 
-static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
+static int sample_type_check(u64 type)
 {
-	trace_event(event);
-
-	switch (event->header.type) {
-	case PERF_RECORD_SAMPLE:
-		return process_sample_event(event, offset, head);
-
-	case PERF_RECORD_MMAP:
-		return process_mmap_event(event, offset, head);
-
-	case PERF_RECORD_COMM:
-		return process_comm_event(event, offset, head);
-
-	case PERF_RECORD_FORK:
-	case PERF_RECORD_EXIT:
-		return process_task_event(event, offset, head);
-
-	case PERF_RECORD_LOST:
-		return process_lost_event(event, offset, head);
-
-	case PERF_RECORD_READ:
-		return process_read_event(event, offset, head);
-
-	/*
-	 * We dont process them right now but they are fine:
-	 */
-
-	case PERF_RECORD_THROTTLE:
-	case PERF_RECORD_UNTHROTTLE:
-		return 0;
-
-	default:
-		return -1;
-	}
-
-	return 0;
-}
-
-static int __cmd_report(void)
-{
-	int ret, rc = EXIT_FAILURE;
-	unsigned long offset = 0;
-	unsigned long head, shift;
-	struct stat input_stat;
-	struct thread *idle;
-	event_t *event;
-	uint32_t size;
-	char *buf;
-
-	idle = register_idle_thread(&threads, &last_match);
-	thread__comm_adjust(idle);
-
-	if (show_threads)
-		perf_read_values_init(&show_threads_values);
-
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		fprintf(stderr, " failed to open file: %s", input_name);
-		if (!strcmp(input_name, "perf.data"))
-			fprintf(stderr, "  (try 'perf record' first)");
-		fprintf(stderr, "\n");
-		exit(-1);
-	}
-
-	ret = fstat(input, &input_stat);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
-	}
-
-	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
-		fprintf(stderr, "file: %s not owned by current user or root\n", input_name);
-		exit(-1);
-	}
-
-	if (!input_stat.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-
-	header = perf_header__read(input);
-	head = header->data_offset;
-
-	sample_type = perf_header__sample_type(header);
+	sample_type = type;
 
 	if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
 		if (sort__has_parent) {
 			fprintf(stderr, "selected --sort parent, but no"
 					" callchain data. Did you call"
 					" perf record without -g?\n");
-			exit(-1);
+			return -1;
 		}
 		if (callchain) {
 			fprintf(stderr, "selected -g but no callchain data."
 					" Did you call perf record without"
 					" -g?\n");
-			exit(-1);
+			return -1;
 		}
 	} else if (callchain_param.mode != CHAIN_NONE && !callchain) {
 			callchain = 1;
 			if (register_callchain_param(&callchain_param) < 0) {
 				fprintf(stderr, "Can't register callchain"
 						" params\n");
-				exit(-1);
+				return -1;
 			}
 	}
 
-	if (load_kernel() < 0) {
-		perror("failed to load kernel symbols");
-		return EXIT_FAILURE;
-	}
+	return 0;
+}
 
-	if (!full_paths) {
-		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
-			perror("failed to get the current directory");
-			return EXIT_FAILURE;
-		}
-		cwdlen = strlen(cwd);
-	} else {
-		cwd = NULL;
-		cwdlen = 0;
-	}
+static struct perf_file_handler file_handler = {
+	.process_sample_event	= process_sample_event,
+	.process_mmap_event	= event__process_mmap,
+	.process_comm_event	= process_comm_event,
+	.process_exit_event	= event__process_task,
+	.process_fork_event	= event__process_task,
+	.process_lost_event	= event__process_lost,
+	.process_read_event	= process_read_event,
+	.sample_type_check	= sample_type_check,
+};
 
-	shift = page_size * (head / page_size);
-	offset += shift;
-	head -= shift;
 
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
+static int __cmd_report(void)
+{
+	struct thread *idle;
+	int ret;
 
-more:
-	event = (event_t *)(buf + head);
+	idle = register_idle_thread();
+	thread__comm_adjust(idle);
 
-	size = event->header.size;
-	if (!size)
-		size = 8;
+	if (show_threads)
+		perf_read_values_init(&show_threads_values);
 
-	if (head + event->header.size >= page_size * mmap_window) {
-		int munmap_ret;
+	register_perf_file_handler(&file_handler);
 
-		shift = page_size * (head / page_size);
+	ret = mmap_dispatch_perf_file(&header, input_name, force,
+				      full_paths, &event__cwdlen, &event__cwd);
+	if (ret)
+		return ret;
 
-		munmap_ret = munmap(buf, page_size * mmap_window);
-		assert(munmap_ret == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-
-	dump_printf("\n%p [%p]: event: %d\n",
-			(void *)(offset + head),
-			(void *)(long)event->header.size,
-			event->header.type);
-
-	if (!size || process_event(event, offset, head) < 0) {
-
-		dump_printf("%p [%p]: skipping unknown header type: %d\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->header.type);
-
-		total_unknown++;
-
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
-	}
-
-	head += size;
-
-	if (offset + head >= header->data_offset + header->data_size)
-		goto done;
-
-	if (offset + head < (unsigned long)input_stat.st_size)
-		goto more;
-
-done:
-	rc = EXIT_SUCCESS;
-	close(input);
-
-	dump_printf("      IP events: %10ld\n", total);
-	dump_printf("    mmap events: %10ld\n", total_mmap);
-	dump_printf("    comm events: %10ld\n", total_comm);
-	dump_printf("    fork events: %10ld\n", total_fork);
-	dump_printf("    lost events: %10ld\n", total_lost);
-	dump_printf(" unknown events: %10ld\n", total_unknown);
-
-	if (dump_trace)
+	if (dump_trace) {
+		event__print_totals();
 		return 0;
+	}
 
-	if (verbose >= 3)
-		threads__fprintf(stdout, &threads);
+	if (verbose > 3)
+		threads__fprintf(stdout);
 
-	if (verbose >= 2)
+	if (verbose > 2)
 		dsos__fprintf(stdout);
 
 	collapse__resort();
-	output__resort(total);
-	output__fprintf(stdout, total);
+	output__resort(event__stats.total);
+	output__fprintf(stdout, event__stats.total);
 
 	if (show_threads)
 		perf_read_values_destroy(&show_threads_values);
 
-	return rc;
+	return ret;
 }
 
 static int
@@ -1606,7 +856,8 @@
 	return 0;
 }
 
-static const char * const report_usage[] = {
+//static const char * const report_usage[] = {
+const char * const report_usage[] = {
 	"perf report [<options>] <command>",
 	NULL
 };
@@ -1618,9 +869,10 @@
 		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
-	OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
-	OPT_BOOLEAN('m', "modules", &modules,
+	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
 	OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
 		    "Show a column with the number of samples"),
@@ -1690,9 +942,8 @@
 
 int cmd_report(int argc, const char **argv, const char *prefix __used)
 {
-	symbol__init();
-
-	page_size = getpagesize();
+	if (symbol__init(&symbol_conf) < 0)
+		return -1;
 
 	argc = parse_options(argc, argv, options, report_usage, 0);
 
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index ce2d5be..26b782f 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -11,6 +11,7 @@
 #include "util/trace-event.h"
 
 #include "util/debug.h"
+#include "util/data_map.h"
 
 #include <sys/types.h>
 #include <sys/prctl.h>
@@ -20,14 +21,6 @@
 #include <math.h>
 
 static char			const *input_name = "perf.data";
-static int			input;
-static unsigned long		page_size;
-static unsigned long		mmap_window = 32;
-
-static unsigned long		total_comm = 0;
-
-static struct rb_root		threads;
-static struct thread		*last_match;
 
 static struct perf_header	*header;
 static u64			sample_type;
@@ -35,11 +28,11 @@
 static char			default_sort_order[] = "avg, max, switch, runtime";
 static char			*sort_order = default_sort_order;
 
+static int			profile_cpu = -1;
+
 #define PR_SET_NAME		15               /* Set process name */
 #define MAX_CPUS		4096
 
-#define BUG_ON(x)		assert(!(x))
-
 static u64			run_measurement_overhead;
 static u64			sleep_measurement_overhead;
 
@@ -74,6 +67,7 @@
 	SCHED_EVENT_RUN,
 	SCHED_EVENT_SLEEP,
 	SCHED_EVENT_WAKEUP,
+	SCHED_EVENT_MIGRATION,
 };
 
 struct sched_atom {
@@ -226,7 +220,7 @@
 static struct sched_atom *
 get_new_event(struct task_desc *task, u64 timestamp)
 {
-	struct sched_atom *event = calloc(1, sizeof(*event));
+	struct sched_atom *event = zalloc(sizeof(*event));
 	unsigned long idx = task->nr_events;
 	size_t size;
 
@@ -294,7 +288,7 @@
 		return;
 	}
 
-	wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem));
+	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
 	sem_init(wakee_event->wait_sem, 0, 0);
 	wakee_event->specific_wait = 1;
 	event->wait_sem = wakee_event->wait_sem;
@@ -324,7 +318,7 @@
 	if (task)
 		return task;
 
-	task = calloc(1, sizeof(*task));
+	task = zalloc(sizeof(*task));
 	task->pid = pid;
 	task->nr = nr_tasks;
 	strcpy(task->comm, comm);
@@ -398,6 +392,8 @@
 				ret = sem_post(atom->wait_sem);
 			BUG_ON(ret);
 			break;
+		case SCHED_EVENT_MIGRATION:
+			break;
 		default:
 			BUG_ON(1);
 	}
@@ -632,29 +628,6 @@
 	printf("the sleep test took %Ld nsecs\n", T1-T0);
 }
 
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread;
-
-	thread = threads__findnew(event->comm.pid, &threads, &last_match);
-
-	dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->comm.comm, event->comm.pid);
-
-	if (thread == NULL ||
-	    thread__set_comm(thread, event->comm.comm)) {
-		dump_printf("problem processing perf_event_comm, skipping event.\n");
-		return -1;
-	}
-	total_comm++;
-
-	return 0;
-}
-
-
 struct raw_event_sample {
 	u32 size;
 	char data[0];
@@ -745,6 +718,22 @@
 	u32 child_pid;
 };
 
+struct trace_migrate_task_event {
+	u32 size;
+
+	u16 common_type;
+	u8 common_flags;
+	u8 common_preempt_count;
+	u32 common_pid;
+	u32 common_tgid;
+
+	char comm[16];
+	u32 pid;
+
+	u32 prio;
+	u32 cpu;
+};
+
 struct trace_sched_handler {
 	void (*switch_event)(struct trace_switch_event *,
 			     struct event *,
@@ -769,6 +758,12 @@
 			   int cpu,
 			   u64 timestamp,
 			   struct thread *thread);
+
+	void (*migrate_task_event)(struct trace_migrate_task_event *,
+			   struct event *,
+			   int cpu,
+			   u64 timestamp,
+			   struct thread *thread);
 };
 
 
@@ -941,9 +936,7 @@
 
 static void thread_atoms_insert(struct thread *thread)
 {
-	struct work_atoms *atoms;
-
-	atoms = calloc(sizeof(*atoms), 1);
+	struct work_atoms *atoms = zalloc(sizeof(*atoms));
 	if (!atoms)
 		die("No memory");
 
@@ -975,9 +968,7 @@
 		    char run_state,
 		    u64 timestamp)
 {
-	struct work_atom *atom;
-
-	atom = calloc(sizeof(*atom), 1);
+	struct work_atom *atom = zalloc(sizeof(*atom));
 	if (!atom)
 		die("Non memory");
 
@@ -1058,8 +1049,8 @@
 		die("hm, delta: %Ld < 0 ?\n", delta);
 
 
-	sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
-	sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+	sched_out = threads__findnew(switch_event->prev_pid);
+	sched_in = threads__findnew(switch_event->next_pid);
 
 	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
 	if (!out_events) {
@@ -1092,13 +1083,10 @@
 		     u64 timestamp,
 		     struct thread *this_thread __used)
 {
-	struct work_atoms *atoms;
-	struct thread *thread;
+	struct thread *thread = threads__findnew(runtime_event->pid);
+	struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
 
 	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
-
-	thread = threads__findnew(runtime_event->pid, &threads, &last_match);
-	atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
 	if (!atoms) {
 		thread_atoms_insert(thread);
 		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
@@ -1125,7 +1113,7 @@
 	if (!wakeup_event->success)
 		return;
 
-	wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
+	wakee = threads__findnew(wakeup_event->pid);
 	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
 	if (!atoms) {
 		thread_atoms_insert(wakee);
@@ -1139,7 +1127,12 @@
 
 	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
 
-	if (atom->state != THREAD_SLEEPING)
+	/*
+	 * You WILL be missing events if you've recorded only
+	 * one CPU, or are only looking at only one, so don't
+	 * make useless noise.
+	 */
+	if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
 		nr_state_machine_bugs++;
 
 	nr_timestamps++;
@@ -1152,11 +1145,51 @@
 	atom->wake_up_time = timestamp;
 }
 
+static void
+latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
+		     struct event *__event __used,
+		     int cpu __used,
+		     u64 timestamp,
+		     struct thread *thread __used)
+{
+	struct work_atoms *atoms;
+	struct work_atom *atom;
+	struct thread *migrant;
+
+	/*
+	 * Only need to worry about migration when profiling one CPU.
+	 */
+	if (profile_cpu == -1)
+		return;
+
+	migrant = threads__findnew(migrate_task_event->pid);
+	atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
+	if (!atoms) {
+		thread_atoms_insert(migrant);
+		register_pid(migrant->pid, migrant->comm);
+		atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
+		if (!atoms)
+			die("migration-event: Internal tree error");
+		add_sched_out_event(atoms, 'R', timestamp);
+	}
+
+	BUG_ON(list_empty(&atoms->work_list));
+
+	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
+
+	nr_timestamps++;
+
+	if (atom->sched_out_time > timestamp)
+		nr_unordered_timestamps++;
+}
+
 static struct trace_sched_handler lat_ops  = {
 	.wakeup_event		= latency_wakeup_event,
 	.switch_event		= latency_switch_event,
 	.runtime_event		= latency_runtime_event,
 	.fork_event		= latency_fork_event,
+	.migrate_task_event	= latency_migrate_task_event,
 };
 
 static void output_lat_thread(struct work_atoms *work_list)
@@ -1385,8 +1418,8 @@
 		die("hm, delta: %Ld < 0 ?\n", delta);
 
 
-	sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
-	sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+	sched_out = threads__findnew(switch_event->prev_pid);
+	sched_in = threads__findnew(switch_event->next_pid);
 
 	curr_thread[this_cpu] = sched_in;
 
@@ -1517,6 +1550,26 @@
 }
 
 static void
+process_sched_migrate_task_event(struct raw_event_sample *raw,
+			   struct event *event,
+			   int cpu __used,
+			   u64 timestamp __used,
+			   struct thread *thread __used)
+{
+	struct trace_migrate_task_event migrate_task_event;
+
+	FILL_COMMON_FIELDS(migrate_task_event, event, raw->data);
+
+	FILL_ARRAY(migrate_task_event, comm, event, raw->data);
+	FILL_FIELD(migrate_task_event, pid, event, raw->data);
+	FILL_FIELD(migrate_task_event, prio, event, raw->data);
+	FILL_FIELD(migrate_task_event, cpu, event, raw->data);
+
+	if (trace_handler->migrate_task_event)
+		trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
+}
+
+static void
 process_raw_event(event_t *raw_event __used, void *more_data,
 		  int cpu, u64 timestamp, struct thread *thread)
 {
@@ -1539,23 +1592,23 @@
 		process_sched_fork_event(raw, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_process_exit"))
 		process_sched_exit_event(event, cpu, timestamp, thread);
+	if (!strcmp(event->name, "sched_migrate_task"))
+		process_sched_migrate_task_event(raw, event, cpu, timestamp, thread);
 }
 
-static int
-process_sample_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_sample_event(event_t *event)
 {
-	char level;
-	int show = 0;
-	struct dso *dso = NULL;
 	struct thread *thread;
 	u64 ip = event->ip.ip;
 	u64 timestamp = -1;
 	u32 cpu = -1;
 	u64 period = 1;
 	void *more_data = event->ip.__more_data;
-	int cpumode;
 
-	thread = threads__findnew(event->ip.pid, &threads, &last_match);
+	if (!(sample_type & PERF_SAMPLE_RAW))
+		return 0;
+
+	thread = threads__findnew(event->ip.pid);
 
 	if (sample_type & PERF_SAMPLE_TIME) {
 		timestamp = *(u64 *)more_data;
@@ -1573,177 +1626,64 @@
 		more_data += sizeof(u64);
 	}
 
-	dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
+	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 		event->header.misc,
 		event->ip.pid, event->ip.tid,
 		(void *)(long)ip,
 		(long long)period);
 
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			 event->header.type);
+		return -1;
+	}
+
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	if (thread == NULL) {
-		eprintf("problem processing %d event, skipping it.\n",
-			event->header.type);
-		return -1;
-	}
+	if (profile_cpu != -1 && profile_cpu != (int) cpu)
+		return 0;
 
-	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-	if (cpumode == PERF_RECORD_MISC_KERNEL) {
-		show = SHOW_KERNEL;
-		level = 'k';
-
-		dso = kernel_dso;
-
-		dump_printf(" ...... dso: %s\n", dso->name);
-
-	} else if (cpumode == PERF_RECORD_MISC_USER) {
-
-		show = SHOW_USER;
-		level = '.';
-
-	} else {
-		show = SHOW_HV;
-		level = 'H';
-
-		dso = hypervisor_dso;
-
-		dump_printf(" ...... dso: [hypervisor]\n");
-	}
-
-	if (sample_type & PERF_SAMPLE_RAW)
-		process_raw_event(event, more_data, cpu, timestamp, thread);
+	process_raw_event(event, more_data, cpu, timestamp, thread);
 
 	return 0;
 }
 
-static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_lost_event(event_t *event __used)
 {
-	trace_event(event);
+	nr_lost_chunks++;
+	nr_lost_events += event->lost.lost;
 
-	nr_events++;
-	switch (event->header.type) {
-	case PERF_RECORD_MMAP:
-		return 0;
-	case PERF_RECORD_LOST:
-		nr_lost_chunks++;
-		nr_lost_events += event->lost.lost;
-		return 0;
+	return 0;
+}
 
-	case PERF_RECORD_COMM:
-		return process_comm_event(event, offset, head);
+static int sample_type_check(u64 type)
+{
+	sample_type = type;
 
-	case PERF_RECORD_EXIT ... PERF_RECORD_READ:
-		return 0;
-
-	case PERF_RECORD_SAMPLE:
-		return process_sample_event(event, offset, head);
-
-	case PERF_RECORD_MAX:
-	default:
+	if (!(sample_type & PERF_SAMPLE_RAW)) {
+		fprintf(stderr,
+			"No trace sample to read. Did you call perf record "
+			"without -R?");
 		return -1;
 	}
 
 	return 0;
 }
 
+static struct perf_file_handler file_handler = {
+	.process_sample_event	= process_sample_event,
+	.process_comm_event	= event__process_comm,
+	.process_lost_event	= process_lost_event,
+	.sample_type_check	= sample_type_check,
+};
+
 static int read_events(void)
 {
-	int ret, rc = EXIT_FAILURE;
-	unsigned long offset = 0;
-	unsigned long head = 0;
-	struct stat perf_stat;
-	event_t *event;
-	uint32_t size;
-	char *buf;
+	register_idle_thread();
+	register_perf_file_handler(&file_handler);
 
-	trace_report();
-	register_idle_thread(&threads, &last_match);
-
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		perror("failed to open file");
-		exit(-1);
-	}
-
-	ret = fstat(input, &perf_stat);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
-	}
-
-	if (!perf_stat.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-	header = perf_header__read(input);
-	head = header->data_offset;
-	sample_type = perf_header__sample_type(header);
-
-	if (!(sample_type & PERF_SAMPLE_RAW))
-		die("No trace sample to read. Did you call perf record "
-		    "without -R?");
-
-	if (load_kernel() < 0) {
-		perror("failed to load kernel symbols");
-		return EXIT_FAILURE;
-	}
-
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
-
-more:
-	event = (event_t *)(buf + head);
-
-	size = event->header.size;
-	if (!size)
-		size = 8;
-
-	if (head + event->header.size >= page_size * mmap_window) {
-		unsigned long shift = page_size * (head / page_size);
-		int res;
-
-		res = munmap(buf, page_size * mmap_window);
-		assert(res == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-
-
-	if (!size || process_event(event, offset, head) < 0) {
-
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
-	}
-
-	head += size;
-
-	if (offset + head < (unsigned long)perf_stat.st_size)
-		goto more;
-
-	rc = EXIT_SUCCESS;
-	close(input);
-
-	return rc;
+	return mmap_dispatch_perf_file(&header, input_name, 0, 0,
+				       &event__cwdlen, &event__cwd);
 }
 
 static void print_bad_events(void)
@@ -1883,6 +1823,8 @@
 		   "sort by key(s): runtime, switch, avg, max"),
 	OPT_BOOLEAN('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
+	OPT_INTEGER('C', "CPU", &profile_cpu,
+		    "CPU to profile on"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_END()
@@ -1960,8 +1902,7 @@
 
 int cmd_sched(int argc, const char **argv, const char *prefix __used)
 {
-	symbol__init();
-	page_size = getpagesize();
+	symbol__init(0);
 
 	argc = parse_options(argc, argv, sched_options, sched_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3db31e7..c70d720 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -50,15 +50,17 @@
 
 static struct perf_event_attr default_attrs[] = {
 
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
 
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES		},
 
 };
 
@@ -125,6 +127,7 @@
 struct stats			runtime_nsecs_stats;
 struct stats			walltime_nsecs_stats;
 struct stats			runtime_cycles_stats;
+struct stats			runtime_branches_stats;
 
 #define MATCH_EVENT(t, c, counter)			\
 	(attrs[counter].type == PERF_TYPE_##t &&	\
@@ -235,6 +238,8 @@
 		update_stats(&runtime_nsecs_stats, count[0]);
 	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
 		update_stats(&runtime_cycles_stats, count[0]);
+	if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
+		update_stats(&runtime_branches_stats, count[0]);
 }
 
 static int run_perf_stat(int argc __used, const char **argv)
@@ -352,7 +357,16 @@
 			ratio = avg / total;
 
 		fprintf(stderr, " # %10.3f IPC  ", ratio);
-	} else {
+	} else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
+			runtime_branches_stats.n != 0) {
+		total = avg_stats(&runtime_branches_stats);
+
+		if (total)
+			ratio = avg * 100 / total;
+
+		fprintf(stderr, " # %10.3f %%    ", ratio);
+
+	} else if (runtime_nsecs_stats.n != 0) {
 		total = avg_stats(&runtime_nsecs_stats);
 
 		if (total)
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index e8a510d9..cb58b66 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -29,14 +29,14 @@
 #include "util/header.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/event.h"
+#include "util/data_map.h"
 #include "util/svghelper.h"
 
 static char		const *input_name = "perf.data";
 static char		const *output_name = "output.svg";
 
 
-static unsigned long	page_size;
-static unsigned long	mmap_window = 32;
 static u64		sample_type;
 
 static unsigned int	numcpus;
@@ -49,8 +49,6 @@
 static int		power_only;
 
 
-static struct perf_header	*header;
-
 struct per_pid;
 struct per_pidcomm;
 
@@ -153,6 +151,17 @@
 
 struct sample_wrapper *all_samples;
 
+
+struct process_filter;
+struct process_filter {
+	char			*name;
+	int			pid;
+	struct process_filter	*next;
+};
+
+static struct process_filter *process_filter;
+
+
 static struct per_pid *find_create_pid(int pid)
 {
 	struct per_pid *cursor = all_data;
@@ -763,11 +772,11 @@
 				c = p->all;
 				while (c) {
 					if (c->Y && c->start_time <= we->time && c->end_time >= we->time) {
-						if (p->pid == we->waker) {
+						if (p->pid == we->waker && !from) {
 							from = c->Y;
 							task_from = strdup(c->comm);
 						}
-						if (p->pid == we->wakee) {
+						if (p->pid == we->wakee && !to) {
 							to = c->Y;
 							task_to = strdup(c->comm);
 						}
@@ -882,12 +891,89 @@
 	}
 }
 
+static void add_process_filter(const char *string)
+{
+	struct process_filter *filt;
+	int pid;
+
+	pid = strtoull(string, NULL, 10);
+	filt = malloc(sizeof(struct process_filter));
+	if (!filt)
+		return;
+
+	filt->name = strdup(string);
+	filt->pid  = pid;
+	filt->next = process_filter;
+
+	process_filter = filt;
+}
+
+static int passes_filter(struct per_pid *p, struct per_pidcomm *c)
+{
+	struct process_filter *filt;
+	if (!process_filter)
+		return 1;
+
+	filt = process_filter;
+	while (filt) {
+		if (filt->pid && p->pid == filt->pid)
+			return 1;
+		if (strcmp(filt->name, c->comm) == 0)
+			return 1;
+		filt = filt->next;
+	}
+	return 0;
+}
+
+static int determine_display_tasks_filtered(void)
+{
+	struct per_pid *p;
+	struct per_pidcomm *c;
+	int count = 0;
+
+	p = all_data;
+	while (p) {
+		p->display = 0;
+		if (p->start_time == 1)
+			p->start_time = first_time;
+
+		/* no exit marker, task kept running to the end */
+		if (p->end_time == 0)
+			p->end_time = last_time;
+
+		c = p->all;
+
+		while (c) {
+			c->display = 0;
+
+			if (c->start_time == 1)
+				c->start_time = first_time;
+
+			if (passes_filter(p, c)) {
+				c->display = 1;
+				p->display = 1;
+				count++;
+			}
+
+			if (c->end_time == 0)
+				c->end_time = last_time;
+
+			c = c->next;
+		}
+		p = p->next;
+	}
+	return count;
+}
+
 static int determine_display_tasks(u64 threshold)
 {
 	struct per_pid *p;
 	struct per_pidcomm *c;
 	int count = 0;
 
+	if (process_filter)
+		return determine_display_tasks_filtered();
+
 	p = all_data;
 	while (p) {
 		p->display = 0;
@@ -957,36 +1043,6 @@
 	svg_close();
 }
 
-static int
-process_event(event_t *event)
-{
-
-	switch (event->header.type) {
-
-	case PERF_RECORD_COMM:
-		return process_comm_event(event);
-	case PERF_RECORD_FORK:
-		return process_fork_event(event);
-	case PERF_RECORD_EXIT:
-		return process_exit_event(event);
-	case PERF_RECORD_SAMPLE:
-		return queue_sample_event(event);
-
-	/*
-	 * We dont process them right now but they are fine:
-	 */
-	case PERF_RECORD_MMAP:
-	case PERF_RECORD_THROTTLE:
-	case PERF_RECORD_UNTHROTTLE:
-		return 0;
-
-	default:
-		return -1;
-	}
-
-	return 0;
-}
-
 static void process_samples(void)
 {
 	struct sample_wrapper *cursor;
@@ -1002,107 +1058,38 @@
 	}
 }
 
+static int sample_type_check(u64 type)
+{
+	sample_type = type;
+
+	if (!(sample_type & PERF_SAMPLE_RAW)) {
+		fprintf(stderr, "No trace samples found in the file.\n"
+				"Have you used 'perf timechart record' to record it?\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static struct perf_file_handler file_handler = {
+	.process_comm_event	= process_comm_event,
+	.process_fork_event	= process_fork_event,
+	.process_exit_event	= process_exit_event,
+	.process_sample_event	= queue_sample_event,
+	.sample_type_check	= sample_type_check,
+};
 
 static int __cmd_timechart(void)
 {
-	int ret, rc = EXIT_FAILURE;
-	unsigned long offset = 0;
-	unsigned long head, shift;
-	struct stat statbuf;
-	event_t *event;
-	uint32_t size;
-	char *buf;
-	int input;
+	struct perf_header *header;
+	int ret;
 
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		fprintf(stderr, " failed to open file: %s", input_name);
-		if (!strcmp(input_name, "perf.data"))
-			fprintf(stderr, "  (try 'perf record' first)");
-		fprintf(stderr, "\n");
-		exit(-1);
-	}
+	register_perf_file_handler(&file_handler);
 
-	ret = fstat(input, &statbuf);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
-	}
-
-	if (!statbuf.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-
-	header = perf_header__read(input);
-	head = header->data_offset;
-
-	sample_type = perf_header__sample_type(header);
-
-	shift = page_size * (head / page_size);
-	offset += shift;
-	head -= shift;
-
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
-
-more:
-	event = (event_t *)(buf + head);
-
-	size = event->header.size;
-	if (!size)
-		size = 8;
-
-	if (head + event->header.size >= page_size * mmap_window) {
-		int ret2;
-
-		shift = page_size * (head / page_size);
-
-		ret2 = munmap(buf, page_size * mmap_window);
-		assert(ret2 == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-
-	if (!size || process_event(event) < 0) {
-
-		printf("%p [%p]: skipping unknown header type: %d\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->header.type);
-
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
-	}
-
-	head += size;
-
-	if (offset + head >= header->data_offset + header->data_size)
-		goto done;
-
-	if (offset + head < (unsigned long)statbuf.st_size)
-		goto more;
-
-done:
-	rc = EXIT_SUCCESS;
-	close(input);
-
+	ret = mmap_dispatch_perf_file(&header, input_name, 0, 0,
+				      &event__cwdlen, &event__cwd);
+	if (ret)
+		return EXIT_FAILURE;
 
 	process_samples();
 
@@ -1112,9 +1099,10 @@
 
 	write_svg_file(output_name);
 
-	printf("Written %2.1f seconds of trace to %s.\n", (last_time - first_time) / 1000000000.0, output_name);
+	pr_info("Written %2.1f seconds of trace to %s.\n",
+		(last_time - first_time) / 1000000000.0, output_name);
 
-	return rc;
+	return EXIT_SUCCESS;
 }
 
 static const char * const timechart_usage[] = {
@@ -1153,6 +1141,14 @@
 	return cmd_record(i, rec_argv, NULL);
 }
 
+static int
+parse_process(const struct option *opt __used, const char *arg, int __used unset)
+{
+	if (arg)
+		add_process_filter(arg);
+	return 0;
+}
+
 static const struct option options[] = {
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
@@ -1160,17 +1156,18 @@
 		    "output file name"),
 	OPT_INTEGER('w', "width", &svg_page_width,
 		    "page width"),
-	OPT_BOOLEAN('p', "power-only", &power_only,
+	OPT_BOOLEAN('P', "power-only", &power_only,
 		    "output power data only"),
+	OPT_CALLBACK('p', "process", NULL, "process",
+		      "process selector. Pass a pid or process name.",
+		       parse_process),
 	OPT_END()
 };
 
 
 int cmd_timechart(int argc, const char **argv, const char *prefix __used)
 {
-	symbol__init();
-
-	page_size = getpagesize();
+	symbol__init(0);
 
 	argc = parse_options(argc, argv, options, timechart_usage,
 			PARSE_OPT_STOP_AT_NON_OPTION);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e23bc74..e0a374d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -22,6 +22,7 @@
 
 #include "util/symbol.h"
 #include "util/color.h"
+#include "util/thread.h"
 #include "util/util.h"
 #include <linux/rbtree.h>
 #include "util/parse-options.h"
@@ -54,26 +55,31 @@
 
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
-static int			system_wide			=  0;
+static int			system_wide			=      0;
 
-static int			default_interval		= 100000;
+static int			default_interval		=      0;
 
-static int			count_filter			=  5;
-static int			print_entries			= 15;
+static int			count_filter			=      5;
+static int			print_entries;
 
-static int			target_pid			= -1;
-static int			inherit				=  0;
-static int			profile_cpu			= -1;
-static int			nr_cpus				=  0;
-static unsigned int		realtime_prio			=  0;
-static int			group				=  0;
+static int			target_pid			=     -1;
+static int			inherit				=      0;
+static int			profile_cpu			=     -1;
+static int			nr_cpus				=      0;
+static unsigned int		realtime_prio			=      0;
+static int			group				=      0;
 static unsigned int		page_size;
-static unsigned int		mmap_pages			= 16;
-static int			freq				=  0;
+static unsigned int		mmap_pages			=     16;
+static int			freq				=   1000; /* 1 KHz */
 
-static int			delay_secs			=  2;
-static int			zero;
-static int			dump_symtab;
+static int			delay_secs			=      2;
+static int			zero                            =      0;
+static int			dump_symtab                     =      0;
+
+static bool			hide_kernel_symbols		=  false;
+static bool			hide_user_symbols		=  false;
+static struct winsize		winsize;
+struct symbol_conf		symbol_conf;
 
 /*
  * Source
@@ -86,83 +92,126 @@
 	struct source_line	*next;
 };
 
-static char			*sym_filter			=  NULL;
-struct sym_entry		*sym_filter_entry		=  NULL;
-static int			sym_pcnt_filter			=  5;
-static int			sym_counter			=  0;
-static int			display_weighted		= -1;
+static char			*sym_filter			=   NULL;
+struct sym_entry		*sym_filter_entry		=   NULL;
+static int			sym_pcnt_filter			=      5;
+static int			sym_counter			=      0;
+static int			display_weighted		=     -1;
 
 /*
  * Symbols
  */
 
-static u64			min_ip;
-static u64			max_ip = -1ll;
+struct sym_entry_source {
+	struct source_line	*source;
+	struct source_line	*lines;
+	struct source_line	**lines_tail;
+	pthread_mutex_t		lock;
+};
 
 struct sym_entry {
 	struct rb_node		rb_node;
 	struct list_head	node;
-	unsigned long		count[MAX_COUNTERS];
 	unsigned long		snap_count;
 	double			weight;
 	int			skip;
-	struct source_line	*source;
-	struct source_line	*lines;
-	struct source_line	**lines_tail;
-	pthread_mutex_t		source_lock;
+	u16			name_len;
+	u8			origin;
+	struct map		*map;
+	struct sym_entry_source	*src;
+	unsigned long		count[0];
 };
 
 /*
  * Source functions
  */
 
+static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
+{
+       return ((void *)self) + symbol_conf.priv_size;
+}
+
+static void get_term_dimensions(struct winsize *ws)
+{
+	char *s = getenv("LINES");
+
+	if (s != NULL) {
+		ws->ws_row = atoi(s);
+		s = getenv("COLUMNS");
+		if (s != NULL) {
+			ws->ws_col = atoi(s);
+			if (ws->ws_row && ws->ws_col)
+				return;
+		}
+	}
+#ifdef TIOCGWINSZ
+	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+	    ws->ws_row && ws->ws_col)
+		return;
+#endif
+	ws->ws_row = 25;
+	ws->ws_col = 80;
+}
+
+static void update_print_entries(struct winsize *ws)
+{
+	print_entries = ws->ws_row;
+
+	if (print_entries > 9)
+		print_entries -= 9;
+}
+
+static void sig_winch_handler(int sig __used)
+{
+	get_term_dimensions(&winsize);
+	update_print_entries(&winsize);
+}
+
 static void parse_source(struct sym_entry *syme)
 {
 	struct symbol *sym;
-	struct module *module;
-	struct section *section = NULL;
+	struct sym_entry_source *source;
+	struct map *map;
 	FILE *file;
 	char command[PATH_MAX*2];
-	const char *path = vmlinux_name;
-	u64 start, end, len;
+	const char *path;
+	u64 len;
 
 	if (!syme)
 		return;
 
-	if (syme->lines) {
-		pthread_mutex_lock(&syme->source_lock);
+	if (syme->src == NULL) {
+		syme->src = zalloc(sizeof(*source));
+		if (syme->src == NULL)
+			return;
+		pthread_mutex_init(&syme->src->lock, NULL);
+	}
+
+	source = syme->src;
+
+	if (source->lines) {
+		pthread_mutex_lock(&source->lock);
 		goto out_assign;
 	}
 
-	sym = (struct symbol *)(syme + 1);
-	module = sym->module;
+	sym = sym_entry__symbol(syme);
+	map = syme->map;
+	path = map->dso->long_name;
 
-	if (module)
-		path = module->path;
-	if (!path)
-		return;
-
-	start = sym->obj_start;
-	if (!start)
-		start = sym->start;
-
-	if (module) {
-		section = module->sections->find_section(module->sections, ".text");
-		if (section)
-			start -= section->vma;
-	}
-
-	end = start + sym->end - sym->start + 1;
 	len = sym->end - sym->start;
 
-	sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
+	sprintf(command,
+		"objdump --start-address=0x%016Lx "
+			 "--stop-address=0x%016Lx -dS %s",
+		map->unmap_ip(map, sym->start),
+		map->unmap_ip(map, sym->end), path);
 
 	file = popen(command, "r");
 	if (!file)
 		return;
 
-	pthread_mutex_lock(&syme->source_lock);
-	syme->lines_tail = &syme->lines;
+	pthread_mutex_lock(&source->lock);
+	source->lines_tail = &source->lines;
 	while (!feof(file)) {
 		struct source_line *src;
 		size_t dummy = 0;
@@ -182,24 +231,22 @@
 			*c = 0;
 
 		src->next = NULL;
-		*syme->lines_tail = src;
-		syme->lines_tail = &src->next;
+		*source->lines_tail = src;
+		source->lines_tail = &src->next;
 
 		if (strlen(src->line)>8 && src->line[8] == ':') {
 			src->eip = strtoull(src->line, NULL, 16);
-			if (section)
-				src->eip += section->vma;
+			src->eip = map->unmap_ip(map, src->eip);
 		}
 		if (strlen(src->line)>8 && src->line[16] == ':') {
 			src->eip = strtoull(src->line, NULL, 16);
-			if (section)
-				src->eip += section->vma;
+			src->eip = map->unmap_ip(map, src->eip);
 		}
 	}
 	pclose(file);
 out_assign:
 	sym_filter_entry = syme;
-	pthread_mutex_unlock(&syme->source_lock);
+	pthread_mutex_unlock(&source->lock);
 }
 
 static void __zero_source_counters(struct sym_entry *syme)
@@ -207,7 +254,7 @@
 	int i;
 	struct source_line *line;
 
-	line = syme->lines;
+	line = syme->src->lines;
 	while (line) {
 		for (i = 0; i < nr_counters; i++)
 			line->count[i] = 0;
@@ -222,13 +269,13 @@
 	if (syme != sym_filter_entry)
 		return;
 
-	if (pthread_mutex_trylock(&syme->source_lock))
+	if (pthread_mutex_trylock(&syme->src->lock))
 		return;
 
-	if (!syme->source)
+	if (syme->src == NULL || syme->src->source == NULL)
 		goto out_unlock;
 
-	for (line = syme->lines; line; line = line->next) {
+	for (line = syme->src->lines; line; line = line->next) {
 		if (line->eip == ip) {
 			line->count[counter]++;
 			break;
@@ -237,32 +284,25 @@
 			break;
 	}
 out_unlock:
-	pthread_mutex_unlock(&syme->source_lock);
+	pthread_mutex_unlock(&syme->src->lock);
 }
 
 static void lookup_sym_source(struct sym_entry *syme)
 {
-	struct symbol *symbol = (struct symbol *)(syme + 1);
+	struct symbol *symbol = sym_entry__symbol(syme);
 	struct source_line *line;
 	char pattern[PATH_MAX];
-	char *idx;
 
 	sprintf(pattern, "<%s>:", symbol->name);
 
-	if (symbol->module) {
-		idx = strstr(pattern, "\t");
-		if (idx)
-			*idx = 0;
-	}
-
-	pthread_mutex_lock(&syme->source_lock);
-	for (line = syme->lines; line; line = line->next) {
+	pthread_mutex_lock(&syme->src->lock);
+	for (line = syme->src->lines; line; line = line->next) {
 		if (strstr(line->line, pattern)) {
-			syme->source = line;
+			syme->src->source = line;
 			break;
 		}
 	}
-	pthread_mutex_unlock(&syme->source_lock);
+	pthread_mutex_unlock(&syme->src->lock);
 }
 
 static void show_lines(struct source_line *queue, int count, int total)
@@ -292,24 +332,24 @@
 	if (!syme)
 		return;
 
-	if (!syme->source)
+	if (!syme->src->source)
 		lookup_sym_source(syme);
 
-	if (!syme->source)
+	if (!syme->src->source)
 		return;
 
-	symbol = (struct symbol *)(syme + 1);
+	symbol = sym_entry__symbol(syme);
 	printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
 	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);
 
-	pthread_mutex_lock(&syme->source_lock);
-	line = syme->source;
+	pthread_mutex_lock(&syme->src->lock);
+	line = syme->src->source;
 	while (line) {
 		total += line->count[sym_counter];
 		line = line->next;
 	}
 
-	line = syme->source;
+	line = syme->src->source;
 	while (line) {
 		float pcnt = 0.0;
 
@@ -334,13 +374,13 @@
 		line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
 		line = line->next;
 	}
-	pthread_mutex_unlock(&syme->source_lock);
+	pthread_mutex_unlock(&syme->src->lock);
 	if (more)
 		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
 }
 
 /*
- * Symbols will be added here in record_ip and will get out
+ * Symbols will be added here in event__process_sample and will get out
  * after decayed.
  */
 static LIST_HEAD(active_symbols);
@@ -411,6 +451,8 @@
 	struct sym_entry *syme, *n;
 	struct rb_root tmp = RB_ROOT;
 	struct rb_node *nd;
+	int sym_width = 0, dso_width = 0, max_dso_width;
+	const int win_width = winsize.ws_col - 1;
 
 	samples = userspace_samples = 0;
 
@@ -422,6 +464,14 @@
 	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
 		syme->snap_count = syme->count[snap];
 		if (syme->snap_count != 0) {
+
+			if ((hide_user_symbols &&
+			     syme->origin == PERF_RECORD_MISC_USER) ||
+			    (hide_kernel_symbols &&
+			     syme->origin == PERF_RECORD_MISC_KERNEL)) {
+				list_remove_active_sym(syme);
+				continue;
+			}
 			syme->weight = sym_weight(syme);
 			rb_insert_active_sym(&tmp, syme);
 			sum_ksamples += syme->snap_count;
@@ -434,8 +484,7 @@
 
 	puts(CONSOLE_CLEAR);
 
-	printf(
-"------------------------------------------------------------------------------\n");
+	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
 	printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
 		samples_per_sec,
 		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
@@ -473,33 +522,57 @@
 			printf(", %d CPUs)\n", nr_cpus);
 	}
 
-	printf("------------------------------------------------------------------------------\n\n");
+	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
 
 	if (sym_filter_entry) {
 		show_details(sym_filter_entry);
 		return;
 	}
 
+	/*
+	 * Find the longest symbol name that will be displayed
+	 */
+	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
+		syme = rb_entry(nd, struct sym_entry, rb_node);
+		if (++printed > print_entries ||
+		    (int)syme->snap_count < count_filter)
+			continue;
+
+		if (syme->map->dso->long_name_len > dso_width)
+			dso_width = syme->map->dso->long_name_len;
+
+		if (syme->name_len > sym_width)
+			sym_width = syme->name_len;
+	}
+
+	printed = 0;
+
+	max_dso_width = winsize.ws_col - sym_width - 29;
+	if (dso_width > max_dso_width)
+		dso_width = max_dso_width;
+	putchar('\n');
 	if (nr_counters == 1)
-		printf("             samples    pcnt");
+		printf("             samples  pcnt");
 	else
-		printf("   weight    samples    pcnt");
+		printf("   weight    samples  pcnt");
 
 	if (verbose)
 		printf("         RIP       ");
-	printf("   kernel function\n");
-	printf("   %s    _______   _____",
+	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
+	printf("   %s    _______ _____",
 	       nr_counters == 1 ? "      " : "______");
 	if (verbose)
-		printf("   ________________");
-	printf("   _______________\n\n");
+		printf(" ________________");
+	printf(" %-*.*s", sym_width, sym_width, graph_line);
+	printf(" %-*.*s", dso_width, dso_width, graph_line);
+	puts("\n");
 
 	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
 		struct symbol *sym;
 		double pcnt;
 
 		syme = rb_entry(nd, struct sym_entry, rb_node);
-		sym = (struct symbol *)(syme + 1);
+		sym = sym_entry__symbol(syme);
 
 		if (++printed > print_entries || (int)syme->snap_count < count_filter)
 			continue;
@@ -508,17 +581,18 @@
 					 sum_ksamples));
 
 		if (nr_counters == 1 || !display_weighted)
-			printf("%20.2f - ", syme->weight);
+			printf("%20.2f ", syme->weight);
 		else
-			printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
+			printf("%9.1f %10ld ", syme->weight, syme->snap_count);
 
 		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
 		if (verbose)
-			printf(" - %016llx", sym->start);
-		printf(" : %s", sym->name);
-		if (sym->module)
-			printf("\t[%s]", sym->module->name);
-		printf("\n");
+			printf(" %016llx", sym->start);
+		printf(" %-*.*s", sym_width, sym_width, sym->name);
+		printf(" %-*.*s\n", dso_width, dso_width,
+		       dso_width >= syme->map->dso->long_name_len ?
+					syme->map->dso->long_name :
+					syme->map->dso->short_name);
 	}
 }
 
@@ -565,10 +639,10 @@
 
 	/* zero counters of active symbol */
 	if (syme) {
-		pthread_mutex_lock(&syme->source_lock);
+		pthread_mutex_lock(&syme->src->lock);
 		__zero_source_counters(syme);
 		*target = NULL;
-		pthread_mutex_unlock(&syme->source_lock);
+		pthread_mutex_unlock(&syme->src->lock);
 	}
 
 	fprintf(stdout, "\n%s: ", msg);
@@ -584,7 +658,7 @@
 	pthread_mutex_unlock(&active_symbols_lock);
 
 	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
-		struct symbol *sym = (struct symbol *)(syme + 1);
+		struct symbol *sym = sym_entry__symbol(syme);
 
 		if (!strcmp(buf, sym->name)) {
 			found = syme;
@@ -608,7 +682,7 @@
 	char *name = NULL;
 
 	if (sym_filter_entry) {
-		struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
+		struct symbol *sym = sym_entry__symbol(sym_filter_entry);
 		name = sym->name;
 	}
 
@@ -621,7 +695,7 @@
 
 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);
 
-	if (vmlinux_name) {
+	if (symbol_conf.vmlinux_name) {
 		fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
 		fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
 		fprintf(stdout, "\t[S]     stop annotation.\n");
@@ -630,6 +704,12 @@
 	if (nr_counters > 1)
 		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
 
+	fprintf(stdout,
+		"\t[K]     hide kernel_symbols symbols.             \t(%s)\n",
+		hide_kernel_symbols ? "yes" : "no");
+	fprintf(stdout,
+		"\t[U]     hide user symbols.               \t(%s)\n",
+		hide_user_symbols ? "yes" : "no");
 	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", zero ? 1 : 0);
 	fprintf(stdout, "\t[qQ]    quit.\n");
 }
@@ -643,6 +723,8 @@
 		case 'z':
 		case 'q':
 		case 'Q':
+		case 'K':
+		case 'U':
 			return 1;
 		case 'E':
 		case 'w':
@@ -650,7 +732,7 @@
 		case 'F':
 		case 's':
 		case 'S':
-			return vmlinux_name ? 1 : 0;
+			return symbol_conf.vmlinux_name ? 1 : 0;
 		default:
 			break;
 	}
@@ -691,6 +773,11 @@
 			break;
 		case 'e':
 			prompt_integer(&print_entries, "Enter display entries (lines)");
+			if (print_entries == 0) {
+				sig_winch_handler(SIGWINCH);
+				signal(SIGWINCH, sig_winch_handler);
+			} else
+				signal(SIGWINCH, SIG_DFL);
 			break;
 		case 'E':
 			if (nr_counters > 1) {
@@ -715,9 +802,14 @@
 		case 'F':
 			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
 			break;
+		case 'K':
+			hide_kernel_symbols = !hide_kernel_symbols;
+			break;
 		case 'q':
 		case 'Q':
 			printf("exiting.\n");
+			if (dump_symtab)
+				dsos__fprintf(stderr);
 			exit(0);
 		case 's':
 			prompt_symbol(&sym_filter_entry, "Enter details symbol");
@@ -728,12 +820,15 @@
 			else {
 				struct sym_entry *syme = sym_filter_entry;
 
-				pthread_mutex_lock(&syme->source_lock);
+				pthread_mutex_lock(&syme->src->lock);
 				sym_filter_entry = NULL;
 				__zero_source_counters(syme);
-				pthread_mutex_unlock(&syme->source_lock);
+				pthread_mutex_unlock(&syme->src->lock);
 			}
 			break;
+		case 'U':
+			hide_user_symbols = !hide_user_symbols;
+			break;
 		case 'w':
 			display_weighted = ~display_weighted;
 			break;
@@ -790,7 +885,7 @@
 	NULL
 };
 
-static int symbol_filter(struct dso *self, struct symbol *sym)
+static int symbol_filter(struct map *map, struct symbol *sym)
 {
 	struct sym_entry *syme;
 	const char *name = sym->name;
@@ -812,8 +907,9 @@
 	    strstr(name, "_text_end"))
 		return 1;
 
-	syme = dso__sym_priv(self, sym);
-	pthread_mutex_init(&syme->source_lock, NULL);
+	syme = symbol__priv(sym);
+	syme->map = map;
+	syme->src = NULL;
 	if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
 		sym_filter_entry = syme;
 
@@ -824,75 +920,65 @@
 		}
 	}
 
+	if (!syme->skip)
+		syme->name_len = strlen(sym->name);
+
 	return 0;
 }
 
-static int parse_symbols(void)
+static void event__process_sample(const event_t *self, int counter)
 {
-	struct rb_node *node;
-	struct symbol  *sym;
-	int use_modules = vmlinux_name ? 1 : 0;
+	u64 ip = self->ip.ip;
+	struct sym_entry *syme;
+	struct addr_location al;
+	u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry));
-	if (kernel_dso == NULL)
-		return -1;
-
-	if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0)
-		goto out_delete_dso;
-
-	node = rb_first(&kernel_dso->syms);
-	sym = rb_entry(node, struct symbol, rb_node);
-	min_ip = sym->start;
-
-	node = rb_last(&kernel_dso->syms);
-	sym = rb_entry(node, struct symbol, rb_node);
-	max_ip = sym->end;
-
-	if (dump_symtab)
-		dso__fprintf(kernel_dso, stderr);
-
-	return 0;
-
-out_delete_dso:
-	dso__delete(kernel_dso);
-	kernel_dso = NULL;
-	return -1;
-}
-
-/*
- * Binary search in the histogram table and record the hit:
- */
-static void record_ip(u64 ip, int counter)
-{
-	struct symbol *sym = dso__find_symbol(kernel_dso, ip);
-
-	if (sym != NULL) {
-		struct sym_entry *syme = dso__sym_priv(kernel_dso, sym);
-
-		if (!syme->skip) {
-			syme->count[counter]++;
-			record_precise_ip(syme, counter, ip);
-			pthread_mutex_lock(&active_symbols_lock);
-			if (list_empty(&syme->node) || !syme->node.next)
-				__list_insert_active_sym(syme);
-			pthread_mutex_unlock(&active_symbols_lock);
+	switch (origin) {
+	case PERF_RECORD_MISC_USER:
+		if (hide_user_symbols)
 			return;
-		}
-	}
-
-	samples--;
-}
-
-static void process_event(u64 ip, int counter, int user)
-{
-	samples++;
-
-	if (user) {
-		userspace_samples++;
+		break;
+	case PERF_RECORD_MISC_KERNEL:
+		if (hide_kernel_symbols)
+			return;
+		break;
+	default:
 		return;
 	}
 
-	record_ip(ip, counter);
+	if (event__preprocess_sample(self, &al, symbol_filter) < 0 ||
+	    al.sym == NULL)
+		return;
+
+	syme = symbol__priv(al.sym);
+	if (!syme->skip) {
+		syme->count[counter]++;
+		syme->origin = origin;
+		record_precise_ip(syme, counter, ip);
+		pthread_mutex_lock(&active_symbols_lock);
+		if (list_empty(&syme->node) || !syme->node.next)
+			__list_insert_active_sym(syme);
+		pthread_mutex_unlock(&active_symbols_lock);
+		if (origin == PERF_RECORD_MISC_USER)
+			++userspace_samples;
+		++samples;
+	}
+}
+
+static int event__process(event_t *event)
+{
+	switch (event->header.type) {
+	case PERF_RECORD_COMM:
+		event__process_comm(event);
+		break;
+	case PERF_RECORD_MMAP:
+		event__process_mmap(event);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
 }
 
 struct mmap_data {
@@ -913,8 +999,6 @@
 	return head;
 }
 
-struct timeval last_read, this_read;
-
 static void mmap_read_counter(struct mmap_data *md)
 {
 	unsigned int head = mmap_read_head(md);
@@ -922,8 +1006,6 @@
 	unsigned char *data = md->base + page_size;
 	int diff;
 
-	gettimeofday(&this_read, NULL);
-
 	/*
 	 * If we're further behind than half the buffer, there's a chance
 	 * the writer will bite our tail and mess up the samples under us.
@@ -934,14 +1016,7 @@
 	 */
 	diff = head - old;
 	if (diff > md->mask / 2 || diff < 0) {
-		struct timeval iv;
-		unsigned long msecs;
-
-		timersub(&this_read, &last_read, &iv);
-		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
-
-		fprintf(stderr, "WARNING: failed to keep up with mmap data."
-				"  Last read %lu msecs ago.\n", msecs);
+		fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
 
 		/*
 		 * head points to a known good entry, start there.
@@ -949,8 +1024,6 @@
 		old = head;
 	}
 
-	last_read = this_read;
-
 	for (; old != head;) {
 		event_t *event = (event_t *)&data[old & md->mask];
 
@@ -978,13 +1051,11 @@
 			event = &event_copy;
 		}
 
+		if (event->header.type == PERF_RECORD_SAMPLE)
+			event__process_sample(event, md->counter);
+		else
+			event__process(event);
 		old += size;
-
-		if (event->header.type == PERF_RECORD_SAMPLE) {
-			int user =
-	(event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER;
-			process_event(event->ip.ip, md->counter, user);
-		}
 	}
 
 	md->prev = old;
@@ -1018,8 +1089,15 @@
 	attr = attrs + counter;
 
 	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-	attr->freq		= freq;
+
+	if (freq) {
+		attr->sample_type	|= PERF_SAMPLE_PERIOD;
+		attr->freq		= 1;
+		attr->sample_freq	= freq;
+	}
+
 	attr->inherit		= (cpu < 0) && inherit;
+	attr->mmap		= 1;
 
 try_again:
 	fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0);
@@ -1078,6 +1156,11 @@
 	int i, counter;
 	int ret;
 
+	if (target_pid != -1)
+		event__synthesize_thread(target_pid, event__process);
+	else
+		event__synthesize_threads(event__process);
+
 	for (i = 0; i < nr_cpus; i++) {
 		group_fd = -1;
 		for (counter = 0; counter < nr_counters; counter++)
@@ -1133,7 +1216,10 @@
 			    "system-wide collection from all CPUs"),
 	OPT_INTEGER('C', "CPU", &profile_cpu,
 		    "CPU to profile on"),
-	OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
+		    "hide kernel symbols"),
 	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
 		    "number of mmap data pages"),
 	OPT_INTEGER('r', "realtime", &realtime_prio,
@@ -1156,6 +1242,8 @@
 		    "profile at this frequency"),
 	OPT_INTEGER('E', "entries", &print_entries,
 		    "display this many functions"),
+	OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
+		    "hide user symbols"),
 	OPT_BOOLEAN('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_END()
@@ -1165,19 +1253,12 @@
 {
 	int counter;
 
-	symbol__init();
-
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	argc = parse_options(argc, argv, options, top_usage, 0);
 	if (argc)
 		usage_with_options(top_usage, options);
 
-	if (freq) {
-		default_interval = freq;
-		freq = 1;
-	}
-
 	/* CPU and PID are mutually exclusive */
 	if (target_pid != -1 && profile_cpu != -1) {
 		printf("WARNING: PID switch overriding CPU\n");
@@ -1188,13 +1269,31 @@
 	if (!nr_counters)
 		nr_counters = 1;
 
+	symbol_conf.priv_size = (sizeof(struct sym_entry) +
+				 (nr_counters + 1) * sizeof(unsigned long));
+	if (symbol_conf.vmlinux_name == NULL)
+		symbol_conf.try_vmlinux_path = true;
+	if (symbol__init(&symbol_conf) < 0)
+		return -1;
+
 	if (delay_secs < 1)
 		delay_secs = 1;
 
-	parse_symbols();
 	parse_source(sym_filter_entry);
 
 	/*
+	 * User specified count overrides default frequency.
+	 */
+	if (default_interval)
+		freq = 0;
+	else if (freq) {
+		default_interval = freq;
+	} else {
+		fprintf(stderr, "frequency and count are zero, aborting\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/*
 	 * Fill in the ones not specifically initialized via -c:
 	 */
 	for (counter = 0; counter < nr_counters; counter++) {
@@ -1211,5 +1310,11 @@
 	if (target_pid != -1 || profile_cpu != -1)
 		nr_cpus = 1;
 
+	get_term_dimensions(&winsize);
+	if (print_entries == 0) {
+		update_print_entries(&winsize);
+		signal(SIGWINCH, sig_winch_handler);
+	}
+
 	return __cmd_top();
 }
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0c5e4f7..abb914a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -5,6 +5,50 @@
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/header.h"
+#include "util/exec_cmd.h"
+#include "util/trace-event.h"
+
+static char const		*script_name;
+static char const		*generate_script_lang;
+
+static int default_start_script(const char *script __attribute((unused)))
+{
+	return 0;
+}
+
+static int default_stop_script(void)
+{
+	return 0;
+}
+
+static int default_generate_script(const char *outfile __attribute ((unused)))
+{
+	return 0;
+}
+
+static struct scripting_ops default_scripting_ops = {
+	.start_script		= default_start_script,
+	.stop_script		= default_stop_script,
+	.process_event		= print_event,
+	.generate_script	= default_generate_script,
+};
+
+static struct scripting_ops	*scripting_ops;
+
+static void setup_scripting(void)
+{
+	/* make sure PERF_EXEC_PATH is set for scripts */
+	perf_set_argv_exec_path(perf_exec_path());
+
+	setup_perl_scripting();
+
+	scripting_ops = &default_scripting_ops;
+}
+
+static int cleanup_scripting(void)
+{
+	return scripting_ops->stop_script();
+}
 
 #include "util/parse-options.h"
 
@@ -12,59 +56,22 @@
 #include "util/debug.h"
 
 #include "util/trace-event.h"
+#include "util/data_map.h"
+#include "util/exec_cmd.h"
 
-static char		const *input_name = "perf.data";
-static int		input;
-static unsigned long	page_size;
-static unsigned long	mmap_window = 32;
+static char const		*input_name = "perf.data";
 
-static unsigned long	total = 0;
-static unsigned long	total_comm = 0;
+static struct perf_header	*header;
+static u64			sample_type;
 
-static struct rb_root	threads;
-static struct thread	*last_match;
-
-static struct perf_header *header;
-static u64		sample_type;
-
-
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+static int process_sample_event(event_t *event)
 {
-	struct thread *thread;
-
-	thread = threads__findnew(event->comm.pid, &threads, &last_match);
-
-	dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->comm.comm, event->comm.pid);
-
-	if (thread == NULL ||
-	    thread__set_comm(thread, event->comm.comm)) {
-		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
-		return -1;
-	}
-	total_comm++;
-
-	return 0;
-}
-
-static int
-process_sample_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	char level;
-	int show = 0;
-	struct dso *dso = NULL;
-	struct thread *thread;
 	u64 ip = event->ip.ip;
 	u64 timestamp = -1;
 	u32 cpu = -1;
 	u64 period = 1;
 	void *more_data = event->ip.__more_data;
-	int cpumode;
-
-	thread = threads__findnew(event->ip.pid, &threads, &last_match);
+	struct thread *thread = threads__findnew(event->ip.pid);
 
 	if (sample_type & PERF_SAMPLE_TIME) {
 		timestamp = *(u64 *)more_data;
@@ -82,45 +89,19 @@
 		more_data += sizeof(u64);
 	}
 
-	dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
+	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 		event->header.misc,
 		event->ip.pid, event->ip.tid,
 		(void *)(long)ip,
 		(long long)period);
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
 	if (thread == NULL) {
-		eprintf("problem processing %d event, skipping it.\n",
-			event->header.type);
+		pr_debug("problem processing %d event, skipping it.\n",
+			 event->header.type);
 		return -1;
 	}
 
-	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-	if (cpumode == PERF_RECORD_MISC_KERNEL) {
-		show = SHOW_KERNEL;
-		level = 'k';
-
-		dso = kernel_dso;
-
-		dump_printf(" ...... dso: %s\n", dso->name);
-
-	} else if (cpumode == PERF_RECORD_MISC_USER) {
-
-		show = SHOW_USER;
-		level = '.';
-
-	} else {
-		show = SHOW_HV;
-		level = 'H';
-
-		dso = hypervisor_dso;
-
-		dump_printf(" ...... dso: [hypervisor]\n");
-	}
+	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
 	if (sample_type & PERF_SAMPLE_RAW) {
 		struct {
@@ -133,128 +114,189 @@
 		 * field, although it should be the same than this perf
 		 * event pid
 		 */
-		print_event(cpu, raw->data, raw->size, timestamp, thread->comm);
+		scripting_ops->process_event(cpu, raw->data, raw->size,
+					     timestamp, thread->comm);
 	}
-	total += period;
+	event__stats.total += period;
 
 	return 0;
 }
 
-static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
+static int sample_type_check(u64 type)
 {
-	trace_event(event);
+	sample_type = type;
 
-	switch (event->header.type) {
-	case PERF_RECORD_MMAP ... PERF_RECORD_LOST:
-		return 0;
-
-	case PERF_RECORD_COMM:
-		return process_comm_event(event, offset, head);
-
-	case PERF_RECORD_EXIT ... PERF_RECORD_READ:
-		return 0;
-
-	case PERF_RECORD_SAMPLE:
-		return process_sample_event(event, offset, head);
-
-	case PERF_RECORD_MAX:
-	default:
+	if (!(sample_type & PERF_SAMPLE_RAW)) {
+		fprintf(stderr,
+			"No trace sample to read. Did you call perf record "
+			"without -R?");
 		return -1;
 	}
 
 	return 0;
 }
 
+static struct perf_file_handler file_handler = {
+	.process_sample_event	= process_sample_event,
+	.process_comm_event	= event__process_comm,
+	.sample_type_check	= sample_type_check,
+};
+
 static int __cmd_trace(void)
 {
-	int ret, rc = EXIT_FAILURE;
-	unsigned long offset = 0;
-	unsigned long head = 0;
-	struct stat perf_stat;
-	event_t *event;
-	uint32_t size;
-	char *buf;
+	register_idle_thread();
+	register_perf_file_handler(&file_handler);
 
-	trace_report();
-	register_idle_thread(&threads, &last_match);
+	return mmap_dispatch_perf_file(&header, input_name,
+				       0, 0, &event__cwdlen, &event__cwd);
+}
 
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		perror("failed to open file");
-		exit(-1);
+struct script_spec {
+	struct list_head	node;
+	struct scripting_ops	*ops;
+	char			spec[0];
+};
+
+LIST_HEAD(script_specs);
+
+static struct script_spec *script_spec__new(const char *spec,
+					    struct scripting_ops *ops)
+{
+	struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
+
+	if (s != NULL) {
+		strcpy(s->spec, spec);
+		s->ops = ops;
 	}
 
-	ret = fstat(input, &perf_stat);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
+	return s;
+}
+
+static void script_spec__delete(struct script_spec *s)
+{
+	free(s->spec);
+	free(s);
+}
+
+static void script_spec__add(struct script_spec *s)
+{
+	list_add_tail(&s->node, &script_specs);
+}
+
+static struct script_spec *script_spec__find(const char *spec)
+{
+	struct script_spec *s;
+
+	list_for_each_entry(s, &script_specs, node)
+		if (strcasecmp(s->spec, spec) == 0)
+			return s;
+	return NULL;
+}
+
+static struct script_spec *script_spec__findnew(const char *spec,
+						struct scripting_ops *ops)
+{
+	struct script_spec *s = script_spec__find(spec);
+
+	if (s)
+		return s;
+
+	s = script_spec__new(spec, ops);
+	if (!s)
+		goto out_delete_spec;
+
+	script_spec__add(s);
+
+	return s;
+
+out_delete_spec:
+	script_spec__delete(s);
+
+	return NULL;
+}
+
+int script_spec_register(const char *spec, struct scripting_ops *ops)
+{
+	struct script_spec *s;
+
+	s = script_spec__find(spec);
+	if (s)
+		return -1;
+
+	s = script_spec__findnew(spec, ops);
+	if (!s)
+		return -1;
+
+	return 0;
+}
+
+static struct scripting_ops *script_spec__lookup(const char *spec)
+{
+	struct script_spec *s = script_spec__find(spec);
+	if (!s)
+		return NULL;
+
+	return s->ops;
+}
+
+static void list_available_languages(void)
+{
+	struct script_spec *s;
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, "Scripting language extensions (used in "
+		"perf trace -s [spec:]script.[spec]):\n\n");
+
+	list_for_each_entry(s, &script_specs, node)
+		fprintf(stderr, "  %-42s [%s]\n", s->spec, s->ops->name);
+
+	fprintf(stderr, "\n");
+}
+
+static int parse_scriptname(const struct option *opt __used,
+			    const char *str, int unset __used)
+{
+	char spec[PATH_MAX];
+	const char *script, *ext;
+	int len;
+
+	if (strcmp(str, "list") == 0) {
+		list_available_languages();
+		return 0;
 	}
 
-	if (!perf_stat.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-	header = perf_header__read(input);
-	head = header->data_offset;
-	sample_type = perf_header__sample_type(header);
-
-	if (!(sample_type & PERF_SAMPLE_RAW))
-		die("No trace sample to read. Did you call perf record "
-		    "without -R?");
-
-	if (load_kernel() < 0) {
-		perror("failed to load kernel symbols");
-		return EXIT_FAILURE;
+	script = strchr(str, ':');
+	if (script) {
+		len = script - str;
+		if (len >= PATH_MAX) {
+			fprintf(stderr, "invalid language specifier");
+			return -1;
+		}
+		strncpy(spec, str, len);
+		spec[len] = '\0';
+		scripting_ops = script_spec__lookup(spec);
+		if (!scripting_ops) {
+			fprintf(stderr, "invalid language specifier");
+			return -1;
+		}
+		script++;
+	} else {
+		script = str;
+		ext = strchr(script, '.');
+		if (!ext) {
+			fprintf(stderr, "invalid script extension");
+			return -1;
+		}
+		scripting_ops = script_spec__lookup(++ext);
+		if (!scripting_ops) {
+			fprintf(stderr, "invalid script extension");
+			return -1;
+		}
 	}
 
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
+	script_name = strdup(script);
 
-more:
-	event = (event_t *)(buf + head);
-
-	if (head + event->header.size >= page_size * mmap_window) {
-		unsigned long shift = page_size * (head / page_size);
-		int res;
-
-		res = munmap(buf, page_size * mmap_window);
-		assert(res == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-
-	if (!size || process_event(event, offset, head) < 0) {
-
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
-	}
-
-	head += size;
-
-	if (offset + head < (unsigned long)perf_stat.st_size)
-		goto more;
-
-	rc = EXIT_SUCCESS;
-	close(input);
-
-	return rc;
+	return 0;
 }
 
 static const char * const annotate_usage[] = {
@@ -267,13 +309,24 @@
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('l', "latency", &latency_format,
+		    "show latency attributes (irqs/preemption disabled, etc)"),
+	OPT_CALLBACK('s', "script", NULL, "name",
+		     "script file name (lang:script name, script name, or *)",
+		     parse_scriptname),
+	OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
+		   "generate perf-trace.xx script in specified language"),
+
 	OPT_END()
 };
 
 int cmd_trace(int argc, const char **argv, const char *prefix __used)
 {
-	symbol__init();
-	page_size = getpagesize();
+	int err;
+
+	symbol__init(0);
+
+	setup_scripting();
 
 	argc = parse_options(argc, argv, options, annotate_usage, 0);
 	if (argc) {
@@ -287,5 +340,50 @@
 
 	setup_pager();
 
-	return __cmd_trace();
+	if (generate_script_lang) {
+		struct stat perf_stat;
+
+		int input = open(input_name, O_RDONLY);
+		if (input < 0) {
+			perror("failed to open file");
+			exit(-1);
+		}
+
+		err = fstat(input, &perf_stat);
+		if (err < 0) {
+			perror("failed to stat file");
+			exit(-1);
+		}
+
+		if (!perf_stat.st_size) {
+			fprintf(stderr, "zero-sized file, nothing to do!\n");
+			exit(0);
+		}
+
+		scripting_ops = script_spec__lookup(generate_script_lang);
+		if (!scripting_ops) {
+			fprintf(stderr, "invalid language specifier");
+			return -1;
+		}
+
+		header = perf_header__new();
+		if (header == NULL)
+			return -1;
+
+		perf_header__read(header, input);
+		err = scripting_ops->generate_script("perf-trace");
+		goto out;
+	}
+
+	if (script_name) {
+		err = scripting_ops->start_script(script_name);
+		if (err)
+			goto out;
+	}
+
+	err = __cmd_trace();
+
+	cleanup_scripting();
+out:
+	return err;
 }
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index e11d8d2..a3d8bf6 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -15,6 +15,8 @@
 extern int check_pager_config(const char *cmd);
 
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
+extern int cmd_bench(int argc, const char **argv, const char *prefix);
+extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
 extern int cmd_help(int argc, const char **argv, const char *prefix);
 extern int cmd_sched(int argc, const char **argv, const char *prefix);
 extern int cmd_list(int argc, const char **argv, const char *prefix);
@@ -25,5 +27,7 @@
 extern int cmd_top(int argc, const char **argv, const char *prefix);
 extern int cmd_trace(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
+extern int cmd_probe(int argc, const char **argv, const char *prefix);
+extern int cmd_kmem(int argc, const char **argv, const char *prefix);
 
 #endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 00326e2..02b09ea 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -3,6 +3,8 @@
 # command name			category [deprecated] [common]
 #
 perf-annotate			mainporcelain common
+perf-bench			mainporcelain common
+perf-buildid-list		mainporcelain common
 perf-list			mainporcelain common
 perf-sched			mainporcelain common
 perf-record			mainporcelain common
@@ -11,3 +13,5 @@
 perf-timechart			mainporcelain common
 perf-top			mainporcelain common
 perf-trace			mainporcelain common
+perf-probe			mainporcelain common
+perf-kmem			mainporcelain common
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index fdd42a8..f000c30 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -137,6 +137,8 @@
 	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS	= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS	= 8,
 };
 
 Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 19fc7fe..cf64049 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -14,6 +14,7 @@
 #include "util/run-command.h"
 #include "util/parse-events.h"
 #include "util/string.h"
+#include "util/debugfs.h"
 
 const char perf_usage_string[] =
 	"perf [--version] [--help] COMMAND [ARGS]";
@@ -89,8 +90,8 @@
 		/*
 		 * Check remaining flags.
 		 */
-		if (!prefixcmp(cmd, "--exec-path")) {
-			cmd += 11;
+		if (!prefixcmp(cmd, CMD_EXEC_PATH)) {
+			cmd += strlen(CMD_EXEC_PATH);
 			if (*cmd == '=')
 				perf_set_argv_exec_path(cmd + 1);
 			else {
@@ -117,8 +118,8 @@
 			(*argv)++;
 			(*argc)--;
 			handled++;
-		} else if (!prefixcmp(cmd, "--perf-dir=")) {
-			setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1);
+		} else if (!prefixcmp(cmd, CMD_PERF_DIR)) {
+			setenv(PERF_DIR_ENVIRONMENT, cmd + strlen(CMD_PERF_DIR), 1);
 			if (envchanged)
 				*envchanged = 1;
 		} else if (!strcmp(cmd, "--work-tree")) {
@@ -131,8 +132,8 @@
 				*envchanged = 1;
 			(*argv)++;
 			(*argc)--;
-		} else if (!prefixcmp(cmd, "--work-tree=")) {
-			setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1);
+		} else if (!prefixcmp(cmd, CMD_WORK_TREE)) {
+			setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + strlen(CMD_WORK_TREE), 1);
 			if (envchanged)
 				*envchanged = 1;
 		} else if (!strcmp(cmd, "--debugfs-dir")) {
@@ -146,8 +147,8 @@
 				*envchanged = 1;
 			(*argv)++;
 			(*argc)--;
-		} else if (!prefixcmp(cmd, "--debugfs-dir=")) {
-			strncpy(debugfs_mntpt, cmd + 14, MAXPATHLEN);
+		} else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
+			strncpy(debugfs_mntpt, cmd + strlen(CMD_DEBUGFS_DIR), MAXPATHLEN);
 			debugfs_mntpt[MAXPATHLEN - 1] = '\0';
 			if (envchanged)
 				*envchanged = 1;
@@ -284,17 +285,21 @@
 {
 	const char *cmd = argv[0];
 	static struct cmd_struct commands[] = {
-		{ "help", cmd_help, 0 },
-		{ "list", cmd_list, 0 },
-		{ "record", cmd_record, 0 },
-		{ "report", cmd_report, 0 },
-		{ "stat", cmd_stat, 0 },
-		{ "timechart", cmd_timechart, 0 },
-		{ "top", cmd_top, 0 },
-		{ "annotate", cmd_annotate, 0 },
-		{ "version", cmd_version, 0 },
-		{ "trace", cmd_trace, 0 },
-		{ "sched", cmd_sched, 0 },
+		{ "buildid-list", cmd_buildid_list, 0 },
+		{ "help",	cmd_help,	0 },
+		{ "list",	cmd_list,	0 },
+		{ "record",	cmd_record,	0 },
+		{ "report",	cmd_report,	0 },
+		{ "bench",	cmd_bench,	0 },
+		{ "stat",	cmd_stat,	0 },
+		{ "timechart",	cmd_timechart,	0 },
+		{ "top",	cmd_top,	0 },
+		{ "annotate",	cmd_annotate,	0 },
+		{ "version",	cmd_version,	0 },
+		{ "trace",	cmd_trace,	0 },
+		{ "sched",	cmd_sched,	0 },
+		{ "probe",	cmd_probe,	0 },
+		{ "kmem",	cmd_kmem,	0 },
 	};
 	unsigned int i;
 	static const char ext[] = STRIP_EXTENSION;
@@ -382,45 +387,12 @@
 /* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */
 static void get_debugfs_mntpt(void)
 {
-	FILE *file;
-	char fs_type[100];
-	char debugfs[MAXPATHLEN];
+	const char *path = debugfs_find_mountpoint();
 
-	/*
-	 * try the standard location
-	 */
-	if (valid_debugfs_mount("/sys/kernel/debug/") == 0) {
-		strcpy(debugfs_mntpt, "/sys/kernel/debug/");
-		return;
-	}
-
-	/*
-	 * try the sane location
-	 */
-	if (valid_debugfs_mount("/debug/") == 0) {
-		strcpy(debugfs_mntpt, "/debug/");
-		return;
-	}
-
-	/*
-	 * give up and parse /proc/mounts
-	 */
-	file = fopen("/proc/mounts", "r");
-	if (file == NULL)
-		return;
-
-	while (fscanf(file, "%*s %"
-		      STR(MAXPATHLEN)
-		      "s %99s %*s %*d %*d\n",
-		      debugfs, fs_type) == 2) {
-		if (strcmp(fs_type, "debugfs") == 0)
-			break;
-	}
-	fclose(file);
-	if (strcmp(fs_type, "debugfs") == 0) {
-		strncpy(debugfs_mntpt, debugfs, MAXPATHLEN);
-		debugfs_mntpt[MAXPATHLEN - 1] = '\0';
-	}
+	if (path)
+		strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt));
+	else
+		debugfs_mntpt[0] = '\0';
 }
 
 int main(int argc, const char **argv)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 8cc4623..454d5d5 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -47,6 +47,18 @@
 #define cpu_relax()	asm volatile("":::"memory")
 #endif
 
+#ifdef __alpha__
+#include "../../arch/alpha/include/asm/unistd.h"
+#define rmb()		asm volatile("mb" ::: "memory")
+#define cpu_relax()	asm volatile("" ::: "memory")
+#endif
+
+#ifdef __ia64__
+#include "../../arch/ia64/include/asm/unistd.h"
+#define rmb()		asm volatile ("mf" ::: "memory")
+#define cpu_relax()	asm volatile ("hint @pause" ::: "memory")
+#endif
+
 #include <time.h>
 #include <unistd.h>
 #include <sys/types.h>
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
new file mode 100644
index 0000000..af78d9a
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
@@ -0,0 +1,134 @@
+/*
+ * This file was generated automatically by ExtUtils::ParseXS version 2.18_02 from the
+ * contents of Context.xs. Do not edit this file, edit Context.xs instead.
+ *
+ *	ANY CHANGES MADE HERE WILL BE LOST! 
+ *
+ */
+
+#line 1 "Context.xs"
+/*
+ * Context.xs.  XS interfaces for perf trace.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "../../../util/trace-event-perl.h"
+
+#ifndef PERL_UNUSED_VAR
+#  define PERL_UNUSED_VAR(var) if (0) var = var
+#endif
+
+#line 41 "Context.c"
+
+XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_pc)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_pc", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+	struct scripting_context *	context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+	int	RETVAL;
+	dXSTARG;
+
+	RETVAL = common_pc(context);
+	XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+
+XS(XS_Perf__Trace__Context_common_flags); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_flags)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_flags", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+	struct scripting_context *	context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+	int	RETVAL;
+	dXSTARG;
+
+	RETVAL = common_flags(context);
+	XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+
+XS(XS_Perf__Trace__Context_common_lock_depth); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_lock_depth)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_lock_depth", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+	struct scripting_context *	context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+	int	RETVAL;
+	dXSTARG;
+
+	RETVAL = common_lock_depth(context);
+	XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+#ifdef __cplusplus
+extern "C"
+#endif
+XS(boot_Perf__Trace__Context); /* prototype to pass -Wmissing-prototypes */
+XS(boot_Perf__Trace__Context)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    const char* file = __FILE__;
+
+    PERL_UNUSED_VAR(cv); /* -W */
+    PERL_UNUSED_VAR(items); /* -W */
+    XS_VERSION_BOOTCHECK ;
+
+        newXSproto("Perf::Trace::Context::common_pc", XS_Perf__Trace__Context_common_pc, file, "$");
+        newXSproto("Perf::Trace::Context::common_flags", XS_Perf__Trace__Context_common_flags, file, "$");
+        newXSproto("Perf::Trace::Context::common_lock_depth", XS_Perf__Trace__Context_common_lock_depth, file, "$");
+    if (PL_unitcheckav)
+         call_list(PL_scopestack_ix, PL_unitcheckav);
+    XSRETURN_YES;
+}
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
new file mode 100644
index 0000000..fb78006
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
@@ -0,0 +1,41 @@
+/*
+ * Context.xs.  XS interfaces for perf trace.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "../../../util/trace-event-perl.h"
+
+MODULE = Perf::Trace::Context		PACKAGE = Perf::Trace::Context
+PROTOTYPES: ENABLE
+
+int
+common_pc(context)
+	struct scripting_context * context
+
+int
+common_flags(context)
+	struct scripting_context * context
+
+int
+common_lock_depth(context)
+	struct scripting_context * context
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL
new file mode 100644
index 0000000..decdeb0
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL
@@ -0,0 +1,17 @@
+use 5.010000;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+    NAME              => 'Perf::Trace::Context',
+    VERSION_FROM      => 'lib/Perf/Trace/Context.pm', # finds $VERSION
+    PREREQ_PM         => {}, # e.g., Module::Name => 1.1
+    ($] >= 5.005 ?     ## Add these new keywords supported since 5.005
+      (ABSTRACT_FROM  => 'lib/Perf/Trace/Context.pm', # retrieve abstract from module
+       AUTHOR         => 'Tom Zanussi <tzanussi@gmail.com>') : ()),
+    LIBS              => [''], # e.g., '-lm'
+    DEFINE            => '-I ../..', # e.g., '-DHAVE_SOMETHING'
+    INC               => '-I.', # e.g., '-I. -I/usr/include/other'
+	# Un-comment this if you add C files to link with later:
+    OBJECT            => 'Context.o', # link all the C files too
+);
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README
new file mode 100644
index 0000000..9a97076
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/README
@@ -0,0 +1,59 @@
+Perf-Trace-Util version 0.01
+============================
+
+This module contains utility functions for use with perf trace.
+
+Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
+that the core perf support for Perl calls on and should always be
+'used', while Util.pm contains useful but optional utility functions
+that scripts may want to use.  Context.pm contains the Perl->C
+interface that allows scripts to access data in the embedding perf
+executable; scripts wishing to do that should 'use Context.pm'.
+
+The Perl->C perf interface is completely driven by Context.xs.  If you
+want to add new Perl functions that end up accessing C data in the
+perf executable, you add desciptions of the new functions here.
+scripting_context is a pointer to the perf data in the perf executable
+that you want to access - it's passed as the second parameter,
+$context, to all handler functions.
+
+After you do that:
+
+  perl Makefile.PL   # to create a Makefile for the next step
+  make               # to create Context.c
+
+  edit Context.c to add const to the char* file = __FILE__ line in
+  XS(boot_Perf__Trace__Context) to silence a warning/error.
+
+  You can delete the Makefile, object files and anything else that was
+  generated e.g. blib and shared library, etc, except for of course
+  Context.c
+
+  You should then be able to run the normal perf make as usual.
+
+INSTALLATION
+
+Building perf with perf trace Perl scripting should install this
+module in the right place.
+
+You should make sure libperl and ExtUtils/Embed.pm are installed first
+e.g. apt-get install libperl-dev or yum install perl-ExtUtils-Embed.
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+  None
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2009 by Tom Zanussi <tzanussi@gmail.com>
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
new file mode 100644
index 0000000..6c7f365
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
@@ -0,0 +1,55 @@
+package Perf::Trace::Context;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+	common_pc common_flags common_lock_depth
+);
+
+our $VERSION = '0.01';
+
+require XSLoader;
+XSLoader::load('Perf::Trace::Context', $VERSION);
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Context - Perl extension for accessing functions in perf.
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Context;
+
+=head1 SEE ALSO
+
+Perf (trace) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.com<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
new file mode 100644
index 0000000..9df376a
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
@@ -0,0 +1,192 @@
+package Perf::Trace::Core;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+define_flag_field define_flag_value flag_str dump_flag_fields
+define_symbolic_field define_symbolic_value symbol_str dump_symbolic_fields
+trace_flag_str
+);
+
+our $VERSION = '0.01';
+
+my %trace_flags = (0x00 => "NONE",
+		   0x01 => "IRQS_OFF",
+		   0x02 => "IRQS_NOSUPPORT",
+		   0x04 => "NEED_RESCHED",
+		   0x08 => "HARDIRQ",
+		   0x10 => "SOFTIRQ");
+
+sub trace_flag_str
+{
+    my ($value) = @_;
+
+    my $string;
+
+    my $print_delim = 0;
+
+    foreach my $idx (sort {$a <=> $b} keys %trace_flags) {
+	if (!$value && !$idx) {
+	    $string .= "NONE";
+	    last;
+	}
+
+	if ($idx && ($value & $idx) == $idx) {
+	    if ($print_delim) {
+		$string .= " | ";
+	    }
+	    $string .= "$trace_flags{$idx}";
+	    $print_delim = 1;
+	    $value &= ~$idx;
+	}
+    }
+
+    return $string;
+}
+
+my %flag_fields;
+my %symbolic_fields;
+
+sub flag_str
+{
+    my ($event_name, $field_name, $value) = @_;
+
+    my $string;
+
+    if ($flag_fields{$event_name}{$field_name}) {
+	my $print_delim = 0;
+	foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event_name}{$field_name}{"values"}}) {
+	    if (!$value && !$idx) {
+		$string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
+		last;
+	    }
+	    if ($idx && ($value & $idx) == $idx) {
+		if ($print_delim && $flag_fields{$event_name}{$field_name}{'delim'}) {
+		    $string .= " $flag_fields{$event_name}{$field_name}{'delim'} ";
+		}
+		$string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
+		$print_delim = 1;
+		$value &= ~$idx;
+	    }
+	}
+    }
+
+    return $string;
+}
+
+sub define_flag_field
+{
+    my ($event_name, $field_name, $delim) = @_;
+
+    $flag_fields{$event_name}{$field_name}{"delim"} = $delim;
+}
+
+sub define_flag_value
+{
+    my ($event_name, $field_name, $value, $field_str) = @_;
+
+    $flag_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
+}
+
+sub dump_flag_fields
+{
+    for my $event (keys %flag_fields) {
+	print "event $event:\n";
+	for my $field (keys %{$flag_fields{$event}}) {
+	    print "    field: $field:\n";
+	    print "        delim: $flag_fields{$event}{$field}{'delim'}\n";
+	    foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event}{$field}{"values"}}) {
+		print "        value $idx: $flag_fields{$event}{$field}{'values'}{$idx}\n";
+	    }
+	}
+    }
+}
+
+sub symbol_str
+{
+    my ($event_name, $field_name, $value) = @_;
+
+    if ($symbolic_fields{$event_name}{$field_name}) {
+	foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event_name}{$field_name}{"values"}}) {
+	    if (!$value && !$idx) {
+		return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}";
+		last;
+	    }
+	    if ($value == $idx) {
+		return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}";
+	    }
+	}
+    }
+
+    return undef;
+}
+
+sub define_symbolic_field
+{
+    my ($event_name, $field_name) = @_;
+
+    # nothing to do, really
+}
+
+sub define_symbolic_value
+{
+    my ($event_name, $field_name, $value, $field_str) = @_;
+
+    $symbolic_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
+}
+
+sub dump_symbolic_fields
+{
+    for my $event (keys %symbolic_fields) {
+	print "event $event:\n";
+	for my $field (keys %{$symbolic_fields{$event}}) {
+	    print "    field: $field:\n";
+	    foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event}{$field}{"values"}}) {
+		print "        value $idx: $symbolic_fields{$event}{$field}{'values'}{$idx}\n";
+	    }
+	}
+    }
+}
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Core - Perl extension for perf trace
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Core
+
+=head1 SEE ALSO
+
+Perf (trace) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.com<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
new file mode 100644
index 0000000..052f132
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -0,0 +1,88 @@
+package Perf::Trace::Util;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+avg nsecs nsecs_secs nsecs_nsecs nsecs_usecs print_nsecs
+);
+
+our $VERSION = '0.01';
+
+sub avg
+{
+    my ($total, $n) = @_;
+
+    return $total / $n;
+}
+
+my $NSECS_PER_SEC    = 1000000000;
+
+sub nsecs
+{
+    my ($secs, $nsecs) = @_;
+
+    return $secs * $NSECS_PER_SEC + $nsecs;
+}
+
+sub nsecs_secs {
+    my ($nsecs) = @_;
+
+    return $nsecs / $NSECS_PER_SEC;
+}
+
+sub nsecs_nsecs {
+    my ($nsecs) = @_;
+
+    return $nsecs - nsecs_secs($nsecs);
+}
+
+sub nsecs_str {
+    my ($nsecs) = @_;
+
+    my $str = sprintf("%5u.%09u", nsecs_secs($nsecs), nsecs_nsecs($nsecs));
+
+    return $str;
+}
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Util - Perl extension for perf trace
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Util;
+
+=head1 SEE ALSO
+
+Perf (trace) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.com<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/typemap b/tools/perf/scripts/perl/Perf-Trace-Util/typemap
new file mode 100644
index 0000000..8408368
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/typemap
@@ -0,0 +1 @@
+struct scripting_context * T_PTR
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record
new file mode 100644
index 0000000..c7ec5de
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-record
@@ -0,0 +1,7 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry
+
+
+
+
+
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-report b/tools/perf/scripts/perl/bin/check-perf-trace-report
new file mode 100644
index 0000000..89948b0
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-report
@@ -0,0 +1,5 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/check-perf-trace.pl
+
+
+
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-record b/tools/perf/scripts/perl/bin/rw-by-file-record
new file mode 100644
index 0000000..b25056e
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-file-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
new file mode 100644
index 0000000..f5dcf9c
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -0,0 +1,5 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl
+
+
+
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-record b/tools/perf/scripts/perl/bin/rw-by-pid-record
new file mode 100644
index 0000000..8903979
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
new file mode 100644
index 0000000..cea16f7
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -0,0 +1,5 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl
+
+
+
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-record b/tools/perf/scripts/perl/bin/wakeup-latency-record
new file mode 100644
index 0000000..6abedda
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-record
@@ -0,0 +1,6 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup
+
+
+
+
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
new file mode 100644
index 0000000..85769dc
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -0,0 +1,5 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl
+
+
+
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record
new file mode 100644
index 0000000..fce6637
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
new file mode 100644
index 0000000..aa68435
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -0,0 +1,6 @@
+#!/bin/bash
+perf trace -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl
+
+
+
+
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
new file mode 100644
index 0000000..4e7dc0a
--- /dev/null
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -0,0 +1,106 @@
+# perf trace event handlers, generated by perf trace -g perl
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# This script tests basic functionality such as flag and symbol
+# strings, common_xxx() calls back into perf, begin, end, unhandled
+# events, etc.  Basically, if this script runs successfully and
+# displays expected results, perl scripting support should be ok.
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Context;
+use Perf::Trace::Util;
+
+sub trace_begin
+{
+    print "trace_begin\n";
+}
+
+sub trace_end
+{
+    print "trace_end\n";
+
+    print_unhandled();
+}
+
+sub irq::softirq_entry
+{
+	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	    $common_pid, $common_comm,
+	    $vec) = @_;
+
+	print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
+		     $common_pid, $common_comm);
+
+	print_uncommon($context);
+
+	printf("vec=%s\n",
+	       symbol_str("irq::softirq_entry", "vec", $vec));
+}
+
+sub kmem::kmalloc
+{
+	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	    $common_pid, $common_comm,
+	    $call_site, $ptr, $bytes_req, $bytes_alloc,
+	    $gfp_flags) = @_;
+
+	print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
+		     $common_pid, $common_comm);
+
+	print_uncommon($context);
+
+	printf("call_site=%p, ptr=%p, bytes_req=%u, bytes_alloc=%u, ".
+	       "gfp_flags=%s\n",
+	       $call_site, $ptr, $bytes_req, $bytes_alloc,
+
+	       flag_str("kmem::kmalloc", "gfp_flags", $gfp_flags));
+}
+
+# print trace fields not included in handler args
+sub print_uncommon
+{
+    my ($context) = @_;
+
+    printf("common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, ",
+	   common_pc($context), trace_flag_str(common_flags($context)),
+	   common_lock_depth($context));
+
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+	return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+	   "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+	printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
+
+sub print_header
+{
+	my ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;
+
+	printf("%-20s %5u %05u.%09u %8u %-20s ",
+	       $event_name, $cpu, $secs, $nsecs, $pid, $comm);
+}
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
new file mode 100644
index 0000000..61f9156
--- /dev/null
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -0,0 +1,105 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display r/w activity for files read/written to for a given program
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+# change this to the comm of the program you're interested in
+my $for_comm = "perf";
+
+my %reads;
+my %writes;
+
+sub syscalls::sys_enter_read
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+    if ($common_comm eq $for_comm) {
+	$reads{$fd}{bytes_requested} += $count;
+	$reads{$fd}{total_reads}++;
+    }
+}
+
+sub syscalls::sys_enter_write
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+    if ($common_comm eq $for_comm) {
+	$writes{$fd}{bytes_written} += $count;
+	$writes{$fd}{total_writes}++;
+    }
+}
+
+sub trace_end
+{
+    printf("file read counts for $for_comm:\n\n");
+
+    printf("%6s  %10s  %10s\n", "fd", "# reads", "bytes_requested");
+    printf("%6s  %10s  %10s\n", "------", "----------", "-----------");
+
+    foreach my $fd (sort {$reads{$b}{bytes_requested} <=>
+			      $reads{$a}{bytes_requested}} keys %reads) {
+	my $total_reads = $reads{$fd}{total_reads};
+	my $bytes_requested = $reads{$fd}{bytes_requested};
+	printf("%6u  %10u  %10u\n", $fd, $total_reads, $bytes_requested);
+    }
+
+    printf("\nfile write counts for $for_comm:\n\n");
+
+    printf("%6s  %10s  %10s\n", "fd", "# writes", "bytes_written");
+    printf("%6s  %10s  %10s\n", "------", "----------", "-----------");
+
+    foreach my $fd (sort {$writes{$b}{bytes_written} <=>
+			      $writes{$a}{bytes_written}} keys %writes) {
+	my $total_writes = $writes{$fd}{total_writes};
+	my $bytes_written = $writes{$fd}{bytes_written};
+	printf("%6u  %10u  %10u\n", $fd, $total_writes, $bytes_written);
+    }
+
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+	return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+	   "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+	printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
+
+
diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl
new file mode 100644
index 0000000..da601fa
--- /dev/null
+++ b/tools/perf/scripts/perl/rw-by-pid.pl
@@ -0,0 +1,170 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display r/w activity for all processes
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my %reads;
+my %writes;
+
+sub syscalls::sys_exit_read
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$nr, $ret) = @_;
+
+    if ($ret > 0) {
+	$reads{$common_pid}{bytes_read} += $ret;
+    } else {
+	if (!defined ($reads{$common_pid}{bytes_read})) {
+	    $reads{$common_pid}{bytes_read} = 0;
+	}
+	$reads{$common_pid}{errors}{$ret}++;
+    }
+}
+
+sub syscalls::sys_enter_read
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$nr, $fd, $buf, $count) = @_;
+
+    $reads{$common_pid}{bytes_requested} += $count;
+    $reads{$common_pid}{total_reads}++;
+    $reads{$common_pid}{comm} = $common_comm;
+}
+
+sub syscalls::sys_exit_write
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$nr, $ret) = @_;
+
+    if ($ret <= 0) {
+	$writes{$common_pid}{errors}{$ret}++;
+    }
+}
+
+sub syscalls::sys_enter_write
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$nr, $fd, $buf, $count) = @_;
+
+    $writes{$common_pid}{bytes_written} += $count;
+    $writes{$common_pid}{total_writes}++;
+    $writes{$common_pid}{comm} = $common_comm;
+}
+
+sub trace_end
+{
+    printf("read counts by pid:\n\n");
+
+    printf("%6s  %20s  %10s  %10s  %10s\n", "pid", "comm",
+	   "# reads", "bytes_requested", "bytes_read");
+    printf("%6s  %-20s  %10s  %10s  %10s\n", "------", "--------------------",
+	   "-----------", "----------", "----------");
+
+    foreach my $pid (sort {$reads{$b}{bytes_read} <=>
+			       $reads{$a}{bytes_read}} keys %reads) {
+	my $comm = $reads{$pid}{comm};
+	my $total_reads = $reads{$pid}{total_reads};
+	my $bytes_requested = $reads{$pid}{bytes_requested};
+	my $bytes_read = $reads{$pid}{bytes_read};
+
+	printf("%6s  %-20s  %10s  %10s  %10s\n", $pid, $comm,
+	       $total_reads, $bytes_requested, $bytes_read);
+    }
+
+    printf("\nfailed reads by pid:\n\n");
+
+    printf("%6s  %20s  %6s  %10s\n", "pid", "comm", "error #", "# errors");
+    printf("%6s  %20s  %6s  %10s\n", "------", "--------------------",
+	   "------", "----------");
+
+    foreach my $pid (keys %reads) {
+	my $comm = $reads{$pid}{comm};
+	foreach my $err (sort {$reads{$b}{comm} cmp $reads{$a}{comm}}
+			 keys %{$reads{$pid}{errors}}) {
+	    my $errors = $reads{$pid}{errors}{$err};
+
+	    printf("%6d  %-20s  %6d  %10s\n", $pid, $comm, $err, $errors);
+	}
+    }
+
+    printf("\nwrite counts by pid:\n\n");
+
+    printf("%6s  %20s  %10s  %10s\n", "pid", "comm",
+	   "# writes", "bytes_written");
+    printf("%6s  %-20s  %10s  %10s\n", "------", "--------------------",
+	   "-----------", "----------");
+
+    foreach my $pid (sort {$writes{$b}{bytes_written} <=>
+			       $writes{$a}{bytes_written}} keys %writes) {
+	my $comm = $writes{$pid}{comm};
+	my $total_writes = $writes{$pid}{total_writes};
+	my $bytes_written = $writes{$pid}{bytes_written};
+
+	printf("%6s  %-20s  %10s  %10s\n", $pid, $comm,
+	       $total_writes, $bytes_written);
+    }
+
+    printf("\nfailed writes by pid:\n\n");
+
+    printf("%6s  %20s  %6s  %10s\n", "pid", "comm", "error #", "# errors");
+    printf("%6s  %20s  %6s  %10s\n", "------", "--------------------",
+	   "------", "----------");
+
+    foreach my $pid (keys %writes) {
+	my $comm = $writes{$pid}{comm};
+	foreach my $err (sort {$writes{$b}{comm} cmp $writes{$a}{comm}}
+			 keys %{$writes{$pid}{errors}}) {
+	    my $errors = $writes{$pid}{errors}{$err};
+
+	    printf("%6d  %-20s  %6d  %10s\n", $pid, $comm, $err, $errors);
+	}
+    }
+
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+	return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+	   "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+	printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl
new file mode 100644
index 0000000..ed58ef2
--- /dev/null
+++ b/tools/perf/scripts/perl/wakeup-latency.pl
@@ -0,0 +1,103 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display avg/min/max wakeup latency
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my %last_wakeup;
+
+my $max_wakeup_latency;
+my $min_wakeup_latency;
+my $total_wakeup_latency;
+my $total_wakeups;
+
+sub sched::sched_switch
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid,
+	$next_prio) = @_;
+
+    my $wakeup_ts = $last_wakeup{$common_cpu}{ts};
+    if ($wakeup_ts) {
+	my $switch_ts = nsecs($common_secs, $common_nsecs);
+	my $wakeup_latency = $switch_ts - $wakeup_ts;
+	if ($wakeup_latency > $max_wakeup_latency) {
+	    $max_wakeup_latency = $wakeup_latency;
+	}
+	if ($wakeup_latency < $min_wakeup_latency) {
+	    $min_wakeup_latency = $wakeup_latency;
+	}
+	$total_wakeup_latency += $wakeup_latency;
+	$total_wakeups++;
+    }
+    $last_wakeup{$common_cpu}{ts} = 0;
+}
+
+sub sched::sched_wakeup
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$comm, $pid, $prio, $success, $target_cpu) = @_;
+
+    $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs);
+}
+
+sub trace_begin
+{
+    $min_wakeup_latency = 1000000000;
+    $max_wakeup_latency = 0;
+}
+
+sub trace_end
+{
+    printf("wakeup_latency stats:\n\n");
+    print "total_wakeups: $total_wakeups\n";
+    printf("avg_wakeup_latency (ns): %u\n",
+	   avg($total_wakeup_latency, $total_wakeups));
+    printf("min_wakeup_latency (ns): %u\n", $min_wakeup_latency);
+    printf("max_wakeup_latency (ns): %u\n", $max_wakeup_latency);
+
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+	return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+	   "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+	printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl
new file mode 100644
index 0000000..511302c
--- /dev/null
+++ b/tools/perf/scripts/perl/workqueue-stats.pl
@@ -0,0 +1,129 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Displays workqueue stats
+#
+# Usage:
+#
+#   perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e
+#     workqueue:workqueue_destruction -e workqueue:workqueue_execution
+#     -e workqueue:workqueue_insertion
+#
+#   perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my @cpus;
+
+sub workqueue::workqueue_destruction
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$thread_comm, $thread_pid) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{destroyed}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_creation
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$thread_comm, $thread_pid, $cpu) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{created}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_execution
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$thread_comm, $thread_pid, $func) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{executed}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_insertion
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$thread_comm, $thread_pid, $func) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{inserted}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub trace_end
+{
+    print "workqueue work stats:\n\n";
+    my $cpu = 0;
+    printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name");
+    printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----");
+    foreach my $pidhash (@cpus) {
+	while ((my $pid, my $wqhash) = each %$pidhash) {
+	    my $ins = $$wqhash{'inserted'};
+	    my $exe = $$wqhash{'executed'};
+	    my $comm = $$wqhash{'comm'};
+	    if ($ins || $exe) {
+		printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm);
+	    }
+	}
+	$cpu++;
+    }
+
+    $cpu = 0;
+    print "\nworkqueue lifecycle stats:\n\n";
+    printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name");
+    printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----");
+    foreach my $pidhash (@cpus) {
+	while ((my $pid, my $wqhash) = each %$pidhash) {
+	    my $created = $$wqhash{'created'};
+	    my $destroyed = $$wqhash{'destroyed'};
+	    my $comm = $$wqhash{'comm'};
+	    if ($created || $destroyed) {
+		printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed,
+		       $comm);
+	    }
+	}
+	$cpu++;
+    }
+
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+	return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+	   "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+	printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 6f8ea9d..918eb37 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -1,10 +1,15 @@
-#ifndef CACHE_H
-#define CACHE_H
+#ifndef __PERF_CACHE_H
+#define __PERF_CACHE_H
 
 #include "util.h"
 #include "strbuf.h"
 #include "../perf.h"
 
+#define CMD_EXEC_PATH "--exec-path"
+#define CMD_PERF_DIR "--perf-dir="
+#define CMD_WORK_TREE "--work-tree="
+#define CMD_DEBUGFS_DIR "--debugfs-dir="
+
 #define PERF_DIR_ENVIRONMENT "PERF_DIR"
 #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
 #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
@@ -117,4 +122,4 @@
 
 extern size_t strlcpy(char *dest, const char *src, size_t size);
 
-#endif /* CACHE_H */
+#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 3b8380f..b3b7125 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -206,7 +206,7 @@
 	}
 	node->val_nr = chain->nr - start;
 	if (!node->val_nr)
-		printf("Warning: empty node in callchain tree\n");
+		pr_warning("Warning: empty node in callchain tree\n");
 }
 
 static void
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 43cf3ea..ad4626d 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -58,4 +58,4 @@
 int register_callchain_param(struct callchain_param *param);
 void append_chain(struct callchain_node *root, struct ip_callchain *chain,
 		  struct symbol **syms);
-#endif
+#endif	/* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 58d5975..24e8809 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -1,5 +1,5 @@
-#ifndef COLOR_H
-#define COLOR_H
+#ifndef __PERF_COLOR_H
+#define __PERF_COLOR_H
 
 /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
 #define COLOR_MAXLEN 24
@@ -39,4 +39,4 @@
 int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
 const char *get_percent_color(double percent);
 
-#endif /* COLOR_H */
+#endif /* __PERF_COLOR_H */
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index 0b791bd..3507362 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -29,3 +29,11 @@
 	A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0,		/* 112..127 */
 	/* Nothing in the 128.. range */
 };
+
+const char *graph_line =
+	"_____________________________________________________________________"
+	"_____________________________________________________________________";
+const char *graph_dotted_line =
+	"---------------------------------------------------------------------"
+	"---------------------------------------------------------------------"
+	"---------------------------------------------------------------------";
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
new file mode 100644
index 0000000..ca0bedf
--- /dev/null
+++ b/tools/perf/util/data_map.c
@@ -0,0 +1,291 @@
+#include "data_map.h"
+#include "symbol.h"
+#include "util.h"
+#include "debug.h"
+
+
+static struct perf_file_handler *curr_handler;
+static unsigned long	mmap_window = 32;
+static char		__cwd[PATH_MAX];
+
+static int process_event_stub(event_t *event __used)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+void register_perf_file_handler(struct perf_file_handler *handler)
+{
+	if (!handler->process_sample_event)
+		handler->process_sample_event = process_event_stub;
+	if (!handler->process_mmap_event)
+		handler->process_mmap_event = process_event_stub;
+	if (!handler->process_comm_event)
+		handler->process_comm_event = process_event_stub;
+	if (!handler->process_fork_event)
+		handler->process_fork_event = process_event_stub;
+	if (!handler->process_exit_event)
+		handler->process_exit_event = process_event_stub;
+	if (!handler->process_lost_event)
+		handler->process_lost_event = process_event_stub;
+	if (!handler->process_read_event)
+		handler->process_read_event = process_event_stub;
+	if (!handler->process_throttle_event)
+		handler->process_throttle_event = process_event_stub;
+	if (!handler->process_unthrottle_event)
+		handler->process_unthrottle_event = process_event_stub;
+
+	curr_handler = handler;
+}
+
+static const char *event__name[] = {
+	[0]			 = "TOTAL",
+	[PERF_RECORD_MMAP]	 = "MMAP",
+	[PERF_RECORD_LOST]	 = "LOST",
+	[PERF_RECORD_COMM]	 = "COMM",
+	[PERF_RECORD_EXIT]	 = "EXIT",
+	[PERF_RECORD_THROTTLE]	 = "THROTTLE",
+	[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
+	[PERF_RECORD_FORK]	 = "FORK",
+	[PERF_RECORD_READ]	 = "READ",
+	[PERF_RECORD_SAMPLE]	 = "SAMPLE",
+};
+
+unsigned long event__total[PERF_RECORD_MAX];
+
+void event__print_totals(void)
+{
+	int i;
+	for (i = 0; i < PERF_RECORD_MAX; ++i)
+		pr_info("%10s events: %10ld\n",
+			event__name[i], event__total[i]);
+}
+
+static int
+process_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	trace_event(event);
+
+	if (event->header.type < PERF_RECORD_MAX) {
+		dump_printf("%p [%p]: PERF_RECORD_%s",
+			    (void *)(offset + head),
+			    (void *)(long)(event->header.size),
+			    event__name[event->header.type]);
+		++event__total[0];
+		++event__total[event->header.type];
+	}
+
+	switch (event->header.type) {
+	case PERF_RECORD_SAMPLE:
+		return curr_handler->process_sample_event(event);
+	case PERF_RECORD_MMAP:
+		return curr_handler->process_mmap_event(event);
+	case PERF_RECORD_COMM:
+		return curr_handler->process_comm_event(event);
+	case PERF_RECORD_FORK:
+		return curr_handler->process_fork_event(event);
+	case PERF_RECORD_EXIT:
+		return curr_handler->process_exit_event(event);
+	case PERF_RECORD_LOST:
+		return curr_handler->process_lost_event(event);
+	case PERF_RECORD_READ:
+		return curr_handler->process_read_event(event);
+	case PERF_RECORD_THROTTLE:
+		return curr_handler->process_throttle_event(event);
+	case PERF_RECORD_UNTHROTTLE:
+		return curr_handler->process_unthrottle_event(event);
+	default:
+		curr_handler->total_unknown++;
+		return -1;
+	}
+}
+
+int perf_header__read_build_ids(int input, off_t offset, off_t size)
+{
+	struct build_id_event bev;
+	char filename[PATH_MAX];
+	off_t limit = offset + size;
+	int err = -1;
+
+	while (offset < limit) {
+		struct dso *dso;
+		ssize_t len;
+
+		if (read(input, &bev, sizeof(bev)) != sizeof(bev))
+			goto out;
+
+		len = bev.header.size - sizeof(bev);
+		if (read(input, filename, len) != len)
+			goto out;
+
+		dso = dsos__findnew(filename);
+		if (dso != NULL)
+			dso__set_build_id(dso, &bev.build_id);
+
+		offset += bev.header.size;
+	}
+	err = 0;
+out:
+	return err;
+}
+
+int mmap_dispatch_perf_file(struct perf_header **pheader,
+			    const char *input_name,
+			    int force,
+			    int full_paths,
+			    int *cwdlen,
+			    char **cwd)
+{
+	int err;
+	struct perf_header *header;
+	unsigned long head, shift;
+	unsigned long offset = 0;
+	struct stat input_stat;
+	size_t	page_size;
+	u64 sample_type;
+	event_t *event;
+	uint32_t size;
+	int input;
+	char *buf;
+
+	if (curr_handler == NULL) {
+		pr_debug("Forgot to register perf file handler\n");
+		return -EINVAL;
+	}
+
+	page_size = getpagesize();
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		pr_err("Failed to open file: %s", input_name);
+		if (!strcmp(input_name, "perf.data"))
+			pr_err("  (try 'perf record' first)");
+		pr_err("\n");
+		return -errno;
+	}
+
+	if (fstat(input, &input_stat) < 0) {
+		pr_err("failed to stat file");
+		err = -errno;
+		goto out_close;
+	}
+
+	err = -EACCES;
+	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
+		pr_err("file: %s not owned by current user or root\n",
+			input_name);
+		goto out_close;
+	}
+
+	if (input_stat.st_size == 0) {
+		pr_info("zero-sized file, nothing to do!\n");
+		goto done;
+	}
+
+	err = -ENOMEM;
+	header = perf_header__new();
+	if (header == NULL)
+		goto out_close;
+
+	err = perf_header__read(header, input);
+	if (err < 0)
+		goto out_delete;
+	*pheader = header;
+	head = header->data_offset;
+
+	sample_type = perf_header__sample_type(header);
+
+	err = -EINVAL;
+	if (curr_handler->sample_type_check &&
+	    curr_handler->sample_type_check(sample_type) < 0)
+		goto out_delete;
+
+	if (!full_paths) {
+		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
+			pr_err("failed to get the current directory\n");
+			err = -errno;
+			goto out_delete;
+		}
+		*cwd = __cwd;
+		*cwdlen = strlen(*cwd);
+	} else {
+		*cwd = NULL;
+		*cwdlen = 0;
+	}
+
+	shift = page_size * (head / page_size);
+	offset += shift;
+	head -= shift;
+
+remap:
+	buf = mmap(NULL, page_size * mmap_window, PROT_READ,
+		   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		pr_err("failed to mmap file\n");
+		err = -errno;
+		goto out_delete;
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	size = event->header.size;
+	if (!size)
+		size = 8;
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		int munmap_ret;
+
+		shift = page_size * (head / page_size);
+
+		munmap_ret = munmap(buf, page_size * mmap_window);
+		assert(munmap_ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	dump_printf("\n%p [%p]: event: %d\n",
+			(void *)(offset + head),
+			(void *)(long)event->header.size,
+			event->header.type);
+
+	if (!size || process_event(event, offset, head) < 0) {
+
+		dump_printf("%p [%p]: skipping unknown header type: %d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->header.type);
+
+		/*
+		 * assume we lost track of the stream, check alignment, and
+		 * increment a single u64 in the hope to catch on again 'soon'.
+		 */
+
+		if (unlikely(head & 7))
+			head &= ~7ULL;
+
+		size = 8;
+	}
+
+	head += size;
+
+	if (offset + head >= header->data_offset + header->data_size)
+		goto done;
+
+	if (offset + head < (unsigned long)input_stat.st_size)
+		goto more;
+
+done:
+	err = 0;
+out_close:
+	close(input);
+
+	return err;
+out_delete:
+	perf_header__delete(header);
+	goto out_close;
+}
diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h
new file mode 100644
index 0000000..3180ff7
--- /dev/null
+++ b/tools/perf/util/data_map.h
@@ -0,0 +1,32 @@
+#ifndef __PERF_DATAMAP_H
+#define __PERF_DATAMAP_H
+
+#include "event.h"
+#include "header.h"
+
+typedef int (*event_type_handler_t)(event_t *);
+
+struct perf_file_handler {
+	event_type_handler_t	process_sample_event;
+	event_type_handler_t	process_mmap_event;
+	event_type_handler_t	process_comm_event;
+	event_type_handler_t	process_fork_event;
+	event_type_handler_t	process_exit_event;
+	event_type_handler_t	process_lost_event;
+	event_type_handler_t	process_read_event;
+	event_type_handler_t	process_throttle_event;
+	event_type_handler_t	process_unthrottle_event;
+	int			(*sample_type_check)(u64 sample_type);
+	unsigned long		total_unknown;
+};
+
+void register_perf_file_handler(struct perf_file_handler *handler);
+int mmap_dispatch_perf_file(struct perf_header **pheader,
+			    const char *input_name,
+			    int force,
+			    int full_paths,
+			    int *cwdlen,
+			    char **cwd);
+int perf_header__read_build_ids(int input, off_t offset, off_t file_size);
+
+#endif
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index e8ca98f..28d520d 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -13,12 +13,12 @@
 int verbose = 0;
 int dump_trace = 0;
 
-int eprintf(const char *fmt, ...)
+int eprintf(int level, const char *fmt, ...)
 {
 	va_list args;
 	int ret = 0;
 
-	if (verbose) {
+	if (verbose >= level) {
 		va_start(args, fmt);
 		ret = vfprintf(stderr, fmt, args);
 		va_end(args);
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 437eea5..c6c24c5 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -1,8 +1,15 @@
 /* For debugging general purposes */
+#ifndef __PERF_DEBUG_H
+#define __PERF_DEBUG_H
+
+#include "event.h"
 
 extern int verbose;
 extern int dump_trace;
 
-int eprintf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+int eprintf(int level,
+	    const char *fmt, ...) __attribute__((format(printf, 2, 3)));
 int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
 void trace_event(event_t *event);
+
+#endif	/* __PERF_DEBUG_H */
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
new file mode 100644
index 0000000..06b73ee
--- /dev/null
+++ b/tools/perf/util/debugfs.c
@@ -0,0 +1,241 @@
+#include "util.h"
+#include "debugfs.h"
+#include "cache.h"
+
+static int debugfs_premounted;
+static char debugfs_mountpoint[MAX_PATH+1];
+
+static const char *debugfs_known_mountpoints[] = {
+	"/sys/kernel/debug/",
+	"/debug/",
+	0,
+};
+
+/* use this to force a umount */
+void debugfs_force_cleanup(void)
+{
+	debugfs_find_mountpoint();
+	debugfs_premounted = 0;
+	debugfs_umount();
+}
+
+/* construct a full path to a debugfs element */
+int debugfs_make_path(const char *element, char *buffer, int size)
+{
+	int len;
+
+	if (strlen(debugfs_mountpoint) == 0) {
+		buffer[0] = '\0';
+		return -1;
+	}
+
+	len = strlen(debugfs_mountpoint) + strlen(element) + 1;
+	if (len >= size)
+		return len+1;
+
+	snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element);
+	return 0;
+}
+
+static int debugfs_found;
+
+/* find the path to the mounted debugfs */
+const char *debugfs_find_mountpoint(void)
+{
+	const char **ptr;
+	char type[100];
+	FILE *fp;
+
+	if (debugfs_found)
+		return (const char *) debugfs_mountpoint;
+
+	ptr = debugfs_known_mountpoints;
+	while (*ptr) {
+		if (debugfs_valid_mountpoint(*ptr) == 0) {
+			debugfs_found = 1;
+			strcpy(debugfs_mountpoint, *ptr);
+			return debugfs_mountpoint;
+		}
+		ptr++;
+	}
+
+	/* give up and parse /proc/mounts */
+	fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		die("Can't open /proc/mounts for read");
+
+	while (fscanf(fp, "%*s %"
+		      STR(MAX_PATH)
+		      "s %99s %*s %*d %*d\n",
+		      debugfs_mountpoint, type) == 2) {
+		if (strcmp(type, "debugfs") == 0)
+			break;
+	}
+	fclose(fp);
+
+	if (strcmp(type, "debugfs") != 0)
+		return NULL;
+
+	debugfs_found = 1;
+
+	return debugfs_mountpoint;
+}
+
+/* verify that a mountpoint is actually a debugfs instance */
+
+int debugfs_valid_mountpoint(const char *debugfs)
+{
+	struct statfs st_fs;
+
+	if (statfs(debugfs, &st_fs) < 0)
+		return -ENOENT;
+	else if (st_fs.f_type != (long) DEBUGFS_MAGIC)
+		return -ENOENT;
+
+	return 0;
+}
+
+
+int debugfs_valid_entry(const char *path)
+{
+	struct stat st;
+
+	if (stat(path, &st))
+		return -errno;
+
+	return 0;
+}
+
+/* mount the debugfs somewhere */
+
+int debugfs_mount(const char *mountpoint)
+{
+	char mountcmd[128];
+
+	/* see if it's already mounted */
+	if (debugfs_find_mountpoint()) {
+		debugfs_premounted = 1;
+		return 0;
+	}
+
+	/* if not mounted and no argument */
+	if (mountpoint == NULL) {
+		/* see if environment variable set */
+		mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT);
+		/* if no environment variable, use default */
+		if (mountpoint == NULL)
+			mountpoint = "/sys/kernel/debug";
+	}
+
+	/* save the mountpoint */
+	strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
+
+	/* mount it */
+	snprintf(mountcmd, sizeof(mountcmd),
+		 "/bin/mount -t debugfs debugfs %s", mountpoint);
+	return system(mountcmd);
+}
+
+/* umount the debugfs */
+
+int debugfs_umount(void)
+{
+	char umountcmd[128];
+	int ret;
+
+	/* if it was already mounted, leave it */
+	if (debugfs_premounted)
+		return 0;
+
+	/* make sure it's a valid mount point */
+	ret = debugfs_valid_mountpoint(debugfs_mountpoint);
+	if (ret)
+		return ret;
+
+	snprintf(umountcmd, sizeof(umountcmd),
+		 "/bin/umount %s", debugfs_mountpoint);
+	return system(umountcmd);
+}
+
+int debugfs_write(const char *entry, const char *value)
+{
+	char path[MAX_PATH+1];
+	int ret, count;
+	int fd;
+
+	/* construct the path */
+	snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
+
+	/* verify that it exists */
+	ret = debugfs_valid_entry(path);
+	if (ret)
+		return ret;
+
+	/* get how many chars we're going to write */
+	count = strlen(value);
+
+	/* open the debugfs entry */
+	fd = open(path, O_RDWR);
+	if (fd < 0)
+		return -errno;
+
+	while (count > 0) {
+		/* write it */
+		ret = write(fd, value, count);
+		if (ret <= 0) {
+			if (ret == EAGAIN)
+				continue;
+			close(fd);
+			return -errno;
+		}
+		count -= ret;
+	}
+
+	/* close it */
+	close(fd);
+
+	/* return success */
+	return 0;
+}
+
+/*
+ * read a debugfs entry
+ * returns the number of chars read or a negative errno
+ */
+int debugfs_read(const char *entry, char *buffer, size_t size)
+{
+	char path[MAX_PATH+1];
+	int ret;
+	int fd;
+
+	/* construct the path */
+	snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
+
+	/* verify that it exists */
+	ret = debugfs_valid_entry(path);
+	if (ret)
+		return ret;
+
+	/* open the debugfs entry */
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	do {
+		/* read it */
+		ret = read(fd, buffer, size);
+		if (ret == 0) {
+			close(fd);
+			return EOF;
+		}
+	} while (ret < 0 && errno == EAGAIN);
+
+	/* close it */
+	close(fd);
+
+	/* make *sure* there's a null character at the end */
+	buffer[ret] = '\0';
+
+	/* return the number of chars read */
+	return ret;
+}
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
new file mode 100644
index 0000000..3cd14f9
--- /dev/null
+++ b/tools/perf/util/debugfs.h
@@ -0,0 +1,25 @@
+#ifndef __DEBUGFS_H__
+#define __DEBUGFS_H__
+
+#include <sys/mount.h>
+
+#ifndef MAX_PATH
+# define MAX_PATH 256
+#endif
+
+#ifndef STR
+# define _STR(x) #x
+# define STR(x) _STR(x)
+#endif
+
+extern const char *debugfs_find_mountpoint(void);
+extern int debugfs_valid_mountpoint(const char *debugfs);
+extern int debugfs_valid_entry(const char *path);
+extern int debugfs_mount(const char *mountpoint);
+extern int debugfs_umount(void);
+extern int debugfs_write(const char *entry, const char *value);
+extern int debugfs_read(const char *entry, char *buffer, size_t size);
+extern void debugfs_force_cleanup(void);
+extern int debugfs_make_path(const char *element, char *buffer, int size);
+
+#endif /* __DEBUGFS_H__ */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
new file mode 100644
index 0000000..414b89d
--- /dev/null
+++ b/tools/perf/util/event.c
@@ -0,0 +1,312 @@
+#include <linux/types.h>
+#include "event.h"
+#include "debug.h"
+#include "string.h"
+#include "thread.h"
+
+static pid_t event__synthesize_comm(pid_t pid, int full,
+				    int (*process)(event_t *event))
+{
+	event_t ev;
+	char filename[PATH_MAX];
+	char bf[BUFSIZ];
+	FILE *fp;
+	size_t size = 0;
+	DIR *tasks;
+	struct dirent dirent, *next;
+	pid_t tgid = 0;
+
+	snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
+
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+out_race:
+		/*
+		 * We raced with a task exiting - just return:
+		 */
+		pr_debug("couldn't open %s\n", filename);
+		return 0;
+	}
+
+	memset(&ev.comm, 0, sizeof(ev.comm));
+	while (!ev.comm.comm[0] || !ev.comm.pid) {
+		if (fgets(bf, sizeof(bf), fp) == NULL)
+			goto out_failure;
+
+		if (memcmp(bf, "Name:", 5) == 0) {
+			char *name = bf + 5;
+			while (*name && isspace(*name))
+				++name;
+			size = strlen(name) - 1;
+			memcpy(ev.comm.comm, name, size++);
+		} else if (memcmp(bf, "Tgid:", 5) == 0) {
+			char *tgids = bf + 5;
+			while (*tgids && isspace(*tgids))
+				++tgids;
+			tgid = ev.comm.pid = atoi(tgids);
+		}
+	}
+
+	ev.comm.header.type = PERF_RECORD_COMM;
+	size = ALIGN(size, sizeof(u64));
+	ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size);
+
+	if (!full) {
+		ev.comm.tid = pid;
+
+		process(&ev);
+		goto out_fclose;
+	}
+
+	snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
+
+	tasks = opendir(filename);
+	if (tasks == NULL)
+		goto out_race;
+
+	while (!readdir_r(tasks, &dirent, &next) && next) {
+		char *end;
+		pid = strtol(dirent.d_name, &end, 10);
+		if (*end)
+			continue;
+
+		ev.comm.tid = pid;
+
+		process(&ev);
+	}
+	closedir(tasks);
+
+out_fclose:
+	fclose(fp);
+	return tgid;
+
+out_failure:
+	pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
+	return -1;
+}
+
+static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
+					 int (*process)(event_t *event))
+{
+	char filename[PATH_MAX];
+	FILE *fp;
+
+	snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
+
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+		/*
+		 * We raced with a task exiting - just return:
+		 */
+		pr_debug("couldn't open %s\n", filename);
+		return -1;
+	}
+
+	while (1) {
+		char bf[BUFSIZ], *pbf = bf;
+		event_t ev = {
+			.header = { .type = PERF_RECORD_MMAP },
+		};
+		int n;
+		size_t size;
+		if (fgets(bf, sizeof(bf), fp) == NULL)
+			break;
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		n = hex2u64(pbf, &ev.mmap.start);
+		if (n < 0)
+			continue;
+		pbf += n + 1;
+		n = hex2u64(pbf, &ev.mmap.len);
+		if (n < 0)
+			continue;
+		pbf += n + 3;
+		if (*pbf == 'x') { /* vm_exec */
+			char *execname = strchr(bf, '/');
+
+			/* Catch VDSO */
+			if (execname == NULL)
+				execname = strstr(bf, "[vdso]");
+
+			if (execname == NULL)
+				continue;
+
+			size = strlen(execname);
+			execname[size - 1] = '\0'; /* Remove \n */
+			memcpy(ev.mmap.filename, execname, size);
+			size = ALIGN(size, sizeof(u64));
+			ev.mmap.len -= ev.mmap.start;
+			ev.mmap.header.size = (sizeof(ev.mmap) -
+					       (sizeof(ev.mmap.filename) - size));
+			ev.mmap.pid = tgid;
+			ev.mmap.tid = pid;
+
+			process(&ev);
+		}
+	}
+
+	fclose(fp);
+	return 0;
+}
+
+int event__synthesize_thread(pid_t pid, int (*process)(event_t *event))
+{
+	pid_t tgid = event__synthesize_comm(pid, 1, process);
+	if (tgid == -1)
+		return -1;
+	return event__synthesize_mmap_events(pid, tgid, process);
+}
+
+void event__synthesize_threads(int (*process)(event_t *event))
+{
+	DIR *proc;
+	struct dirent dirent, *next;
+
+	proc = opendir("/proc");
+
+	while (!readdir_r(proc, &dirent, &next) && next) {
+		char *end;
+		pid_t pid = strtol(dirent.d_name, &end, 10);
+
+		if (*end) /* only interested in proper numerical dirents */
+			continue;
+
+		event__synthesize_thread(pid, process);
+	}
+
+	closedir(proc);
+}
+
+char *event__cwd;
+int  event__cwdlen;
+
+struct events_stats event__stats;
+
+int event__process_comm(event_t *self)
+{
+	struct thread *thread = threads__findnew(self->comm.pid);
+
+	dump_printf(": %s:%d\n", self->comm.comm, self->comm.pid);
+
+	if (thread == NULL || thread__set_comm(thread, self->comm.comm)) {
+		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int event__process_lost(event_t *self)
+{
+	dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
+	event__stats.lost += self->lost.lost;
+	return 0;
+}
+
+int event__process_mmap(event_t *self)
+{
+	struct thread *thread = threads__findnew(self->mmap.pid);
+	struct map *map = map__new(&self->mmap, MAP__FUNCTION,
+				   event__cwd, event__cwdlen);
+
+	dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n",
+		    self->mmap.pid, self->mmap.tid,
+		    (void *)(long)self->mmap.start,
+		    (void *)(long)self->mmap.len,
+		    (void *)(long)self->mmap.pgoff,
+		    self->mmap.filename);
+
+	if (thread == NULL || map == NULL)
+		dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
+	else
+		thread__insert_map(thread, map);
+
+	return 0;
+}
+
+int event__process_task(event_t *self)
+{
+	struct thread *thread = threads__findnew(self->fork.pid);
+	struct thread *parent = threads__findnew(self->fork.ppid);
+
+	dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid,
+		    self->fork.ppid, self->fork.ptid);
+	/*
+	 * A thread clone will have the same PID for both parent and child.
+	 */
+	if (thread == parent)
+		return 0;
+
+	if (self->header.type == PERF_RECORD_EXIT)
+		return 0;
+
+	if (thread == NULL || parent == NULL ||
+	    thread__fork(thread, parent) < 0) {
+		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void thread__find_addr_location(struct thread *self, u8 cpumode,
+				enum map_type type, u64 addr,
+				struct addr_location *al,
+				symbol_filter_t filter)
+{
+	struct thread *thread = al->thread = self;
+
+	al->addr = addr;
+
+	if (cpumode & PERF_RECORD_MISC_KERNEL) {
+		al->level = 'k';
+		thread = kthread;
+	} else if (cpumode & PERF_RECORD_MISC_USER)
+		al->level = '.';
+	else {
+		al->level = 'H';
+		al->map = NULL;
+		al->sym = NULL;
+		return;
+	}
+try_again:
+	al->map = thread__find_map(thread, type, al->addr);
+	if (al->map == NULL) {
+		/*
+		 * If this is outside of all known maps, and is a negative
+		 * address, try to look it up in the kernel dso, as it might be
+		 * a vsyscall or vdso (which executes in user-mode).
+		 *
+		 * XXX This is nasty, we should have a symbol list in the
+		 * "[vdso]" dso, but for now lets use the old trick of looking
+		 * in the whole kernel symbol list.
+		 */
+		if ((long long)al->addr < 0 && thread != kthread) {
+			thread = kthread;
+			goto try_again;
+		}
+		al->sym = NULL;
+	} else {
+		al->addr = al->map->map_ip(al->map, al->addr);
+		al->sym = map__find_symbol(al->map, al->addr, filter);
+	}
+}
+
+int event__preprocess_sample(const event_t *self, struct addr_location *al,
+			     symbol_filter_t filter)
+{
+	u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct thread *thread = threads__findnew(self->ip.pid);
+
+	if (thread == NULL)
+		return -1;
+
+	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+				   self->ip.ip, al, filter);
+	dump_printf(" ...... dso: %s\n",
+		    al->map ? al->map->dso->long_name :
+			al->level == 'H' ? "[hypervisor]" : "<not found>");
+	return 0;
+}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 2c9c26d6..a4cc810 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -1,14 +1,10 @@
 #ifndef __PERF_RECORD_H
 #define __PERF_RECORD_H
+
 #include "../perf.h"
 #include "util.h"
 #include <linux/list.h>
-
-enum {
-	SHOW_KERNEL	= 1,
-	SHOW_USER	= 2,
-	SHOW_HV		= 4,
-};
+#include <linux/rbtree.h>
 
 /*
  * PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
@@ -65,6 +61,13 @@
 	u64 array[];
 };
 
+#define BUILD_ID_SIZE 20
+
+struct build_id_event {
+	struct perf_event_header header;
+	u8			 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+	char			 filename[];
+};
 
 typedef union event_union {
 	struct perf_event_header	header;
@@ -77,12 +80,30 @@
 	struct sample_event		sample;
 } event_t;
 
+struct events_stats {
+	unsigned long total;
+	unsigned long lost;
+};
+
+void event__print_totals(void);
+
+enum map_type {
+	MAP__FUNCTION = 0,
+
+	MAP__NR_TYPES,
+};
+
 struct map {
-	struct list_head	node;
+	union {
+		struct rb_node	rb_node;
+		struct list_head node;
+	};
 	u64			start;
 	u64			end;
+	enum map_type		type;
 	u64			pgoff;
 	u64			(*map_ip)(struct map *, u64);
+	u64			(*unmap_ip)(struct map *, u64);
 	struct dso		*dso;
 };
 
@@ -91,14 +112,48 @@
 	return ip - map->start + map->pgoff;
 }
 
-static inline u64 vdso__map_ip(struct map *map __used, u64 ip)
+static inline u64 map__unmap_ip(struct map *map, u64 ip)
+{
+	return ip + map->start - map->pgoff;
+}
+
+static inline u64 identity__map_ip(struct map *map __used, u64 ip)
 {
 	return ip;
 }
 
-struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen);
+struct symbol;
+
+typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
+
+void map__init(struct map *self, enum map_type type,
+	       u64 start, u64 end, u64 pgoff, struct dso *dso);
+struct map *map__new(struct mmap_event *event, enum map_type,
+		     char *cwd, int cwdlen);
+void map__delete(struct map *self);
 struct map *map__clone(struct map *self);
 int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *self, FILE *fp);
+struct symbol *map__find_symbol(struct map *self, u64 addr,
+				symbol_filter_t filter);
+void map__fixup_start(struct map *self);
+void map__fixup_end(struct map *self);
 
-#endif
+int event__synthesize_thread(pid_t pid, int (*process)(event_t *event));
+void event__synthesize_threads(int (*process)(event_t *event));
+
+extern char *event__cwd;
+extern int  event__cwdlen;
+extern struct events_stats event__stats;
+extern unsigned long event__total[PERF_RECORD_MAX];
+
+int event__process_comm(event_t *self);
+int event__process_lost(event_t *self);
+int event__process_mmap(event_t *self);
+int event__process_task(event_t *self);
+
+struct addr_location;
+int event__preprocess_sample(const event_t *self, struct addr_location *al,
+			     symbol_filter_t filter);
+
+#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h
index effe25e..31647ac 100644
--- a/tools/perf/util/exec_cmd.h
+++ b/tools/perf/util/exec_cmd.h
@@ -1,5 +1,5 @@
-#ifndef PERF_EXEC_CMD_H
-#define PERF_EXEC_CMD_H
+#ifndef __PERF_EXEC_CMD_H
+#define __PERF_EXEC_CMD_H
 
 extern void perf_set_argv_exec_path(const char *exec_path);
 extern const char *perf_extract_argv0_path(const char *path);
@@ -10,4 +10,4 @@
 extern int execl_perf_cmd(const char *cmd, ...);
 extern const char *system_path(const char *path);
 
-#endif /* PERF_EXEC_CMD_H */
+#endif /* __PERF_EXEC_CMD_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e306857..4805e6d 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2,9 +2,15 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <linux/list.h>
 
 #include "util.h"
 #include "header.h"
+#include "../perf.h"
+#include "trace-event.h"
+#include "symbol.h"
+#include "data_map.h"
+#include "debug.h"
 
 /*
  * Create new perf.data header attribute:
@@ -13,32 +19,43 @@
 {
 	struct perf_header_attr *self = malloc(sizeof(*self));
 
-	if (!self)
-		die("nomem");
-
-	self->attr = *attr;
-	self->ids = 0;
-	self->size = 1;
-	self->id = malloc(sizeof(u64));
-
-	if (!self->id)
-		die("nomem");
+	if (self != NULL) {
+		self->attr = *attr;
+		self->ids  = 0;
+		self->size = 1;
+		self->id   = malloc(sizeof(u64));
+		if (self->id == NULL) {
+			free(self);
+			self = NULL;
+		}
+	}
 
 	return self;
 }
 
-void perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
+void perf_header_attr__delete(struct perf_header_attr *self)
+{
+	free(self->id);
+	free(self);
+}
+
+int perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
 {
 	int pos = self->ids;
 
 	self->ids++;
 	if (self->ids > self->size) {
-		self->size *= 2;
-		self->id = realloc(self->id, self->size * sizeof(u64));
-		if (!self->id)
-			die("nomem");
+		int nsize = self->size * 2;
+		u64 *nid = realloc(self->id, nsize * sizeof(u64));
+
+		if (nid == NULL)
+			return -1;
+
+		self->size = nsize;
+		self->id = nid;
 	}
 	self->id[pos] = id;
+	return 0;
 }
 
 /*
@@ -46,42 +63,52 @@
  */
 struct perf_header *perf_header__new(void)
 {
-	struct perf_header *self = malloc(sizeof(*self));
+	struct perf_header *self = zalloc(sizeof(*self));
 
-	if (!self)
-		die("nomem");
+	if (self != NULL) {
+		self->size = 1;
+		self->attr = malloc(sizeof(void *));
 
-	self->frozen = 0;
-
-	self->attrs = 0;
-	self->size = 1;
-	self->attr = malloc(sizeof(void *));
-
-	if (!self->attr)
-		die("nomem");
-
-	self->data_offset = 0;
-	self->data_size = 0;
+		if (self->attr == NULL) {
+			free(self);
+			self = NULL;
+		}
+	}
 
 	return self;
 }
 
-void perf_header__add_attr(struct perf_header *self,
-			   struct perf_header_attr *attr)
+void perf_header__delete(struct perf_header *self)
 {
-	int pos = self->attrs;
+	int i;
 
+	for (i = 0; i < self->attrs; ++i)
+		perf_header_attr__delete(self->attr[i]);
+
+	free(self->attr);
+	free(self);
+}
+
+int perf_header__add_attr(struct perf_header *self,
+			  struct perf_header_attr *attr)
+{
 	if (self->frozen)
-		die("frozen");
+		return -1;
 
-	self->attrs++;
-	if (self->attrs > self->size) {
-		self->size *= 2;
-		self->attr = realloc(self->attr, self->size * sizeof(void *));
-		if (!self->attr)
-			die("nomem");
+	if (self->attrs == self->size) {
+		int nsize = self->size * 2;
+		struct perf_header_attr **nattr;
+
+		nattr = realloc(self->attr, nsize * sizeof(void *));
+		if (nattr == NULL)
+			return -1;
+
+		self->size = nsize;
+		self->attr = nattr;
 	}
-	self->attr[pos] = attr;
+
+	self->attr[self->attrs++] = attr;
+	return 0;
 }
 
 #define MAX_EVENT_NAME 64
@@ -97,7 +124,7 @@
 void perf_header__push_event(u64 id, const char *name)
 {
 	if (strlen(name) > MAX_EVENT_NAME)
-		printf("Event %s will be truncated\n", name);
+		pr_warning("Event %s will be truncated\n", name);
 
 	if (!events) {
 		events = malloc(sizeof(struct perf_trace_event_type));
@@ -128,44 +155,137 @@
 
 #define PERF_MAGIC	(*(u64 *)__perf_magic)
 
-struct perf_file_section {
-	u64 offset;
-	u64 size;
-};
-
 struct perf_file_attr {
 	struct perf_event_attr	attr;
 	struct perf_file_section	ids;
 };
 
-struct perf_file_header {
-	u64				magic;
-	u64				size;
-	u64				attr_size;
-	struct perf_file_section	attrs;
-	struct perf_file_section	data;
-	struct perf_file_section	event_types;
-};
+void perf_header__set_feat(struct perf_header *self, int feat)
+{
+	set_bit(feat, self->adds_features);
+}
 
-static void do_write(int fd, void *buf, size_t size)
+bool perf_header__has_feat(const struct perf_header *self, int feat)
+{
+	return test_bit(feat, self->adds_features);
+}
+
+static int do_write(int fd, const void *buf, size_t size)
 {
 	while (size) {
 		int ret = write(fd, buf, size);
 
 		if (ret < 0)
-			die("failed to write");
+			return -errno;
 
 		size -= ret;
 		buf += ret;
 	}
+
+	return 0;
 }
 
-void perf_header__write(struct perf_header *self, int fd)
+static int __dsos__write_buildid_table(struct list_head *head, int fd)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, head, node) {
+		int err;
+		struct build_id_event b;
+		size_t len;
+
+		if (!pos->has_build_id)
+			continue;
+		len = pos->long_name_len + 1;
+		len = ALIGN(len, 64);
+		memset(&b, 0, sizeof(b));
+		memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
+		b.header.size = sizeof(b) + len;
+		err = do_write(fd, &b, sizeof(b));
+		if (err < 0)
+			return err;
+		err = do_write(fd, pos->long_name, len);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int dsos__write_buildid_table(int fd)
+{
+	int err = __dsos__write_buildid_table(&dsos__kernel, fd);
+	if (err == 0)
+		err = __dsos__write_buildid_table(&dsos__user, fd);
+	return err;
+}
+
+static int perf_header__adds_write(struct perf_header *self, int fd)
+{
+	int nr_sections;
+	struct perf_file_section *feat_sec;
+	int sec_size;
+	u64 sec_start;
+	int idx = 0, err;
+
+	if (dsos__read_build_ids())
+		perf_header__set_feat(self, HEADER_BUILD_ID);
+
+	nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
+	if (!nr_sections)
+		return 0;
+
+	feat_sec = calloc(sizeof(*feat_sec), nr_sections);
+	if (feat_sec == NULL)
+		return -ENOMEM;
+
+	sec_size = sizeof(*feat_sec) * nr_sections;
+
+	sec_start = self->data_offset + self->data_size;
+	lseek(fd, sec_start + sec_size, SEEK_SET);
+
+	if (perf_header__has_feat(self, HEADER_TRACE_INFO)) {
+		struct perf_file_section *trace_sec;
+
+		trace_sec = &feat_sec[idx++];
+
+		/* Write trace info */
+		trace_sec->offset = lseek(fd, 0, SEEK_CUR);
+		read_tracing_data(fd, attrs, nr_counters);
+		trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
+	}
+
+
+	if (perf_header__has_feat(self, HEADER_BUILD_ID)) {
+		struct perf_file_section *buildid_sec;
+
+		buildid_sec = &feat_sec[idx++];
+
+		/* Write build-ids */
+		buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
+		err = dsos__write_buildid_table(fd);
+		if (err < 0) {
+			pr_debug("failed to write buildid table\n");
+			goto out_free;
+		}
+		buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset;
+	}
+
+	lseek(fd, sec_start, SEEK_SET);
+	err = do_write(fd, feat_sec, sec_size);
+	if (err < 0)
+		pr_debug("failed to write feature section\n");
+out_free:
+	free(feat_sec);
+	return err;
+}
+
+int perf_header__write(struct perf_header *self, int fd, bool at_exit)
 {
 	struct perf_file_header f_header;
 	struct perf_file_attr   f_attr;
 	struct perf_header_attr	*attr;
-	int i;
+	int i, err;
 
 	lseek(fd, sizeof(f_header), SEEK_SET);
 
@@ -174,7 +294,11 @@
 		attr = self->attr[i];
 
 		attr->id_offset = lseek(fd, 0, SEEK_CUR);
-		do_write(fd, attr->id, attr->ids * sizeof(u64));
+		err = do_write(fd, attr->id, attr->ids * sizeof(u64));
+		if (err < 0) {
+			pr_debug("failed to write perf header\n");
+			return err;
+		}
 	}
 
 
@@ -190,17 +314,31 @@
 				.size   = attr->ids * sizeof(u64),
 			}
 		};
-		do_write(fd, &f_attr, sizeof(f_attr));
+		err = do_write(fd, &f_attr, sizeof(f_attr));
+		if (err < 0) {
+			pr_debug("failed to write perf header attribute\n");
+			return err;
+		}
 	}
 
 	self->event_offset = lseek(fd, 0, SEEK_CUR);
 	self->event_size = event_count * sizeof(struct perf_trace_event_type);
-	if (events)
-		do_write(fd, events, self->event_size);
-
+	if (events) {
+		err = do_write(fd, events, self->event_size);
+		if (err < 0) {
+			pr_debug("failed to write perf header events\n");
+			return err;
+		}
+	}
 
 	self->data_offset = lseek(fd, 0, SEEK_CUR);
 
+	if (at_exit) {
+		err = perf_header__adds_write(self, fd);
+		if (err < 0)
+			return err;
+	}
+
 	f_header = (struct perf_file_header){
 		.magic	   = PERF_MAGIC,
 		.size	   = sizeof(f_header),
@@ -219,11 +357,18 @@
 		},
 	};
 
+	memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features));
+
 	lseek(fd, 0, SEEK_SET);
-	do_write(fd, &f_header, sizeof(f_header));
+	err = do_write(fd, &f_header, sizeof(f_header));
+	if (err < 0) {
+		pr_debug("failed to write perf header\n");
+		return err;
+	}
 	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
 
 	self->frozen = 1;
+	return 0;
 }
 
 static void do_read(int fd, void *buf, size_t size)
@@ -241,22 +386,109 @@
 	}
 }
 
-struct perf_header *perf_header__read(int fd)
+int perf_header__process_sections(struct perf_header *self, int fd,
+				  int (*process)(struct perf_file_section *self,
+						 int feat, int fd))
 {
-	struct perf_header	*self = perf_header__new();
+	struct perf_file_section *feat_sec;
+	int nr_sections;
+	int sec_size;
+	int idx = 0;
+	int err = 0, feat = 1;
+
+	nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
+	if (!nr_sections)
+		return 0;
+
+	feat_sec = calloc(sizeof(*feat_sec), nr_sections);
+	if (!feat_sec)
+		return -1;
+
+	sec_size = sizeof(*feat_sec) * nr_sections;
+
+	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+
+	do_read(fd, feat_sec, sec_size);
+
+	while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
+		if (perf_header__has_feat(self, feat)) {
+			struct perf_file_section *sec = &feat_sec[idx++];
+
+			err = process(sec, feat, fd);
+			if (err < 0)
+				break;
+		}
+		++feat;
+	}
+
+	free(feat_sec);
+	return err;
+};
+
+int perf_file_header__read(struct perf_file_header *self,
+			   struct perf_header *ph, int fd)
+{
+	lseek(fd, 0, SEEK_SET);
+	do_read(fd, self, sizeof(*self));
+
+	if (self->magic     != PERF_MAGIC ||
+	    self->attr_size != sizeof(struct perf_file_attr))
+		return -1;
+
+	if (self->size != sizeof(*self)) {
+		/* Support the previous format */
+		if (self->size == offsetof(typeof(*self), adds_features))
+			bitmap_zero(self->adds_features, HEADER_FEAT_BITS);
+		else
+			return -1;
+	}
+
+	memcpy(&ph->adds_features, &self->adds_features,
+	       sizeof(self->adds_features));
+
+	ph->event_offset = self->event_types.offset;
+	ph->event_size	 = self->event_types.size;
+	ph->data_offset	 = self->data.offset;
+	ph->data_size	 = self->data.size;
+	return 0;
+}
+
+static int perf_file_section__process(struct perf_file_section *self,
+				      int feat, int fd)
+{
+	if (lseek(fd, self->offset, SEEK_SET) < 0) {
+		pr_debug("Failed to lseek to %Ld offset for feature %d, "
+			 "continuing...\n", self->offset, feat);
+		return 0;
+	}
+
+	switch (feat) {
+	case HEADER_TRACE_INFO:
+		trace_report(fd);
+		break;
+
+	case HEADER_BUILD_ID:
+		if (perf_header__read_build_ids(fd, self->offset, self->size))
+			pr_debug("Failed to read buildids, continuing...\n");
+		break;
+	default:
+		pr_debug("unknown feature %d, continuing...\n", feat);
+	}
+
+	return 0;
+}
+
+int perf_header__read(struct perf_header *self, int fd)
+{
 	struct perf_file_header f_header;
 	struct perf_file_attr	f_attr;
 	u64			f_id;
-
 	int nr_attrs, nr_ids, i, j;
 
-	lseek(fd, 0, SEEK_SET);
-	do_read(fd, &f_header, sizeof(f_header));
-
-	if (f_header.magic	!= PERF_MAGIC		||
-	    f_header.size	!= sizeof(f_header)	||
-	    f_header.attr_size	!= sizeof(f_attr))
-		die("incompatible file format");
+	if (perf_file_header__read(&f_header, self, fd) < 0) {
+		pr_debug("incompatible file format\n");
+		return -EINVAL;
+	}
 
 	nr_attrs = f_header.attrs.size / sizeof(f_attr);
 	lseek(fd, f_header.attrs.offset, SEEK_SET);
@@ -269,6 +501,8 @@
 		tmp = lseek(fd, 0, SEEK_CUR);
 
 		attr = perf_header_attr__new(&f_attr.attr);
+		if (attr == NULL)
+			 return -ENOMEM;
 
 		nr_ids = f_attr.ids.size / sizeof(u64);
 		lseek(fd, f_attr.ids.offset, SEEK_SET);
@@ -276,31 +510,34 @@
 		for (j = 0; j < nr_ids; j++) {
 			do_read(fd, &f_id, sizeof(f_id));
 
-			perf_header_attr__add_id(attr, f_id);
+			if (perf_header_attr__add_id(attr, f_id) < 0) {
+				perf_header_attr__delete(attr);
+				return -ENOMEM;
+			}
 		}
-		perf_header__add_attr(self, attr);
+		if (perf_header__add_attr(self, attr) < 0) {
+			perf_header_attr__delete(attr);
+			return -ENOMEM;
+		}
+
 		lseek(fd, tmp, SEEK_SET);
 	}
 
 	if (f_header.event_types.size) {
 		lseek(fd, f_header.event_types.offset, SEEK_SET);
 		events = malloc(f_header.event_types.size);
-		if (!events)
-			die("nomem");
+		if (events == NULL)
+			return -ENOMEM;
 		do_read(fd, events, f_header.event_types.size);
 		event_count =  f_header.event_types.size / sizeof(struct perf_trace_event_type);
 	}
-	self->event_offset = f_header.event_types.offset;
-	self->event_size   = f_header.event_types.size;
 
-	self->data_offset = f_header.data.offset;
-	self->data_size   = f_header.data.size;
+	perf_header__process_sections(self, fd, perf_file_section__process);
 
 	lseek(fd, self->data_offset, SEEK_SET);
 
 	self->frozen = 1;
-
-	return self;
+	return 0;
 }
 
 u64 perf_header__sample_type(struct perf_header *header)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index a0761bc..d1dbe2b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -1,10 +1,13 @@
-#ifndef _PERF_HEADER_H
-#define _PERF_HEADER_H
+#ifndef __PERF_HEADER_H
+#define __PERF_HEADER_H
 
 #include "../../../include/linux/perf_event.h"
 #include <sys/types.h>
+#include <stdbool.h>
 #include "types.h"
 
+#include <linux/bitmap.h>
+
 struct perf_header_attr {
 	struct perf_event_attr attr;
 	int ids, size;
@@ -12,36 +15,71 @@
 	off_t id_offset;
 };
 
-struct perf_header {
-	int frozen;
-	int attrs, size;
-	struct perf_header_attr **attr;
-	s64 attr_offset;
-	u64 data_offset;
-	u64 data_size;
-	u64 event_offset;
-	u64 event_size;
+enum {
+	HEADER_TRACE_INFO = 1,
+	HEADER_BUILD_ID,
+	HEADER_LAST_FEATURE,
 };
 
-struct perf_header *perf_header__read(int fd);
-void perf_header__write(struct perf_header *self, int fd);
+#define HEADER_FEAT_BITS			256
 
-void perf_header__add_attr(struct perf_header *self,
-			   struct perf_header_attr *attr);
+struct perf_file_section {
+	u64 offset;
+	u64 size;
+};
+
+struct perf_file_header {
+	u64				magic;
+	u64				size;
+	u64				attr_size;
+	struct perf_file_section	attrs;
+	struct perf_file_section	data;
+	struct perf_file_section	event_types;
+	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+};
+
+struct perf_header;
+
+int perf_file_header__read(struct perf_file_header *self,
+			   struct perf_header *ph, int fd);
+
+struct perf_header {
+	int			frozen;
+	int			attrs, size;
+	struct perf_header_attr **attr;
+	s64			attr_offset;
+	u64			data_offset;
+	u64			data_size;
+	u64			event_offset;
+	u64			event_size;
+	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+};
+
+struct perf_header *perf_header__new(void);
+void perf_header__delete(struct perf_header *self);
+
+int perf_header__read(struct perf_header *self, int fd);
+int perf_header__write(struct perf_header *self, int fd, bool at_exit);
+
+int perf_header__add_attr(struct perf_header *self,
+			  struct perf_header_attr *attr);
 
 void perf_header__push_event(u64 id, const char *name);
 char *perf_header__find_event(u64 id);
 
+struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr);
+void perf_header_attr__delete(struct perf_header_attr *self);
 
-struct perf_header_attr *
-perf_header_attr__new(struct perf_event_attr *attr);
-void perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
+int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
 
 u64 perf_header__sample_type(struct perf_header *header);
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header);
+void perf_header__set_feat(struct perf_header *self, int feat);
+bool perf_header__has_feat(const struct perf_header *self, int feat);
 
+int perf_header__process_sections(struct perf_header *self, int fd,
+				  int (*process)(struct perf_file_section *self,
+						 int feat, int fd));
 
-struct perf_header *perf_header__new(void);
-
-#endif /* _PERF_HEADER_H */
+#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h
index 7128783..7f5c6de 100644
--- a/tools/perf/util/help.h
+++ b/tools/perf/util/help.h
@@ -1,5 +1,5 @@
-#ifndef HELP_H
-#define HELP_H
+#ifndef __PERF_HELP_H
+#define __PERF_HELP_H
 
 struct cmdnames {
 	size_t alloc;
@@ -26,4 +26,4 @@
 void list_commands(const char *title, struct cmdnames *main_cmds,
 		   struct cmdnames *other_cmds);
 
-#endif /* HELP_H */
+#endif /* __PERF_HELP_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
new file mode 100644
index 0000000..0ebf6ee
--- /dev/null
+++ b/tools/perf/util/hist.c
@@ -0,0 +1,202 @@
+#include "hist.h"
+
+struct rb_root hist;
+struct rb_root collapse_hists;
+struct rb_root output_hists;
+int callchain;
+
+struct callchain_param	callchain_param = {
+	.mode	= CHAIN_GRAPH_REL,
+	.min_percent = 0.5
+};
+
+/*
+ * histogram, sorted on item, collects counts
+ */
+
+struct hist_entry *__hist_entry__add(struct addr_location *al,
+				     struct symbol *sym_parent,
+				     u64 count, bool *hit)
+{
+	struct rb_node **p = &hist.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct hist_entry entry = {
+		.thread	= al->thread,
+		.map	= al->map,
+		.sym	= al->sym,
+		.ip	= al->addr,
+		.level	= al->level,
+		.count	= count,
+		.parent = sym_parent,
+	};
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__cmp(&entry, he);
+
+		if (!cmp) {
+			*hit = true;
+			return he;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	he = malloc(sizeof(*he));
+	if (!he)
+		return NULL;
+	*he = entry;
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &hist);
+	*hit = false;
+	return he;
+}
+
+int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		cmp = se->cmp(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int64_t (*f)(struct hist_entry *, struct hist_entry *);
+
+		f = se->collapse ?: se->cmp;
+
+		cmp = f(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+void hist_entry__free(struct hist_entry *he)
+{
+	free(he);
+}
+
+/*
+ * collapse the histogram
+ */
+
+void collapse__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &collapse_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			iter->count += he->count;
+			hist_entry__free(he);
+			return;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &collapse_hists);
+}
+
+void collapse__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+
+	if (!sort__need_collapse)
+		return;
+
+	next = rb_first(&hist);
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, &hist);
+		collapse__insert_entry(n);
+	}
+}
+
+/*
+ * reverse the map, sort on count.
+ */
+
+void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits)
+{
+	struct rb_node **p = &output_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	if (callchain)
+		callchain_param.sort(&he->sorted_chain, &he->callchain,
+				      min_callchain_hits, &callchain_param);
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (he->count > iter->count)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &output_hists);
+}
+
+void output__resort(u64 total_samples)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+	struct rb_root *tree = &hist;
+	u64 min_callchain_hits;
+
+	min_callchain_hits =
+		total_samples * (callchain_param.min_percent / 100);
+
+	if (sort__need_collapse)
+		tree = &collapse_hists;
+
+	next = rb_first(tree);
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, tree);
+		output__insert_entry(n, min_callchain_hits);
+	}
+}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
new file mode 100644
index 0000000..3020db0
--- /dev/null
+++ b/tools/perf/util/hist.h
@@ -0,0 +1,50 @@
+#ifndef __PERF_HIST_H
+#define __PERF_HIST_H
+#include "../builtin.h"
+
+#include "util.h"
+
+#include "color.h"
+#include <linux/list.h>
+#include "cache.h"
+#include <linux/rbtree.h>
+#include "symbol.h"
+#include "string.h"
+#include "callchain.h"
+#include "strlist.h"
+#include "values.h"
+
+#include "../perf.h"
+#include "debug.h"
+#include "header.h"
+
+#include "parse-options.h"
+#include "parse-events.h"
+
+#include "thread.h"
+#include "sort.h"
+
+extern struct rb_root hist;
+extern struct rb_root collapse_hists;
+extern struct rb_root output_hists;
+extern int callchain;
+extern struct callchain_param callchain_param;
+extern unsigned long total;
+extern unsigned long total_mmap;
+extern unsigned long total_comm;
+extern unsigned long total_fork;
+extern unsigned long total_unknown;
+extern unsigned long total_lost;
+
+struct hist_entry *__hist_entry__add(struct addr_location *al,
+				     struct symbol *parent,
+				     u64 count, bool *hit);
+extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
+extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
+extern void hist_entry__free(struct hist_entry *);
+extern void collapse__insert_entry(struct hist_entry *);
+extern void collapse__resort(void);
+extern void output__insert_entry(struct hist_entry *, u64);
+extern void output__resort(u64);
+
+#endif	/* __PERF_HIST_H */
diff --git a/tools/perf/util/include/asm/asm-offsets.h b/tools/perf/util/include/asm/asm-offsets.h
new file mode 100644
index 0000000..ed53894
--- /dev/null
+++ b/tools/perf/util/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+/* stub */
diff --git a/tools/perf/util/include/asm/bitops.h b/tools/perf/util/include/asm/bitops.h
new file mode 100644
index 0000000..58e9817
--- /dev/null
+++ b/tools/perf/util/include/asm/bitops.h
@@ -0,0 +1,18 @@
+#ifndef _PERF_ASM_BITOPS_H_
+#define _PERF_ASM_BITOPS_H_
+
+#include <sys/types.h>
+#include "../../types.h"
+#include <linux/compiler.h>
+
+/* CHECKME: Not sure both always match */
+#define BITS_PER_LONG	__WORDSIZE
+
+#include "../../../../include/asm-generic/bitops/__fls.h"
+#include "../../../../include/asm-generic/bitops/fls.h"
+#include "../../../../include/asm-generic/bitops/fls64.h"
+#include "../../../../include/asm-generic/bitops/__ffs.h"
+#include "../../../../include/asm-generic/bitops/ffz.h"
+#include "../../../../include/asm-generic/bitops/hweight.h"
+
+#endif
diff --git a/tools/perf/util/include/asm/bug.h b/tools/perf/util/include/asm/bug.h
new file mode 100644
index 0000000..7fcc681
--- /dev/null
+++ b/tools/perf/util/include/asm/bug.h
@@ -0,0 +1,22 @@
+#ifndef _PERF_ASM_GENERIC_BUG_H
+#define _PERF_ASM_GENERIC_BUG_H
+
+#define __WARN_printf(arg...)	do { fprintf(stderr, arg); } while (0)
+
+#define WARN(condition, format...) ({		\
+	int __ret_warn_on = !!(condition);	\
+	if (unlikely(__ret_warn_on))		\
+		__WARN_printf(format);		\
+	unlikely(__ret_warn_on);		\
+})
+
+#define WARN_ONCE(condition, format...)	({	\
+	static int __warned;			\
+	int __ret_warn_once = !!(condition);	\
+						\
+	if (unlikely(__ret_warn_once))		\
+		if (WARN(!__warned, format)) 	\
+			__warned = 1;		\
+	unlikely(__ret_warn_once);		\
+})
+#endif
diff --git a/tools/perf/util/include/asm/byteorder.h b/tools/perf/util/include/asm/byteorder.h
new file mode 100644
index 0000000..b722abe
--- /dev/null
+++ b/tools/perf/util/include/asm/byteorder.h
@@ -0,0 +1,2 @@
+#include <asm/types.h>
+#include "../../../../include/linux/swab.h"
diff --git a/tools/perf/util/include/asm/swab.h b/tools/perf/util/include/asm/swab.h
new file mode 100644
index 0000000..ed53894
--- /dev/null
+++ b/tools/perf/util/include/asm/swab.h
@@ -0,0 +1 @@
+/* stub */
diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h
new file mode 100644
index 0000000..d0f72b8
--- /dev/null
+++ b/tools/perf/util/include/asm/uaccess.h
@@ -0,0 +1,14 @@
+#ifndef _PERF_ASM_UACCESS_H_
+#define _PERF_ASM_UACCESS_H_
+
+#define __get_user(src, dest)						\
+({									\
+	(src) = *dest;							\
+	0;								\
+})
+
+#define get_user	__get_user
+
+#define access_ok(type, addr, size)	1
+
+#endif
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h
new file mode 100644
index 0000000..9450763
--- /dev/null
+++ b/tools/perf/util/include/linux/bitmap.h
@@ -0,0 +1,3 @@
+#include "../../../../include/linux/bitmap.h"
+#include "../../../../include/asm-generic/bitops/find.h"
+#include <linux/errno.h>
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
new file mode 100644
index 0000000..8d63116
--- /dev/null
+++ b/tools/perf/util/include/linux/bitops.h
@@ -0,0 +1,29 @@
+#ifndef _PERF_LINUX_BITOPS_H_
+#define _PERF_LINUX_BITOPS_H_
+
+#define __KERNEL__
+
+#define CONFIG_GENERIC_FIND_NEXT_BIT
+#define CONFIG_GENERIC_FIND_FIRST_BIT
+#include "../../../../include/linux/bitops.h"
+
+#undef __KERNEL__
+
+static inline void set_bit(int nr, unsigned long *addr)
+{
+	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
+}
+
+static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
+{
+	return ((1UL << (nr % BITS_PER_LONG)) &
+		(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
+}
+
+unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned
+		long size, unsigned long offset);
+
+unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned
+		long size, unsigned long offset);
+
+#endif
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h
new file mode 100644
index 0000000..dfb0713
--- /dev/null
+++ b/tools/perf/util/include/linux/compiler.h
@@ -0,0 +1,10 @@
+#ifndef _PERF_LINUX_COMPILER_H_
+#define _PERF_LINUX_COMPILER_H_
+
+#ifndef __always_inline
+#define __always_inline	inline
+#endif
+#define __user
+#define __attribute_const__
+
+#endif
diff --git a/tools/perf/util/include/linux/ctype.h b/tools/perf/util/include/linux/ctype.h
new file mode 100644
index 0000000..a53d4ee
--- /dev/null
+++ b/tools/perf/util/include/linux/ctype.h
@@ -0,0 +1 @@
+#include "../util.h"
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h
index a6b8739..21c0274 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/perf/util/include/linux/kernel.h
@@ -1,6 +1,16 @@
 #ifndef PERF_LINUX_KERNEL_H_
 #define PERF_LINUX_KERNEL_H_
 
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
+#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
+
 #ifndef offsetof
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
@@ -26,4 +36,70 @@
 	_max1 > _max2 ? _max1 : _max2; })
 #endif
 
+#ifndef min
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#ifndef BUG_ON
+#define BUG_ON(cond) assert(!(cond))
+#endif
+
+/*
+ * Both need more care to handle endianness
+ * (Don't use bitmap_copy_le() for now)
+ */
+#define cpu_to_le64(x)	(x)
+#define cpu_to_le32(x)	(x)
+
+static inline int
+vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+	int i;
+	ssize_t ssize = size;
+
+	i = vsnprintf(buf, size, fmt, args);
+
+	return (i >= ssize) ? (ssize - 1) : i;
+}
+
+static inline int scnprintf(char * buf, size_t size, const char * fmt, ...)
+{
+	va_list args;
+	ssize_t ssize = size;
+	int i;
+
+	va_start(args, fmt);
+	i = vsnprintf(buf, size, fmt, args);
+	va_end(args);
+
+	return (i >= ssize) ? (ssize - 1) : i;
+}
+
+static inline unsigned long
+simple_strtoul(const char *nptr, char **endptr, int base)
+{
+	return strtoul(nptr, endptr, base);
+}
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define pr_err(fmt, ...) \
+	do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0)
+#define pr_warning(fmt, ...) \
+	do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0)
+#define pr_info(fmt, ...) \
+	do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0)
+#define pr_debug(fmt, ...) \
+	eprintf(1, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debugN(n, fmt, ...) \
+	eprintf(n, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
+
 #endif
diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h
new file mode 100644
index 0000000..3b2f590
--- /dev/null
+++ b/tools/perf/util/include/linux/string.h
@@ -0,0 +1 @@
+#include <string.h>
diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h
new file mode 100644
index 0000000..196862a
--- /dev/null
+++ b/tools/perf/util/include/linux/types.h
@@ -0,0 +1,9 @@
+#ifndef _PERF_LINUX_TYPES_H_
+#define _PERF_LINUX_TYPES_H_
+
+#include <asm/types.h>
+
+#define DECLARE_BITMAP(name,bits) \
+	unsigned long name[BITS_TO_LONGS(bits)]
+
+#endif
diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h
index 0173abe..b0fcb6d 100644
--- a/tools/perf/util/levenshtein.h
+++ b/tools/perf/util/levenshtein.h
@@ -1,8 +1,8 @@
-#ifndef LEVENSHTEIN_H
-#define LEVENSHTEIN_H
+#ifndef __PERF_LEVENSHTEIN_H
+#define __PERF_LEVENSHTEIN_H
 
 int levenshtein(const char *string1, const char *string2,
 	int swap_penalty, int substition_penalty,
 	int insertion_penalty, int deletion_penalty);
 
-#endif
+#endif /* __PERF_LEVENSHTEIN_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 804e023..69f94fe 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -3,6 +3,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include "debug.h"
 
 static inline int is_anon_memory(const char *filename)
 {
@@ -19,13 +20,28 @@
 	return n;
 }
 
- struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen)
+void map__init(struct map *self, enum map_type type,
+	       u64 start, u64 end, u64 pgoff, struct dso *dso)
+{
+	self->type     = type;
+	self->start    = start;
+	self->end      = end;
+	self->pgoff    = pgoff;
+	self->dso      = dso;
+	self->map_ip   = map__map_ip;
+	self->unmap_ip = map__unmap_ip;
+	RB_CLEAR_NODE(&self->rb_node);
+}
+
+struct map *map__new(struct mmap_event *event, enum map_type type,
+		     char *cwd, int cwdlen)
 {
 	struct map *self = malloc(sizeof(*self));
 
 	if (self != NULL) {
 		const char *filename = event->filename;
 		char newfilename[PATH_MAX];
+		struct dso *dso;
 		int anon;
 
 		if (cwd) {
@@ -45,18 +61,15 @@
 			filename = newfilename;
 		}
 
-		self->start = event->start;
-		self->end   = event->start + event->len;
-		self->pgoff = event->pgoff;
-
-		self->dso = dsos__findnew(filename);
-		if (self->dso == NULL)
+		dso = dsos__findnew(filename);
+		if (dso == NULL)
 			goto out_delete;
 
+		map__init(self, type, event->start, event->start + event->len,
+			  event->pgoff, dso);
+
 		if (self->dso == vdso || anon)
-			self->map_ip = vdso__map_ip;
-		else
-			self->map_ip = map__map_ip;
+			self->map_ip = self->unmap_ip = identity__map_ip;
 	}
 	return self;
 out_delete:
@@ -64,6 +77,72 @@
 	return NULL;
 }
 
+void map__delete(struct map *self)
+{
+	free(self);
+}
+
+void map__fixup_start(struct map *self)
+{
+	struct rb_root *symbols = &self->dso->symbols[self->type];
+	struct rb_node *nd = rb_first(symbols);
+	if (nd != NULL) {
+		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+		self->start = sym->start;
+	}
+}
+
+void map__fixup_end(struct map *self)
+{
+	struct rb_root *symbols = &self->dso->symbols[self->type];
+	struct rb_node *nd = rb_last(symbols);
+	if (nd != NULL) {
+		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+		self->end = sym->end;
+	}
+}
+
+#define DSO__DELETED "(deleted)"
+
+struct symbol *map__find_symbol(struct map *self, u64 addr,
+				symbol_filter_t filter)
+{
+	if (!dso__loaded(self->dso, self->type)) {
+		int nr = dso__load(self->dso, self, filter);
+
+		if (nr < 0) {
+			if (self->dso->has_build_id) {
+				char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+				build_id__sprintf(self->dso->build_id,
+						  sizeof(self->dso->build_id),
+						  sbuild_id);
+				pr_warning("%s with build id %s not found",
+					   self->dso->long_name, sbuild_id);
+			} else
+				pr_warning("Failed to open %s",
+					   self->dso->long_name);
+			pr_warning(", continuing without symbols\n");
+			return NULL;
+		} else if (nr == 0) {
+			const char *name = self->dso->long_name;
+			const size_t len = strlen(name);
+			const size_t real_len = len - sizeof(DSO__DELETED);
+
+			if (len > sizeof(DSO__DELETED) &&
+			    strcmp(name + real_len + 1, DSO__DELETED) == 0) {
+				pr_warning("%.*s was updated, restart the long running apps that use it!\n",
+					   (int)real_len, name);
+			} else {
+				pr_warning("no symbols found in %s, maybe install a debug package?\n", name);
+			}
+			return NULL;
+		}
+	}
+
+	return self->dso->find_symbol(self->dso, self->type, addr);
+}
+
 struct map *map__clone(struct map *self)
 {
 	struct map *map = malloc(sizeof(*self));
diff --git a/tools/perf/util/module.c b/tools/perf/util/module.c
deleted file mode 100644
index 0d8c85d..0000000
--- a/tools/perf/util/module.c
+++ /dev/null
@@ -1,545 +0,0 @@
-#include "util.h"
-#include "../perf.h"
-#include "string.h"
-#include "module.h"
-
-#include <libelf.h>
-#include <libgen.h>
-#include <gelf.h>
-#include <elf.h>
-#include <dirent.h>
-#include <sys/utsname.h>
-
-static unsigned int crc32(const char *p, unsigned int len)
-{
-	int i;
-	unsigned int crc = 0;
-
-	while (len--) {
-		crc ^= *p++;
-		for (i = 0; i < 8; i++)
-			crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
-	}
-	return crc;
-}
-
-/* module section methods */
-
-struct sec_dso *sec_dso__new_dso(const char *name)
-{
-	struct sec_dso *self = malloc(sizeof(*self) + strlen(name) + 1);
-
-	if (self != NULL) {
-		strcpy(self->name, name);
-		self->secs = RB_ROOT;
-		self->find_section = sec_dso__find_section;
-	}
-
-	return self;
-}
-
-static void sec_dso__delete_section(struct section *self)
-{
-	free(((void *)self));
-}
-
-void sec_dso__delete_sections(struct sec_dso *self)
-{
-	struct section *pos;
-	struct rb_node *next = rb_first(&self->secs);
-
-	while (next) {
-		pos = rb_entry(next, struct section, rb_node);
-		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, &self->secs);
-		sec_dso__delete_section(pos);
-	}
-}
-
-void sec_dso__delete_self(struct sec_dso *self)
-{
-	sec_dso__delete_sections(self);
-	free(self);
-}
-
-static void sec_dso__insert_section(struct sec_dso *self, struct section *sec)
-{
-	struct rb_node **p = &self->secs.rb_node;
-	struct rb_node *parent = NULL;
-	const u64 hash = sec->hash;
-	struct section *s;
-
-	while (*p != NULL) {
-		parent = *p;
-		s = rb_entry(parent, struct section, rb_node);
-		if (hash < s->hash)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&sec->rb_node, parent, p);
-	rb_insert_color(&sec->rb_node, &self->secs);
-}
-
-struct section *sec_dso__find_section(struct sec_dso *self, const char *name)
-{
-	struct rb_node *n;
-	u64 hash;
-	int len;
-
-	if (self == NULL)
-		return NULL;
-
-	len = strlen(name);
-	hash = crc32(name, len);
-
-	n = self->secs.rb_node;
-
-	while (n) {
-		struct section *s = rb_entry(n, struct section, rb_node);
-
-		if (hash < s->hash)
-			n = n->rb_left;
-		else if (hash > s->hash)
-			n = n->rb_right;
-		else {
-			if (!strcmp(name, s->name))
-				return s;
-			else
-				n = rb_next(&s->rb_node);
-		}
-	}
-
-	return NULL;
-}
-
-static size_t sec_dso__fprintf_section(struct section *self, FILE *fp)
-{
-	return fprintf(fp, "name:%s vma:%llx path:%s\n",
-		       self->name, self->vma, self->path);
-}
-
-size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp)
-{
-	size_t ret = fprintf(fp, "dso: %s\n", self->name);
-
-	struct rb_node *nd;
-	for (nd = rb_first(&self->secs); nd; nd = rb_next(nd)) {
-		struct section *pos = rb_entry(nd, struct section, rb_node);
-		ret += sec_dso__fprintf_section(pos, fp);
-	}
-
-	return ret;
-}
-
-static struct section *section__new(const char *name, const char *path)
-{
-	struct section *self = calloc(1, sizeof(*self));
-
-	if (!self)
-		goto out_failure;
-
-	self->name = calloc(1, strlen(name) + 1);
-	if (!self->name)
-		goto out_failure;
-
-	self->path = calloc(1, strlen(path) + 1);
-	if (!self->path)
-		goto out_failure;
-
-	strcpy(self->name, name);
-	strcpy(self->path, path);
-	self->hash = crc32(self->name, strlen(name));
-
-	return self;
-
-out_failure:
-	if (self) {
-		if (self->name)
-			free(self->name);
-		if (self->path)
-			free(self->path);
-		free(self);
-	}
-
-	return NULL;
-}
-
-/* module methods */
-
-struct mod_dso *mod_dso__new_dso(const char *name)
-{
-	struct mod_dso *self = malloc(sizeof(*self) + strlen(name) + 1);
-
-	if (self != NULL) {
-		strcpy(self->name, name);
-		self->mods = RB_ROOT;
-		self->find_module = mod_dso__find_module;
-	}
-
-	return self;
-}
-
-static void mod_dso__delete_module(struct module *self)
-{
-	free(((void *)self));
-}
-
-void mod_dso__delete_modules(struct mod_dso *self)
-{
-	struct module *pos;
-	struct rb_node *next = rb_first(&self->mods);
-
-	while (next) {
-		pos = rb_entry(next, struct module, rb_node);
-		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, &self->mods);
-		mod_dso__delete_module(pos);
-	}
-}
-
-void mod_dso__delete_self(struct mod_dso *self)
-{
-	mod_dso__delete_modules(self);
-	free(self);
-}
-
-static void mod_dso__insert_module(struct mod_dso *self, struct module *mod)
-{
-	struct rb_node **p = &self->mods.rb_node;
-	struct rb_node *parent = NULL;
-	const u64 hash = mod->hash;
-	struct module *m;
-
-	while (*p != NULL) {
-		parent = *p;
-		m = rb_entry(parent, struct module, rb_node);
-		if (hash < m->hash)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&mod->rb_node, parent, p);
-	rb_insert_color(&mod->rb_node, &self->mods);
-}
-
-struct module *mod_dso__find_module(struct mod_dso *self, const char *name)
-{
-	struct rb_node *n;
-	u64 hash;
-	int len;
-
-	if (self == NULL)
-		return NULL;
-
-	len = strlen(name);
-	hash = crc32(name, len);
-
-	n = self->mods.rb_node;
-
-	while (n) {
-		struct module *m = rb_entry(n, struct module, rb_node);
-
-		if (hash < m->hash)
-			n = n->rb_left;
-		else if (hash > m->hash)
-			n = n->rb_right;
-		else {
-			if (!strcmp(name, m->name))
-				return m;
-			else
-				n = rb_next(&m->rb_node);
-		}
-	}
-
-	return NULL;
-}
-
-static size_t mod_dso__fprintf_module(struct module *self, FILE *fp)
-{
-	return fprintf(fp, "name:%s path:%s\n", self->name, self->path);
-}
-
-size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp)
-{
-	struct rb_node *nd;
-	size_t ret;
-
-	ret = fprintf(fp, "dso: %s\n", self->name);
-
-	for (nd = rb_first(&self->mods); nd; nd = rb_next(nd)) {
-		struct module *pos = rb_entry(nd, struct module, rb_node);
-
-		ret += mod_dso__fprintf_module(pos, fp);
-	}
-
-	return ret;
-}
-
-static struct module *module__new(const char *name, const char *path)
-{
-	struct module *self = calloc(1, sizeof(*self));
-
-	if (!self)
-		goto out_failure;
-
-	self->name = calloc(1, strlen(name) + 1);
-	if (!self->name)
-		goto out_failure;
-
-	self->path = calloc(1, strlen(path) + 1);
-	if (!self->path)
-		goto out_failure;
-
-	strcpy(self->name, name);
-	strcpy(self->path, path);
-	self->hash = crc32(self->name, strlen(name));
-
-	return self;
-
-out_failure:
-	if (self) {
-		if (self->name)
-			free(self->name);
-		if (self->path)
-			free(self->path);
-		free(self);
-	}
-
-	return NULL;
-}
-
-static int mod_dso__load_sections(struct module *mod)
-{
-	int count = 0, path_len;
-	struct dirent *entry;
-	char *line = NULL;
-	char *dir_path;
-	DIR *dir;
-	size_t n;
-
-	path_len = strlen("/sys/module/");
-	path_len += strlen(mod->name);
-	path_len += strlen("/sections/");
-
-	dir_path = calloc(1, path_len + 1);
-	if (dir_path == NULL)
-		goto out_failure;
-
-	strcat(dir_path, "/sys/module/");
-	strcat(dir_path, mod->name);
-	strcat(dir_path, "/sections/");
-
-	dir = opendir(dir_path);
-	if (dir == NULL)
-		goto out_free;
-
-	while ((entry = readdir(dir))) {
-		struct section *section;
-		char *path, *vma;
-		int line_len;
-		FILE *file;
-
-		if (!strcmp(".", entry->d_name) || !strcmp("..", entry->d_name))
-			continue;
-
-		path = calloc(1, path_len + strlen(entry->d_name) + 1);
-		if (path == NULL)
-			break;
-		strcat(path, dir_path);
-		strcat(path, entry->d_name);
-
-		file = fopen(path, "r");
-		if (file == NULL) {
-			free(path);
-			break;
-		}
-
-		line_len = getline(&line, &n, file);
-		if (line_len < 0) {
-			free(path);
-			fclose(file);
-			break;
-		}
-
-		if (!line) {
-			free(path);
-			fclose(file);
-			break;
-		}
-
-		line[--line_len] = '\0'; /* \n */
-
-		vma = strstr(line, "0x");
-		if (!vma) {
-			free(path);
-			fclose(file);
-			break;
-		}
-		vma += 2;
-
-		section = section__new(entry->d_name, path);
-		if (!section) {
-			fprintf(stderr, "load_sections: allocation error\n");
-			free(path);
-			fclose(file);
-			break;
-		}
-
-		hex2u64(vma, &section->vma);
-		sec_dso__insert_section(mod->sections, section);
-
-		free(path);
-		fclose(file);
-		count++;
-	}
-
-	closedir(dir);
-	free(line);
-	free(dir_path);
-
-	return count;
-
-out_free:
-	free(dir_path);
-
-out_failure:
-	return count;
-}
-
-static int mod_dso__load_module_paths(struct mod_dso *self)
-{
-	struct utsname uts;
-	int count = 0, len, err = -1;
-	char *line = NULL;
-	FILE *file;
-	char *dpath, *dir;
-	size_t n;
-
-	if (uname(&uts) < 0)
-		return err;
-
-	len = strlen("/lib/modules/");
-	len += strlen(uts.release);
-	len += strlen("/modules.dep");
-
-	dpath = calloc(1, len + 1);
-	if (dpath == NULL)
-		return err;
-
-	strcat(dpath, "/lib/modules/");
-	strcat(dpath, uts.release);
-	strcat(dpath, "/modules.dep");
-
-	file = fopen(dpath, "r");
-	if (file == NULL)
-		goto out_failure;
-
-	dir = dirname(dpath);
-	if (!dir)
-		goto out_failure;
-	strcat(dir, "/");
-
-	while (!feof(file)) {
-		struct module *module;
-		char *name, *path, *tmp;
-		FILE *modfile;
-		int line_len;
-
-		line_len = getline(&line, &n, file);
-		if (line_len < 0)
-			break;
-
-		if (!line)
-			break;
-
-		line[--line_len] = '\0'; /* \n */
-
-		path = strchr(line, ':');
-		if (!path)
-			break;
-		*path = '\0';
-
-		path = strdup(line);
-		if (!path)
-			break;
-
-		if (!strstr(path, dir)) {
-			if (strncmp(path, "kernel/", 7))
-				break;
-
-			free(path);
-			path = calloc(1, strlen(dir) + strlen(line) + 1);
-			if (!path)
-				break;
-			strcat(path, dir);
-			strcat(path, line);
-		}
-
-		modfile = fopen(path, "r");
-		if (modfile == NULL)
-			break;
-		fclose(modfile);
-
-		name = strdup(path);
-		if (!name)
-			break;
-
-		name = strtok(name, "/");
-		tmp = name;
-
-		while (tmp) {
-			tmp = strtok(NULL, "/");
-			if (tmp)
-				name = tmp;
-		}
-
-		name = strsep(&name, ".");
-		if (!name)
-			break;
-
-		/* Quirk: replace '-' with '_' in all modules */
-		for (len = strlen(name); len; len--) {
-			if (*(name+len) == '-')
-				*(name+len) = '_';
-		}
-
-		module = module__new(name, path);
-		if (!module)
-			break;
-		mod_dso__insert_module(self, module);
-
-		module->sections = sec_dso__new_dso("sections");
-		if (!module->sections)
-			break;
-
-		module->active = mod_dso__load_sections(module);
-
-		if (module->active > 0)
-			count++;
-	}
-
-	if (feof(file))
-		err = count;
-	else
-		fprintf(stderr, "load_module_paths: modules.dep parsing failure!\n");
-
-out_failure:
-	if (dpath)
-		free(dpath);
-	if (file)
-		fclose(file);
-	if (line)
-		free(line);
-
-	return err;
-}
-
-int mod_dso__load_modules(struct mod_dso *dso)
-{
-	int err;
-
-	err = mod_dso__load_module_paths(dso);
-
-	return err;
-}
diff --git a/tools/perf/util/module.h b/tools/perf/util/module.h
deleted file mode 100644
index 8a592ef..0000000
--- a/tools/perf/util/module.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef _PERF_MODULE_
-#define _PERF_MODULE_ 1
-
-#include <linux/types.h>
-#include "../types.h"
-#include <linux/list.h>
-#include <linux/rbtree.h>
-
-struct section {
-	struct rb_node	rb_node;
-	u64		hash;
-	u64		vma;
-	char		*name;
-	char		*path;
-};
-
-struct sec_dso {
-	struct list_head node;
-	struct rb_root	 secs;
-	struct section    *(*find_section)(struct sec_dso *, const char *name);
-	char		 name[0];
-};
-
-struct module {
-	struct rb_node	rb_node;
-	u64		hash;
-	char		*name;
-	char		*path;
-	struct sec_dso	*sections;
-	int		active;
-};
-
-struct mod_dso {
-	struct list_head node;
-	struct rb_root	 mods;
-	struct module    *(*find_module)(struct mod_dso *, const char *name);
-	char		 name[0];
-};
-
-struct sec_dso *sec_dso__new_dso(const char *name);
-void sec_dso__delete_sections(struct sec_dso *self);
-void sec_dso__delete_self(struct sec_dso *self);
-size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp);
-struct section *sec_dso__find_section(struct sec_dso *self, const char *name);
-
-struct mod_dso *mod_dso__new_dso(const char *name);
-void mod_dso__delete_modules(struct mod_dso *self);
-void mod_dso__delete_self(struct mod_dso *self);
-size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp);
-struct module *mod_dso__find_module(struct mod_dso *self, const char *name);
-int mod_dso__load_modules(struct mod_dso *dso);
-
-#endif /* _PERF_MODULE_ */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 8cfb48c..9e5dbd6 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,4 +1,4 @@
-
+#include "../../../include/linux/hw_breakpoint.h"
 #include "util.h"
 #include "../perf.h"
 #include "parse-options.h"
@@ -7,10 +7,12 @@
 #include "string.h"
 #include "cache.h"
 #include "header.h"
+#include "debugfs.h"
 
-int					nr_counters;
+int				nr_counters;
 
 struct perf_event_attr		attrs[MAX_COUNTERS];
+char				*filters[MAX_COUNTERS];
 
 struct event_symbol {
 	u8		type;
@@ -46,6 +48,8 @@
   { CSW(PAGE_FAULTS_MAJ),	"major-faults",		""		},
   { CSW(CONTEXT_SWITCHES),	"context-switches",	"cs"		},
   { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
+  { CSW(ALIGNMENT_FAULTS),	"alignment-faults",	""		},
+  { CSW(EMULATION_FAULTS),	"emulation-faults",	""		},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
@@ -74,6 +78,8 @@
 	"CPU-migrations",
 	"minor-faults",
 	"major-faults",
+	"alignment-faults",
+	"emulation-faults",
 };
 
 #define MAX_ALIASES 8
@@ -148,16 +154,6 @@
 
 #define MAX_EVENT_LENGTH 512
 
-int valid_debugfs_mount(const char *debugfs)
-{
-	struct statfs st_fs;
-
-	if (statfs(debugfs, &st_fs) < 0)
-		return -ENOENT;
-	else if (st_fs.f_type != (long) DEBUGFS_MAGIC)
-		return -ENOENT;
-	return 0;
-}
 
 struct tracepoint_path *tracepoint_id_to_path(u64 config)
 {
@@ -170,7 +166,7 @@
 	char evt_path[MAXPATHLEN];
 	char dir_path[MAXPATHLEN];
 
-	if (valid_debugfs_mount(debugfs_path))
+	if (debugfs_valid_mountpoint(debugfs_path))
 		return NULL;
 
 	sys_dir = opendir(debugfs_path);
@@ -201,7 +197,7 @@
 			if (id == config) {
 				closedir(evt_dir);
 				closedir(sys_dir);
-				path = calloc(1, sizeof(path));
+				path = zalloc(sizeof(path));
 				path->system = malloc(MAX_EVENT_LENGTH);
 				if (!path->system) {
 					free(path);
@@ -509,7 +505,7 @@
 	char sys_name[MAX_EVENT_LENGTH];
 	unsigned int sys_length, evt_length;
 
-	if (valid_debugfs_mount(debugfs_path))
+	if (debugfs_valid_mountpoint(debugfs_path))
 		return 0;
 
 	evt_name = strchr(*strp, ':');
@@ -544,6 +540,81 @@
 						     attr, strp);
 }
 
+static enum event_result
+parse_breakpoint_type(const char *type, const char **strp,
+		      struct perf_event_attr *attr)
+{
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		if (!type[i])
+			break;
+
+		switch (type[i]) {
+		case 'r':
+			attr->bp_type |= HW_BREAKPOINT_R;
+			break;
+		case 'w':
+			attr->bp_type |= HW_BREAKPOINT_W;
+			break;
+		case 'x':
+			attr->bp_type |= HW_BREAKPOINT_X;
+			break;
+		default:
+			return EVT_FAILED;
+		}
+	}
+	if (!attr->bp_type) /* Default */
+		attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+
+	*strp = type + i;
+
+	return EVT_HANDLED;
+}
+
+static enum event_result
+parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
+{
+	const char *target;
+	const char *type;
+	char *endaddr;
+	u64 addr;
+	enum event_result err;
+
+	target = strchr(*strp, ':');
+	if (!target)
+		return EVT_FAILED;
+
+	if (strncmp(*strp, "mem", target - *strp) != 0)
+		return EVT_FAILED;
+
+	target++;
+
+	addr = strtoull(target, &endaddr, 0);
+	if (target == endaddr)
+		return EVT_FAILED;
+
+	attr->bp_addr = addr;
+	*strp = endaddr;
+
+	type = strchr(target, ':');
+
+	/* If no type is defined, just rw as default */
+	if (!type) {
+		attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+	} else {
+		err = parse_breakpoint_type(++type, strp, attr);
+		if (err == EVT_FAILED)
+			return EVT_FAILED;
+	}
+
+	/* We should find a nice way to override the access type */
+	attr->bp_len = HW_BREAKPOINT_LEN_4;
+	attr->type = PERF_TYPE_BREAKPOINT;
+
+	return EVT_HANDLED;
+}
+
 static int check_events(const char *str, unsigned int i)
 {
 	int n;
@@ -677,6 +748,12 @@
 	if (ret != EVT_FAILED)
 		goto modifier;
 
+	ret = parse_breakpoint_event(str, attr);
+	if (ret != EVT_FAILED)
+		goto modifier;
+
+	fprintf(stderr, "invalid or unsupported event: '%s'\n", *str);
+	fprintf(stderr, "Run 'perf list' for a list of valid events\n");
 	return EVT_FAILED;
 
 modifier:
@@ -708,7 +785,6 @@
 	perf_header__push_event(id, orgname);
 }
 
-
 int parse_events(const struct option *opt __used, const char *str, int unset __used)
 {
 	struct perf_event_attr attr;
@@ -745,6 +821,28 @@
 	return 0;
 }
 
+int parse_filter(const struct option *opt __used, const char *str,
+		 int unset __used)
+{
+	int i = nr_counters - 1;
+	int len = strlen(str);
+
+	if (i < 0 || attrs[i].type != PERF_TYPE_TRACEPOINT) {
+		fprintf(stderr,
+			"-F option should follow a -e tracepoint option\n");
+		return -1;
+	}
+
+	filters[i] = malloc(len + 1);
+	if (!filters[i]) {
+		fprintf(stderr, "not enough memory to hold filter string\n");
+		return -1;
+	}
+	strcpy(filters[i], str);
+
+	return 0;
+}
+
 static const char * const event_type_descriptors[] = {
 	"",
 	"Hardware event",
@@ -764,7 +862,7 @@
 	char evt_path[MAXPATHLEN];
 	char dir_path[MAXPATHLEN];
 
-	if (valid_debugfs_mount(debugfs_path))
+	if (debugfs_valid_mountpoint(debugfs_path))
 		return;
 
 	sys_dir = opendir(debugfs_path);
@@ -782,7 +880,7 @@
 		for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
 			snprintf(evt_path, MAXPATHLEN, "%s:%s",
 				 sys_dirent.d_name, evt_dirent.d_name);
-			fprintf(stderr, "  %-42s [%s]\n", evt_path,
+			printf("  %-42s [%s]\n", evt_path,
 				event_type_descriptors[PERF_TYPE_TRACEPOINT+1]);
 		}
 		closedir(evt_dir);
@@ -799,8 +897,8 @@
 	unsigned int i, type, op, prev_type = -1;
 	char name[40];
 
-	fprintf(stderr, "\n");
-	fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
+	printf("\n");
+	printf("List of pre-defined events (to be used in -e):\n");
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
 		type = syms->type + 1;
@@ -808,19 +906,19 @@
 			type = 0;
 
 		if (type != prev_type)
-			fprintf(stderr, "\n");
+			printf("\n");
 
 		if (strlen(syms->alias))
 			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
 		else
 			strcpy(name, syms->symbol);
-		fprintf(stderr, "  %-42s [%s]\n", name,
+		printf("  %-42s [%s]\n", name,
 			event_type_descriptors[type]);
 
 		prev_type = type;
 	}
 
-	fprintf(stderr, "\n");
+	printf("\n");
 	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
 		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
 			/* skip invalid cache type */
@@ -828,17 +926,20 @@
 				continue;
 
 			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
-				fprintf(stderr, "  %-42s [%s]\n",
+				printf("  %-42s [%s]\n",
 					event_cache_name(type, op, i),
 					event_type_descriptors[4]);
 			}
 		}
 	}
 
-	fprintf(stderr, "\n");
-	fprintf(stderr, "  %-42s [raw hardware event descriptor]\n",
+	printf("\n");
+	printf("  %-42s [raw hardware event descriptor]\n",
 		"rNNN");
-	fprintf(stderr, "\n");
+	printf("\n");
+
+	printf("  %-42s [hardware breakpoint]\n", "mem:<addr>[:access]");
+	printf("\n");
 
 	print_tracepoint_events();
 
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 30c6081..b8c1f64 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -1,5 +1,5 @@
-#ifndef _PARSE_EVENTS_H
-#define _PARSE_EVENTS_H
+#ifndef __PERF_PARSE_EVENTS_H
+#define __PERF_PARSE_EVENTS_H
 /*
  * Parse symbolic events/counts passed in as options:
  */
@@ -17,11 +17,13 @@
 extern int			nr_counters;
 
 extern struct perf_event_attr attrs[MAX_COUNTERS];
+extern char *filters[MAX_COUNTERS];
 
 extern const char *event_name(int ctr);
 extern const char *__event_name(int type, u64 config);
 
 extern int parse_events(const struct option *opt, const char *str, int unset);
+extern int parse_filter(const struct option *opt, const char *str, int unset);
 
 #define EVENTS_HELP_MAX (128*1024)
 
@@ -31,4 +33,4 @@
 extern int valid_debugfs_mount(const char *debugfs);
 
 
-#endif /* _PARSE_EVENTS_H */
+#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 2ee248f..948805a 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -1,5 +1,5 @@
-#ifndef PARSE_OPTIONS_H
-#define PARSE_OPTIONS_H
+#ifndef __PERF_PARSE_OPTIONS_H
+#define __PERF_PARSE_OPTIONS_H
 
 enum parse_opt_type {
 	/* special types */
@@ -174,4 +174,4 @@
 
 extern const char *parse_options_fix_filename(const char *prefix, const char *file);
 
-#endif
+#endif /* __PERF_PARSE_OPTIONS_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
new file mode 100644
index 0000000..cd7fbda
--- /dev/null
+++ b/tools/perf/util/probe-event.c
@@ -0,0 +1,484 @@
+/*
+ * probe-event.c : perf-probe definition to kprobe_events format converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <limits.h>
+
+#undef _GNU_SOURCE
+#include "event.h"
+#include "string.h"
+#include "strlist.h"
+#include "debug.h"
+#include "parse-events.h"  /* For debugfs_path */
+#include "probe-event.h"
+
+#define MAX_CMDLEN 256
+#define MAX_PROBE_ARGS 128
+#define PERFPROBE_GROUP "probe"
+
+#define semantic_error(msg ...) die("Semantic error :" msg)
+
+/* If there is no space to write, returns -E2BIG. */
+static int e_snprintf(char *str, size_t size, const char *format, ...)
+{
+	int ret;
+	va_list ap;
+	va_start(ap, format);
+	ret = vsnprintf(str, size, format, ap);
+	va_end(ap);
+	if (ret >= (int)size)
+		ret = -E2BIG;
+	return ret;
+}
+
+/* Parse probepoint definition. */
+static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp)
+{
+	char *ptr, *tmp;
+	char c, nc = 0;
+	/*
+	 * <Syntax>
+	 * perf probe SRC:LN
+	 * perf probe FUNC[+OFFS|%return][@SRC]
+	 */
+
+	ptr = strpbrk(arg, ":+@%");
+	if (ptr) {
+		nc = *ptr;
+		*ptr++ = '\0';
+	}
+
+	/* Check arg is function or file and copy it */
+	if (strchr(arg, '.'))	/* File */
+		pp->file = strdup(arg);
+	else			/* Function */
+		pp->function = strdup(arg);
+	DIE_IF(pp->file == NULL && pp->function == NULL);
+
+	/* Parse other options */
+	while (ptr) {
+		arg = ptr;
+		c = nc;
+		ptr = strpbrk(arg, ":+@%");
+		if (ptr) {
+			nc = *ptr;
+			*ptr++ = '\0';
+		}
+		switch (c) {
+		case ':':	/* Line number */
+			pp->line = strtoul(arg, &tmp, 0);
+			if (*tmp != '\0')
+				semantic_error("There is non-digit charactor"
+						" in line number.");
+			break;
+		case '+':	/* Byte offset from a symbol */
+			pp->offset = strtoul(arg, &tmp, 0);
+			if (*tmp != '\0')
+				semantic_error("There is non-digit charactor"
+						" in offset.");
+			break;
+		case '@':	/* File name */
+			if (pp->file)
+				semantic_error("SRC@SRC is not allowed.");
+			pp->file = strdup(arg);
+			DIE_IF(pp->file == NULL);
+			if (ptr)
+				semantic_error("@SRC must be the last "
+					       "option.");
+			break;
+		case '%':	/* Probe places */
+			if (strcmp(arg, "return") == 0) {
+				pp->retprobe = 1;
+			} else	/* Others not supported yet */
+				semantic_error("%%%s is not supported.", arg);
+			break;
+		default:
+			DIE_IF("Program has a bug.");
+			break;
+		}
+	}
+
+	/* Exclusion check */
+	if (pp->line && pp->offset)
+		semantic_error("Offset can't be used with line number.");
+
+	if (!pp->line && pp->file && !pp->function)
+		semantic_error("File always requires line number.");
+
+	if (pp->offset && !pp->function)
+		semantic_error("Offset requires an entry function.");
+
+	if (pp->retprobe && !pp->function)
+		semantic_error("Return probe requires an entry function.");
+
+	if ((pp->offset || pp->line) && pp->retprobe)
+		semantic_error("Offset/Line can't be used with return probe.");
+
+	pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n",
+		 pp->function, pp->file, pp->line, pp->offset, pp->retprobe);
+}
+
+/* Parse perf-probe event definition */
+int parse_perf_probe_event(const char *str, struct probe_point *pp)
+{
+	char **argv;
+	int argc, i, need_dwarf = 0;
+
+	argv = argv_split(str, &argc);
+	if (!argv)
+		die("argv_split failed.");
+	if (argc > MAX_PROBE_ARGS + 1)
+		semantic_error("Too many arguments");
+
+	/* Parse probe point */
+	parse_perf_probe_probepoint(argv[0], pp);
+	if (pp->file || pp->line)
+		need_dwarf = 1;
+
+	/* Copy arguments and ensure return probe has no C argument */
+	pp->nr_args = argc - 1;
+	pp->args = zalloc(sizeof(char *) * pp->nr_args);
+	for (i = 0; i < pp->nr_args; i++) {
+		pp->args[i] = strdup(argv[i + 1]);
+		if (!pp->args[i])
+			die("Failed to copy argument.");
+		if (is_c_varname(pp->args[i])) {
+			if (pp->retprobe)
+				semantic_error("You can't specify local"
+						" variable for kretprobe");
+			need_dwarf = 1;
+		}
+	}
+
+	argv_free(argv);
+	return need_dwarf;
+}
+
+/* Parse kprobe_events event into struct probe_point */
+void parse_trace_kprobe_event(const char *str, char **group, char **event,
+			      struct probe_point *pp)
+{
+	char pr;
+	char *p;
+	int ret, i, argc;
+	char **argv;
+
+	pr_debug("Parsing kprobe_events: %s\n", str);
+	argv = argv_split(str, &argc);
+	if (!argv)
+		die("argv_split failed.");
+	if (argc < 2)
+		semantic_error("Too less arguments.");
+
+	/* Scan event and group name. */
+	ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]",
+		     &pr, (float *)(void *)group, (float *)(void *)event);
+	if (ret != 3)
+		semantic_error("Failed to parse event name: %s", argv[0]);
+	pr_debug("Group:%s Event:%s probe:%c\n", *group, *event, pr);
+
+	if (!pp)
+		goto end;
+
+	pp->retprobe = (pr == 'r');
+
+	/* Scan function name and offset */
+	ret = sscanf(argv[1], "%a[^+]+%d", (float *)(void *)&pp->function, &pp->offset);
+	if (ret == 1)
+		pp->offset = 0;
+
+	/* kprobe_events doesn't have this information */
+	pp->line = 0;
+	pp->file = NULL;
+
+	pp->nr_args = argc - 2;
+	pp->args = zalloc(sizeof(char *) * pp->nr_args);
+	for (i = 0; i < pp->nr_args; i++) {
+		p = strchr(argv[i + 2], '=');
+		if (p)	/* We don't need which register is assigned. */
+			*p = '\0';
+		pp->args[i] = strdup(argv[i + 2]);
+		if (!pp->args[i])
+			die("Failed to copy argument.");
+	}
+
+end:
+	argv_free(argv);
+}
+
+int synthesize_perf_probe_event(struct probe_point *pp)
+{
+	char *buf;
+	char offs[64] = "", line[64] = "";
+	int i, len, ret;
+
+	pp->probes[0] = buf = zalloc(MAX_CMDLEN);
+	if (!buf)
+		die("Failed to allocate memory by zalloc.");
+	if (pp->offset) {
+		ret = e_snprintf(offs, 64, "+%d", pp->offset);
+		if (ret <= 0)
+			goto error;
+	}
+	if (pp->line) {
+		ret = e_snprintf(line, 64, ":%d", pp->line);
+		if (ret <= 0)
+			goto error;
+	}
+
+	if (pp->function)
+		ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function,
+				 offs, pp->retprobe ? "%return" : "", line);
+	else
+		ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->file, line);
+	if (ret <= 0)
+		goto error;
+	len = ret;
+
+	for (i = 0; i < pp->nr_args; i++) {
+		ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s",
+				 pp->args[i]);
+		if (ret <= 0)
+			goto error;
+		len += ret;
+	}
+	pp->found = 1;
+
+	return pp->found;
+error:
+	free(pp->probes[0]);
+
+	return ret;
+}
+
+int synthesize_trace_kprobe_event(struct probe_point *pp)
+{
+	char *buf;
+	int i, len, ret;
+
+	pp->probes[0] = buf = zalloc(MAX_CMDLEN);
+	if (!buf)
+		die("Failed to allocate memory by zalloc.");
+	ret = e_snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset);
+	if (ret <= 0)
+		goto error;
+	len = ret;
+
+	for (i = 0; i < pp->nr_args; i++) {
+		ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s",
+				 pp->args[i]);
+		if (ret <= 0)
+			goto error;
+		len += ret;
+	}
+	pp->found = 1;
+
+	return pp->found;
+error:
+	free(pp->probes[0]);
+
+	return ret;
+}
+
+static int open_kprobe_events(int flags, int mode)
+{
+	char buf[PATH_MAX];
+	int ret;
+
+	ret = e_snprintf(buf, PATH_MAX, "%s/../kprobe_events", debugfs_path);
+	if (ret < 0)
+		die("Failed to make kprobe_events path.");
+
+	ret = open(buf, flags, mode);
+	if (ret < 0) {
+		if (errno == ENOENT)
+			die("kprobe_events file does not exist -"
+			    " please rebuild with CONFIG_KPROBE_TRACER.");
+		else
+			die("Could not open kprobe_events file: %s",
+			    strerror(errno));
+	}
+	return ret;
+}
+
+/* Get raw string list of current kprobe_events */
+static struct strlist *get_trace_kprobe_event_rawlist(int fd)
+{
+	int ret, idx;
+	FILE *fp;
+	char buf[MAX_CMDLEN];
+	char *p;
+	struct strlist *sl;
+
+	sl = strlist__new(true, NULL);
+
+	fp = fdopen(dup(fd), "r");
+	while (!feof(fp)) {
+		p = fgets(buf, MAX_CMDLEN, fp);
+		if (!p)
+			break;
+
+		idx = strlen(p) - 1;
+		if (p[idx] == '\n')
+			p[idx] = '\0';
+		ret = strlist__add(sl, buf);
+		if (ret < 0)
+			die("strlist__add failed: %s", strerror(-ret));
+	}
+	fclose(fp);
+
+	return sl;
+}
+
+/* Free and zero clear probe_point */
+static void clear_probe_point(struct probe_point *pp)
+{
+	int i;
+
+	if (pp->function)
+		free(pp->function);
+	if (pp->file)
+		free(pp->file);
+	for (i = 0; i < pp->nr_args; i++)
+		free(pp->args[i]);
+	if (pp->args)
+		free(pp->args);
+	for (i = 0; i < pp->found; i++)
+		free(pp->probes[i]);
+	memset(pp, 0, sizeof(pp));
+}
+
+/* List up current perf-probe events */
+void show_perf_probe_events(void)
+{
+	unsigned int i;
+	int fd;
+	char *group, *event;
+	struct probe_point pp;
+	struct strlist *rawlist;
+	struct str_node *ent;
+
+	fd = open_kprobe_events(O_RDONLY, 0);
+	rawlist = get_trace_kprobe_event_rawlist(fd);
+	close(fd);
+
+	for (i = 0; i < strlist__nr_entries(rawlist); i++) {
+		ent = strlist__entry(rawlist, i);
+		parse_trace_kprobe_event(ent->s, &group, &event, &pp);
+		synthesize_perf_probe_event(&pp);
+		printf("[%s:%s]\t%s\n", group, event, pp.probes[0]);
+		free(group);
+		free(event);
+		clear_probe_point(&pp);
+	}
+
+	strlist__delete(rawlist);
+}
+
+/* Get current perf-probe event names */
+static struct strlist *get_perf_event_names(int fd)
+{
+	unsigned int i;
+	char *group, *event;
+	struct strlist *sl, *rawlist;
+	struct str_node *ent;
+
+	rawlist = get_trace_kprobe_event_rawlist(fd);
+
+	sl = strlist__new(false, NULL);
+	for (i = 0; i < strlist__nr_entries(rawlist); i++) {
+		ent = strlist__entry(rawlist, i);
+		parse_trace_kprobe_event(ent->s, &group, &event, NULL);
+		strlist__add(sl, event);
+		free(group);
+	}
+
+	strlist__delete(rawlist);
+
+	return sl;
+}
+
+static int write_trace_kprobe_event(int fd, const char *buf)
+{
+	int ret;
+
+	ret = write(fd, buf, strlen(buf));
+	if (ret <= 0)
+		die("Failed to create event.");
+	else
+		printf("Added new event: %s\n", buf);
+
+	return ret;
+}
+
+static void get_new_event_name(char *buf, size_t len, const char *base,
+			       struct strlist *namelist)
+{
+	int i, ret;
+	for (i = 0; i < MAX_EVENT_INDEX; i++) {
+		ret = e_snprintf(buf, len, "%s_%d", base, i);
+		if (ret < 0)
+			die("snprintf() failed: %s", strerror(-ret));
+		if (!strlist__has_entry(namelist, buf))
+			break;
+	}
+	if (i == MAX_EVENT_INDEX)
+		die("Too many events are on the same function.");
+}
+
+void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
+{
+	int i, j, fd;
+	struct probe_point *pp;
+	char buf[MAX_CMDLEN];
+	char event[64];
+	struct strlist *namelist;
+
+	fd = open_kprobe_events(O_RDWR, O_APPEND);
+	/* Get current event names */
+	namelist = get_perf_event_names(fd);
+
+	for (j = 0; j < nr_probes; j++) {
+		pp = probes + j;
+		for (i = 0; i < pp->found; i++) {
+			/* Get an unused new event name */
+			get_new_event_name(event, 64, pp->function, namelist);
+			snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s\n",
+				 pp->retprobe ? 'r' : 'p',
+				 PERFPROBE_GROUP, event,
+				 pp->probes[i]);
+			write_trace_kprobe_event(fd, buf);
+			/* Add added event name to namelist */
+			strlist__add(namelist, event);
+		}
+	}
+	close(fd);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
new file mode 100644
index 0000000..0c6fe56
--- /dev/null
+++ b/tools/perf/util/probe-event.h
@@ -0,0 +1,18 @@
+#ifndef _PROBE_EVENT_H
+#define _PROBE_EVENT_H
+
+#include "probe-finder.h"
+#include "strlist.h"
+
+extern int parse_perf_probe_event(const char *str, struct probe_point *pp);
+extern int synthesize_perf_probe_event(struct probe_point *pp);
+extern void parse_trace_kprobe_event(const char *str, char **group,
+				     char **event, struct probe_point *pp);
+extern int synthesize_trace_kprobe_event(struct probe_point *pp);
+extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes);
+extern void show_perf_probe_events(void);
+
+/* Maximum index number of event-name postfix */
+#define MAX_EVENT_INDEX	1024
+
+#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
new file mode 100644
index 0000000..293cdfc
--- /dev/null
+++ b/tools/perf/util/probe-finder.c
@@ -0,0 +1,732 @@
+/*
+ * probe-finder.c : C expression to kprobe event converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+#include "event.h"
+#include "debug.h"
+#include "util.h"
+#include "probe-finder.h"
+
+
+/* Dwarf_Die Linkage to parent Die */
+struct die_link {
+	struct die_link *parent;	/* Parent die */
+	Dwarf_Die die;			/* Current die */
+};
+
+static Dwarf_Debug __dw_debug;
+static Dwarf_Error __dw_error;
+
+/*
+ * Generic dwarf analysis helpers
+ */
+
+#define X86_32_MAX_REGS 8
+const char *x86_32_regs_table[X86_32_MAX_REGS] = {
+	"%ax",
+	"%cx",
+	"%dx",
+	"%bx",
+	"$stack",	/* Stack address instead of %sp */
+	"%bp",
+	"%si",
+	"%di",
+};
+
+#define X86_64_MAX_REGS 16
+const char *x86_64_regs_table[X86_64_MAX_REGS] = {
+	"%ax",
+	"%dx",
+	"%cx",
+	"%bx",
+	"%si",
+	"%di",
+	"%bp",
+	"%sp",
+	"%r8",
+	"%r9",
+	"%r10",
+	"%r11",
+	"%r12",
+	"%r13",
+	"%r14",
+	"%r15",
+};
+
+/* TODO: switching by dwarf address size */
+#ifdef __x86_64__
+#define ARCH_MAX_REGS X86_64_MAX_REGS
+#define arch_regs_table x86_64_regs_table
+#else
+#define ARCH_MAX_REGS X86_32_MAX_REGS
+#define arch_regs_table x86_32_regs_table
+#endif
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+static const char *get_arch_regstr(unsigned int n)
+{
+	return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL;
+}
+
+/*
+ * Compare the tail of two strings.
+ * Return 0 if whole of either string is same as another's tail part.
+ */
+static int strtailcmp(const char *s1, const char *s2)
+{
+	int i1 = strlen(s1);
+	int i2 = strlen(s2);
+	while (--i1 > 0 && --i2 > 0) {
+		if (s1[i1] != s2[i2])
+			return s1[i1] - s2[i2];
+	}
+	return 0;
+}
+
+/* Find the fileno of the target file. */
+static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
+{
+	Dwarf_Signed cnt, i;
+	Dwarf_Unsigned found = 0;
+	char **srcs;
+	int ret;
+
+	if (!fname)
+		return 0;
+
+	ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
+	if (ret == DW_DLV_OK) {
+		for (i = 0; i < cnt && !found; i++) {
+			if (strtailcmp(srcs[i], fname) == 0)
+				found = i + 1;
+			dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
+		}
+		for (; i < cnt; i++)
+			dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
+		dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
+	}
+	if (found)
+		pr_debug("found fno: %d\n", (int)found);
+	return found;
+}
+
+/* Compare diename and tname */
+static int die_compare_name(Dwarf_Die dw_die, const char *tname)
+{
+	char *name;
+	int ret;
+	ret = dwarf_diename(dw_die, &name, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if (ret == DW_DLV_OK) {
+		ret = strcmp(tname, name);
+		dwarf_dealloc(__dw_debug, name, DW_DLA_STRING);
+	} else
+		ret = -1;
+	return ret;
+}
+
+/* Check the address is in the subprogram(function). */
+static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr,
+				 Dwarf_Signed *offs)
+{
+	Dwarf_Addr lopc, hipc;
+	int ret;
+
+	/* TODO: check ranges */
+	ret = dwarf_lowpc(sp_die, &lopc, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if (ret == DW_DLV_NO_ENTRY)
+		return 0;
+	ret = dwarf_highpc(sp_die, &hipc, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	if (lopc <= addr && addr < hipc) {
+		*offs = addr - lopc;
+		return 1;
+	} else
+		return 0;
+}
+
+/* Check the die is inlined function */
+static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die)
+{
+	/* TODO: check strictly */
+	Dwarf_Bool inl;
+	int ret;
+
+	ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	return inl;
+}
+
+/* Get the offset of abstruct_origin */
+static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Off cu_offs;
+	int ret;
+
+	ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	ret = dwarf_formref(attr, &cu_offs, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
+	return cu_offs;
+}
+
+/* Get entry pc(or low pc, 1st entry of ranges)  of the die */
+static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Addr addr;
+	Dwarf_Off offs;
+	Dwarf_Ranges *ranges;
+	Dwarf_Signed cnt;
+	int ret;
+
+	/* Try to get entry pc */
+	ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if (ret == DW_DLV_OK) {
+		ret = dwarf_formaddr(attr, &addr, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+		dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
+		return addr;
+	}
+
+	/* Try to get low pc */
+	ret = dwarf_lowpc(dw_die, &addr, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if (ret == DW_DLV_OK)
+		return addr;
+
+	/* Try to get ranges */
+	ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	ret = dwarf_formref(attr, &offs, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL,
+				&__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	addr = ranges[0].dwr_addr1;
+	dwarf_ranges_dealloc(__dw_debug, ranges, cnt);
+	return addr;
+}
+
+/*
+ * Search a Die from Die tree.
+ * Note: cur_link->die should be deallocated in this function.
+ */
+static int __search_die_tree(struct die_link *cur_link,
+			     int (*die_cb)(struct die_link *, void *),
+			     void *data)
+{
+	Dwarf_Die new_die;
+	struct die_link new_link;
+	int ret;
+
+	if (!die_cb)
+		return 0;
+
+	/* Check current die */
+	while (!(ret = die_cb(cur_link, data))) {
+		/* Check child die */
+		ret = dwarf_child(cur_link->die, &new_die, &__dw_error);
+		DIE_IF(ret == DW_DLV_ERROR);
+		if (ret == DW_DLV_OK) {
+			new_link.parent = cur_link;
+			new_link.die = new_die;
+			ret = __search_die_tree(&new_link, die_cb, data);
+			if (ret)
+				break;
+		}
+
+		/* Move to next sibling */
+		ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die,
+				      &__dw_error);
+		DIE_IF(ret == DW_DLV_ERROR);
+		dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
+		cur_link->die = new_die;
+		if (ret == DW_DLV_NO_ENTRY)
+			return 0;
+	}
+	dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
+	return ret;
+}
+
+/* Search a die in its children's die tree */
+static int search_die_from_children(Dwarf_Die parent_die,
+				    int (*die_cb)(struct die_link *, void *),
+				    void *data)
+{
+	struct die_link new_link;
+	int ret;
+
+	new_link.parent = NULL;
+	ret = dwarf_child(parent_die, &new_link.die, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if (ret == DW_DLV_OK)
+		return __search_die_tree(&new_link, die_cb, data);
+	else
+		return 0;
+}
+
+/* Find a locdesc corresponding to the address */
+static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc,
+			    Dwarf_Addr addr)
+{
+	Dwarf_Signed lcnt;
+	Dwarf_Locdesc **llbuf;
+	int ret, i;
+
+	ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	ret = DW_DLV_NO_ENTRY;
+	for (i = 0; i < lcnt; ++i) {
+		if (llbuf[i]->ld_lopc <= addr &&
+		    llbuf[i]->ld_hipc > addr) {
+			memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc));
+			desc->ld_s =
+				malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents);
+			DIE_IF(desc->ld_s == NULL);
+			memcpy(desc->ld_s, llbuf[i]->ld_s,
+				sizeof(Dwarf_Loc) * llbuf[i]->ld_cents);
+			ret = DW_DLV_OK;
+			break;
+		}
+		dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK);
+		dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC);
+	}
+	/* Releasing loop */
+	for (; i < lcnt; ++i) {
+		dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK);
+		dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC);
+	}
+	dwarf_dealloc(__dw_debug, llbuf, DW_DLA_LIST);
+	return ret;
+}
+
+/* Get decl_file attribute value (file number) */
+static Dwarf_Unsigned die_get_decl_file(Dwarf_Die sp_die)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Unsigned fno;
+	int ret;
+
+	ret = dwarf_attr(sp_die, DW_AT_decl_file, &attr, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	dwarf_formudata(attr, &fno, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
+	return fno;
+}
+
+/* Get decl_line attribute value (line number) */
+static Dwarf_Unsigned die_get_decl_line(Dwarf_Die sp_die)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Unsigned lno;
+	int ret;
+
+	ret = dwarf_attr(sp_die, DW_AT_decl_line, &attr, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	dwarf_formudata(attr, &lno, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
+	return lno;
+}
+
+/*
+ * Probe finder related functions
+ */
+
+/* Show a location */
+static void show_location(Dwarf_Loc *loc, struct probe_finder *pf)
+{
+	Dwarf_Small op;
+	Dwarf_Unsigned regn;
+	Dwarf_Signed offs;
+	int deref = 0, ret;
+	const char *regs;
+
+	op = loc->lr_atom;
+
+	/* If this is based on frame buffer, set the offset */
+	if (op == DW_OP_fbreg) {
+		deref = 1;
+		offs = (Dwarf_Signed)loc->lr_number;
+		op = pf->fbloc.ld_s[0].lr_atom;
+		loc = &pf->fbloc.ld_s[0];
+	} else
+		offs = 0;
+
+	if (op >= DW_OP_breg0 && op <= DW_OP_breg31) {
+		regn = op - DW_OP_breg0;
+		offs += (Dwarf_Signed)loc->lr_number;
+		deref = 1;
+	} else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) {
+		regn = op - DW_OP_reg0;
+	} else if (op == DW_OP_bregx) {
+		regn = loc->lr_number;
+		offs += (Dwarf_Signed)loc->lr_number2;
+		deref = 1;
+	} else if (op == DW_OP_regx) {
+		regn = loc->lr_number;
+	} else
+		die("Dwarf_OP %d is not supported.\n", op);
+
+	regs = get_arch_regstr(regn);
+	if (!regs)
+		die("%lld exceeds max register number.\n", regn);
+
+	if (deref)
+		ret = snprintf(pf->buf, pf->len,
+				 " %s=%+lld(%s)", pf->var, offs, regs);
+	else
+		ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs);
+	DIE_IF(ret < 0);
+	DIE_IF(ret >= pf->len);
+}
+
+/* Show a variables in kprobe event format */
+static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Locdesc ld;
+	int ret;
+
+	ret = dwarf_attr(vr_die, DW_AT_location, &attr, &__dw_error);
+	if (ret != DW_DLV_OK)
+		goto error;
+	ret = attr_get_locdesc(attr, &ld, (pf->addr - pf->cu_base));
+	if (ret != DW_DLV_OK)
+		goto error;
+	/* TODO? */
+	DIE_IF(ld.ld_cents != 1);
+	show_location(&ld.ld_s[0], pf);
+	free(ld.ld_s);
+	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
+	return ;
+error:
+	die("Failed to find the location of %s at this address.\n"
+	    " Perhaps, it has been optimized out.\n", pf->var);
+}
+
+static int variable_callback(struct die_link *dlink, void *data)
+{
+	struct probe_finder *pf = (struct probe_finder *)data;
+	Dwarf_Half tag;
+	int ret;
+
+	ret = dwarf_tag(dlink->die, &tag, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if ((tag == DW_TAG_formal_parameter ||
+	     tag == DW_TAG_variable) &&
+	    (die_compare_name(dlink->die, pf->var) == 0)) {
+		show_variable(dlink->die, pf);
+		return 1;
+	}
+	/* TODO: Support struct members and arrays */
+	return 0;
+}
+
+/* Find a variable in a subprogram die */
+static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf)
+{
+	int ret;
+
+	if (!is_c_varname(pf->var)) {
+		/* Output raw parameters */
+		ret = snprintf(pf->buf, pf->len, " %s", pf->var);
+		DIE_IF(ret < 0);
+		DIE_IF(ret >= pf->len);
+		return ;
+	}
+
+	pr_debug("Searching '%s' variable in context.\n", pf->var);
+	/* Search child die for local variables and parameters. */
+	ret = search_die_from_children(sp_die, variable_callback, pf);
+	if (!ret)
+		die("Failed to find '%s' in this function.\n", pf->var);
+}
+
+/* Get a frame base on the address */
+static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf)
+{
+	Dwarf_Attribute attr;
+	int ret;
+
+	ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base));
+	DIE_IF(ret != DW_DLV_OK);
+	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
+}
+
+static void free_current_frame_base(struct probe_finder *pf)
+{
+	free(pf->fbloc.ld_s);
+	memset(&pf->fbloc, 0, sizeof(Dwarf_Locdesc));
+}
+
+/* Show a probe point to output buffer */
+static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs,
+			    struct probe_finder *pf)
+{
+	struct probe_point *pp = pf->pp;
+	char *name;
+	char tmp[MAX_PROBE_BUFFER];
+	int ret, i, len;
+
+	/* Output name of probe point */
+	ret = dwarf_diename(sp_die, &name, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if (ret == DW_DLV_OK) {
+		ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name,
+				(unsigned int)offs);
+		/* Copy the function name if possible */
+		if (!pp->function) {
+			pp->function = strdup(name);
+			pp->offset = offs;
+		}
+		dwarf_dealloc(__dw_debug, name, DW_DLA_STRING);
+	} else {
+		/* This function has no name. */
+		ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr);
+		if (!pp->function) {
+			/* TODO: Use _stext */
+			pp->function = strdup("");
+			pp->offset = (int)pf->addr;
+		}
+	}
+	DIE_IF(ret < 0);
+	DIE_IF(ret >= MAX_PROBE_BUFFER);
+	len = ret;
+	pr_debug("Probe point found: %s\n", tmp);
+
+	/* Find each argument */
+	get_current_frame_base(sp_die, pf);
+	for (i = 0; i < pp->nr_args; i++) {
+		pf->var = pp->args[i];
+		pf->buf = &tmp[len];
+		pf->len = MAX_PROBE_BUFFER - len;
+		find_variable(sp_die, pf);
+		len += strlen(pf->buf);
+	}
+	free_current_frame_base(pf);
+
+	pp->probes[pp->found] = strdup(tmp);
+	pp->found++;
+}
+
+static int probeaddr_callback(struct die_link *dlink, void *data)
+{
+	struct probe_finder *pf = (struct probe_finder *)data;
+	Dwarf_Half tag;
+	Dwarf_Signed offs;
+	int ret;
+
+	ret = dwarf_tag(dlink->die, &tag, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	/* Check the address is in this subprogram */
+	if (tag == DW_TAG_subprogram &&
+	    die_within_subprogram(dlink->die, pf->addr, &offs)) {
+		show_probepoint(dlink->die, offs, pf);
+		return 1;
+	}
+	return 0;
+}
+
+/* Find probe point from its line number */
+static void find_by_line(struct probe_finder *pf)
+{
+	Dwarf_Signed cnt, i, clm;
+	Dwarf_Line *lines;
+	Dwarf_Unsigned lineno = 0;
+	Dwarf_Addr addr;
+	Dwarf_Unsigned fno;
+	int ret;
+
+	ret = dwarf_srclines(pf->cu_die, &lines, &cnt, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+
+	for (i = 0; i < cnt; i++) {
+		ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+		if (fno != pf->fno)
+			continue;
+
+		ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+		if (lineno != pf->lno)
+			continue;
+
+		ret = dwarf_lineoff(lines[i], &clm, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+
+		ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+		pr_debug("Probe line found: line[%d]:%u,%d addr:0x%llx\n",
+			 (int)i, (unsigned)lineno, (int)clm, addr);
+		pf->addr = addr;
+		/* Search a real subprogram including this line, */
+		ret = search_die_from_children(pf->cu_die,
+					       probeaddr_callback, pf);
+		if (ret == 0)
+			die("Probe point is not found in subprograms.\n");
+		/* Continuing, because target line might be inlined. */
+	}
+	dwarf_srclines_dealloc(__dw_debug, lines, cnt);
+}
+
+/* Search function from function name */
+static int probefunc_callback(struct die_link *dlink, void *data)
+{
+	struct probe_finder *pf = (struct probe_finder *)data;
+	struct probe_point *pp = pf->pp;
+	struct die_link *lk;
+	Dwarf_Signed offs;
+	Dwarf_Half tag;
+	int ret;
+
+	ret = dwarf_tag(dlink->die, &tag, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if (tag == DW_TAG_subprogram) {
+		if (die_compare_name(dlink->die, pp->function) == 0) {
+			if (pp->line) {	/* Function relative line */
+				pf->fno = die_get_decl_file(dlink->die);
+				pf->lno = die_get_decl_line(dlink->die)
+					 + pp->line;
+				find_by_line(pf);
+				return 1;
+			}
+			if (die_inlined_subprogram(dlink->die)) {
+				/* Inlined function, save it. */
+				ret = dwarf_die_CU_offset(dlink->die,
+							  &pf->inl_offs,
+							  &__dw_error);
+				DIE_IF(ret != DW_DLV_OK);
+				pr_debug("inline definition offset %lld\n",
+					 pf->inl_offs);
+				return 0;	/* Continue to search */
+			}
+			/* Get probe address */
+			pf->addr = die_get_entrypc(dlink->die);
+			pf->addr += pp->offset;
+			/* TODO: Check the address in this function */
+			show_probepoint(dlink->die, pp->offset, pf);
+			return 1; /* Exit; no same symbol in this CU. */
+		}
+	} else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) {
+		if (die_get_abstract_origin(dlink->die) == pf->inl_offs) {
+			/* Get probe address */
+			pf->addr = die_get_entrypc(dlink->die);
+			pf->addr += pp->offset;
+			pr_debug("found inline addr: 0x%llx\n", pf->addr);
+			/* Inlined function. Get a real subprogram */
+			for (lk = dlink->parent; lk != NULL; lk = lk->parent) {
+				tag = 0;
+				dwarf_tag(lk->die, &tag, &__dw_error);
+				DIE_IF(ret == DW_DLV_ERROR);
+				if (tag == DW_TAG_subprogram &&
+				    !die_inlined_subprogram(lk->die))
+					goto found;
+			}
+			die("Failed to find real subprogram.\n");
+found:
+			/* Get offset from subprogram */
+			ret = die_within_subprogram(lk->die, pf->addr, &offs);
+			DIE_IF(!ret);
+			show_probepoint(lk->die, offs, pf);
+			/* Continue to search */
+		}
+	}
+	return 0;
+}
+
+static void find_by_func(struct probe_finder *pf)
+{
+	search_die_from_children(pf->cu_die, probefunc_callback, pf);
+}
+
+/* Find a probe point */
+int find_probepoint(int fd, struct probe_point *pp)
+{
+	Dwarf_Half addr_size = 0;
+	Dwarf_Unsigned next_cuh = 0;
+	int cu_number = 0, ret;
+	struct probe_finder pf = {.pp = pp};
+
+	ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
+	if (ret != DW_DLV_OK) {
+		pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO.\n");
+		return -ENOENT;
+	}
+
+	pp->found = 0;
+	while (++cu_number) {
+		/* Search CU (Compilation Unit) */
+		ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
+			&addr_size, &next_cuh, &__dw_error);
+		DIE_IF(ret == DW_DLV_ERROR);
+		if (ret == DW_DLV_NO_ENTRY)
+			break;
+
+		/* Get the DIE(Debugging Information Entry) of this CU */
+		ret = dwarf_siblingof(__dw_debug, 0, &pf.cu_die, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+
+		/* Check if target file is included. */
+		if (pp->file)
+			pf.fno = cu_find_fileno(pf.cu_die, pp->file);
+
+		if (!pp->file || pf.fno) {
+			/* Save CU base address (for frame_base) */
+			ret = dwarf_lowpc(pf.cu_die, &pf.cu_base, &__dw_error);
+			DIE_IF(ret == DW_DLV_ERROR);
+			if (ret == DW_DLV_NO_ENTRY)
+				pf.cu_base = 0;
+			if (pp->function)
+				find_by_func(&pf);
+			else {
+				pf.lno = pp->line;
+				find_by_line(&pf);
+			}
+		}
+		dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
+	}
+	ret = dwarf_finish(__dw_debug, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+
+	return pp->found;
+}
+
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
new file mode 100644
index 0000000..bdebca6
--- /dev/null
+++ b/tools/perf/util/probe-finder.h
@@ -0,0 +1,57 @@
+#ifndef _PROBE_FINDER_H
+#define _PROBE_FINDER_H
+
+#define MAX_PATH_LEN 256
+#define MAX_PROBE_BUFFER 1024
+#define MAX_PROBES 128
+
+static inline int is_c_varname(const char *name)
+{
+	/* TODO */
+	return isalpha(name[0]) || name[0] == '_';
+}
+
+struct probe_point {
+	/* Inputs */
+	char	*file;		/* File name */
+	int	line;		/* Line number */
+
+	char	*function;	/* Function name */
+	int	offset;		/* Offset bytes */
+
+	int	nr_args;	/* Number of arguments */
+	char	**args;		/* Arguments */
+
+	int	retprobe;	/* Return probe */
+
+	/* Output */
+	int	found;		/* Number of found probe points */
+	char	*probes[MAX_PROBES];	/* Output buffers (will be allocated)*/
+};
+
+#ifndef NO_LIBDWARF
+extern int find_probepoint(int fd, struct probe_point *pp);
+
+#include <libdwarf/dwarf.h>
+#include <libdwarf/libdwarf.h>
+
+struct probe_finder {
+	struct probe_point	*pp;	/* Target probe point */
+
+	/* For function searching */
+	Dwarf_Addr	addr;		/* Address */
+	Dwarf_Unsigned	fno;		/* File number */
+	Dwarf_Unsigned	lno;		/* Line number */
+	Dwarf_Off	inl_offs;	/* Inline offset */
+	Dwarf_Die	cu_die;		/* Current CU */
+
+	/* For variable searching */
+	Dwarf_Addr	cu_base;	/* Current CU base address */
+	Dwarf_Locdesc	fbloc;		/* Location of Current Frame Base */
+	const char	*var;		/* Current variable name */
+	char		*buf;		/* Current output buffer */
+	int		len;		/* Length of output buffer */
+};
+#endif /* NO_LIBDWARF */
+
+#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
index a5454a1..b6a0197 100644
--- a/tools/perf/util/quote.h
+++ b/tools/perf/util/quote.h
@@ -1,5 +1,5 @@
-#ifndef QUOTE_H
-#define QUOTE_H
+#ifndef __PERF_QUOTE_H
+#define __PERF_QUOTE_H
 
 #include <stddef.h>
 #include <stdio.h>
@@ -65,4 +65,4 @@
 extern void python_quote_print(FILE *stream, const char *src);
 extern void tcl_quote_print(FILE *stream, const char *src);
 
-#endif
+#endif /* __PERF_QUOTE_H */
diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h
index cc1837d..d790287 100644
--- a/tools/perf/util/run-command.h
+++ b/tools/perf/util/run-command.h
@@ -1,5 +1,5 @@
-#ifndef RUN_COMMAND_H
-#define RUN_COMMAND_H
+#ifndef __PERF_RUN_COMMAND_H
+#define __PERF_RUN_COMMAND_H
 
 enum {
 	ERR_RUN_COMMAND_FORK = 10000,
@@ -85,4 +85,4 @@
 int start_async(struct async *async);
 int finish_async(struct async *async);
 
-#endif
+#endif /* __PERF_RUN_COMMAND_H */
diff --git a/tools/perf/util/sigchain.h b/tools/perf/util/sigchain.h
index 618083bc..1a53c11 100644
--- a/tools/perf/util/sigchain.h
+++ b/tools/perf/util/sigchain.h
@@ -1,5 +1,5 @@
-#ifndef SIGCHAIN_H
-#define SIGCHAIN_H
+#ifndef __PERF_SIGCHAIN_H
+#define __PERF_SIGCHAIN_H
 
 typedef void (*sigchain_fun)(int);
 
@@ -8,4 +8,4 @@
 
 void sigchain_push_common(sigchain_fun f);
 
-#endif /* SIGCHAIN_H */
+#endif /* __PERF_SIGCHAIN_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
new file mode 100644
index 0000000..b490354
--- /dev/null
+++ b/tools/perf/util/sort.c
@@ -0,0 +1,290 @@
+#include "sort.h"
+
+regex_t		parent_regex;
+char		default_parent_pattern[] = "^sys_|^do_page_fault";
+char		*parent_pattern = default_parent_pattern;
+char		default_sort_order[] = "comm,dso,symbol";
+char		*sort_order = default_sort_order;
+int		sort__need_collapse = 0;
+int		sort__has_parent = 0;
+
+enum sort_type	sort__first_dimension;
+
+unsigned int dsos__col_width;
+unsigned int comms__col_width;
+unsigned int threads__col_width;
+static unsigned int parent_symbol__col_width;
+char * field_sep;
+
+LIST_HEAD(hist_entry__sort_list);
+
+struct sort_entry sort_thread = {
+	.header = "Command:  Pid",
+	.cmp	= sort__thread_cmp,
+	.print	= sort__thread_print,
+	.width	= &threads__col_width,
+};
+
+struct sort_entry sort_comm = {
+	.header		= "Command",
+	.cmp		= sort__comm_cmp,
+	.collapse	= sort__comm_collapse,
+	.print		= sort__comm_print,
+	.width		= &comms__col_width,
+};
+
+struct sort_entry sort_dso = {
+	.header = "Shared Object",
+	.cmp	= sort__dso_cmp,
+	.print	= sort__dso_print,
+	.width	= &dsos__col_width,
+};
+
+struct sort_entry sort_sym = {
+	.header = "Symbol",
+	.cmp	= sort__sym_cmp,
+	.print	= sort__sym_print,
+};
+
+struct sort_entry sort_parent = {
+	.header = "Parent symbol",
+	.cmp	= sort__parent_cmp,
+	.print	= sort__parent_print,
+	.width	= &parent_symbol__col_width,
+};
+
+struct sort_dimension {
+	const char		*name;
+	struct sort_entry	*entry;
+	int			taken;
+};
+
+static struct sort_dimension sort_dimensions[] = {
+	{ .name = "pid",	.entry = &sort_thread,	},
+	{ .name = "comm",	.entry = &sort_comm,	},
+	{ .name = "dso",	.entry = &sort_dso,	},
+	{ .name = "symbol",	.entry = &sort_sym,	},
+	{ .name = "parent",	.entry = &sort_parent,	},
+};
+
+int64_t cmp_null(void *l, void *r)
+{
+	if (!l && !r)
+		return 0;
+	else if (!l)
+		return -1;
+	else
+		return 1;
+}
+
+/* --sort pid */
+
+int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+int repsep_fprintf(FILE *fp, const char *fmt, ...)
+{
+	int n;
+	va_list ap;
+
+	va_start(ap, fmt);
+	if (!field_sep)
+		n = vfprintf(fp, fmt, ap);
+	else {
+		char *bf = NULL;
+		n = vasprintf(&bf, fmt, ap);
+		if (n > 0) {
+			char *sep = bf;
+
+			while (1) {
+				sep = strchr(sep, *field_sep);
+				if (sep == NULL)
+					break;
+				*sep = '.';
+			}
+		}
+		fputs(bf, fp);
+		free(bf);
+	}
+	va_end(ap);
+	return n;
+}
+
+size_t
+sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width)
+{
+	return repsep_fprintf(fp, "%*s:%5d", width - 6,
+			      self->thread->comm ?: "", self->thread->pid);
+}
+
+size_t
+sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width)
+{
+	return repsep_fprintf(fp, "%*s", width, self->thread->comm);
+}
+
+/* --sort dso */
+
+int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct dso *dso_l = left->map ? left->map->dso : NULL;
+	struct dso *dso_r = right->map ? right->map->dso : NULL;
+	const char *dso_name_l, *dso_name_r;
+
+	if (!dso_l || !dso_r)
+		return cmp_null(dso_l, dso_r);
+
+	if (verbose) {
+		dso_name_l = dso_l->long_name;
+		dso_name_r = dso_r->long_name;
+	} else {
+		dso_name_l = dso_l->short_name;
+		dso_name_r = dso_r->short_name;
+	}
+
+	return strcmp(dso_name_l, dso_name_r);
+}
+
+size_t
+sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width)
+{
+	if (self->map && self->map->dso) {
+		const char *dso_name = !verbose ? self->map->dso->short_name :
+						  self->map->dso->long_name;
+		return repsep_fprintf(fp, "%-*s", width, dso_name);
+	}
+
+	return repsep_fprintf(fp, "%*llx", width, (u64)self->ip);
+}
+
+/* --sort symbol */
+
+int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 ip_l, ip_r;
+
+	if (left->sym == right->sym)
+		return 0;
+
+	ip_l = left->sym ? left->sym->start : left->ip;
+	ip_r = right->sym ? right->sym->start : right->ip;
+
+	return (int64_t)(ip_r - ip_l);
+}
+
+
+size_t
+sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
+{
+	size_t ret = 0;
+
+	if (verbose) {
+		char o = self->map ? dso__symtab_origin(self->map->dso) : '!';
+		ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, o);
+	}
+
+	ret += repsep_fprintf(fp, "[%c] ", self->level);
+	if (self->sym)
+		ret += repsep_fprintf(fp, "%s", self->sym->name);
+	else
+		ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip);
+
+	return ret;
+}
+
+/* --sort comm */
+
+int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	char *comm_l = left->thread->comm;
+	char *comm_r = right->thread->comm;
+
+	if (!comm_l || !comm_r)
+		return cmp_null(comm_l, comm_r);
+
+	return strcmp(comm_l, comm_r);
+}
+
+/* --sort parent */
+
+int64_t
+sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct symbol *sym_l = left->parent;
+	struct symbol *sym_r = right->parent;
+
+	if (!sym_l || !sym_r)
+		return cmp_null(sym_l, sym_r);
+
+	return strcmp(sym_l->name, sym_r->name);
+}
+
+size_t
+sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width)
+{
+	return repsep_fprintf(fp, "%-*s", width,
+			      self->parent ? self->parent->name : "[other]");
+}
+
+int sort_dimension__add(const char *tok)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
+		struct sort_dimension *sd = &sort_dimensions[i];
+
+		if (sd->taken)
+			continue;
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sd->entry->collapse)
+			sort__need_collapse = 1;
+
+		if (sd->entry == &sort_parent) {
+			int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
+			if (ret) {
+				char err[BUFSIZ];
+
+				regerror(ret, &parent_regex, err, sizeof(err));
+				fprintf(stderr, "Invalid regex: %s\n%s",
+					parent_pattern, err);
+				exit(-1);
+			}
+			sort__has_parent = 1;
+		}
+
+		if (list_empty(&hist_entry__sort_list)) {
+			if (!strcmp(sd->name, "pid"))
+				sort__first_dimension = SORT_PID;
+			else if (!strcmp(sd->name, "comm"))
+				sort__first_dimension = SORT_COMM;
+			else if (!strcmp(sd->name, "dso"))
+				sort__first_dimension = SORT_DSO;
+			else if (!strcmp(sd->name, "symbol"))
+				sort__first_dimension = SORT_SYM;
+			else if (!strcmp(sd->name, "parent"))
+				sort__first_dimension = SORT_PARENT;
+		}
+
+		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
+		sd->taken = 1;
+
+		return 0;
+	}
+
+	return -ESRCH;
+}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
new file mode 100644
index 0000000..333e664
--- /dev/null
+++ b/tools/perf/util/sort.h
@@ -0,0 +1,99 @@
+#ifndef __PERF_SORT_H
+#define __PERF_SORT_H
+#include "../builtin.h"
+
+#include "util.h"
+
+#include "color.h"
+#include <linux/list.h>
+#include "cache.h"
+#include <linux/rbtree.h>
+#include "symbol.h"
+#include "string.h"
+#include "callchain.h"
+#include "strlist.h"
+#include "values.h"
+
+#include "../perf.h"
+#include "debug.h"
+#include "header.h"
+
+#include "parse-options.h"
+#include "parse-events.h"
+
+#include "thread.h"
+#include "sort.h"
+
+extern regex_t parent_regex;
+extern char *sort_order;
+extern char default_parent_pattern[];
+extern char *parent_pattern;
+extern char default_sort_order[];
+extern int sort__need_collapse;
+extern int sort__has_parent;
+extern char *field_sep;
+extern struct sort_entry sort_comm;
+extern struct sort_entry sort_dso;
+extern struct sort_entry sort_sym;
+extern struct sort_entry sort_parent;
+extern unsigned int dsos__col_width;
+extern unsigned int comms__col_width;
+extern unsigned int threads__col_width;
+extern enum sort_type sort__first_dimension;
+
+struct hist_entry {
+	struct rb_node		rb_node;
+	u64			count;
+	struct thread		*thread;
+	struct map		*map;
+	struct symbol		*sym;
+	u64			ip;
+	char			level;
+	struct symbol		*parent;
+	struct callchain_node	callchain;
+	struct rb_root		sorted_chain;
+};
+
+enum sort_type {
+	SORT_PID,
+	SORT_COMM,
+	SORT_DSO,
+	SORT_SYM,
+	SORT_PARENT
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+	struct list_head list;
+
+	const char *header;
+
+	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
+	size_t	(*print)(FILE *fp, struct hist_entry *, unsigned int width);
+	unsigned int *width;
+	bool	elide;
+};
+
+extern struct sort_entry sort_thread;
+extern struct list_head hist_entry__sort_list;
+
+extern int repsep_fprintf(FILE *fp, const char *fmt, ...);
+extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int);
+extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int);
+extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int);
+extern size_t sort__sym_print(FILE *, struct hist_entry *, unsigned int __used);
+extern int64_t cmp_null(void *, void *);
+extern int64_t sort__thread_cmp(struct hist_entry *, struct hist_entry *);
+extern int64_t sort__comm_cmp(struct hist_entry *, struct hist_entry *);
+extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *);
+extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *);
+extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *);
+extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *);
+extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);
+extern int sort_dimension__add(const char *);
+
+#endif	/* __PERF_SORT_H */
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index d2aa86c..a3d121d 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -1,5 +1,5 @@
-#ifndef STRBUF_H
-#define STRBUF_H
+#ifndef __PERF_STRBUF_H
+#define __PERF_STRBUF_H
 
 /*
  * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
@@ -134,4 +134,4 @@
 extern int strbuf_branchname(struct strbuf *sb, const char *name);
 extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
 
-#endif /* STRBUF_H */
+#endif /* __PERF_STRBUF_H */
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index c93eca9..f24a8cc 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -1,4 +1,5 @@
 #include "string.h"
+#include "util.h"
 
 static int hex(char ch)
 {
@@ -32,3 +33,196 @@
 
 	return p - ptr;
 }
+
+char *strxfrchar(char *s, char from, char to)
+{
+	char *p = s;
+
+	while ((p = strchr(p, from)) != NULL)
+		*p++ = to;
+
+	return s;
+}
+
+#define K 1024LL
+/*
+ * perf_atoll()
+ * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB")
+ * and return its numeric value
+ */
+s64 perf_atoll(const char *str)
+{
+	unsigned int i;
+	s64 length = -1, unit = 1;
+
+	if (!isdigit(str[0]))
+		goto out_err;
+
+	for (i = 1; i < strlen(str); i++) {
+		switch (str[i]) {
+		case 'B':
+		case 'b':
+			break;
+		case 'K':
+			if (str[i + 1] != 'B')
+				goto out_err;
+			else
+				goto kilo;
+		case 'k':
+			if (str[i + 1] != 'b')
+				goto out_err;
+kilo:
+			unit = K;
+			break;
+		case 'M':
+			if (str[i + 1] != 'B')
+				goto out_err;
+			else
+				goto mega;
+		case 'm':
+			if (str[i + 1] != 'b')
+				goto out_err;
+mega:
+			unit = K * K;
+			break;
+		case 'G':
+			if (str[i + 1] != 'B')
+				goto out_err;
+			else
+				goto giga;
+		case 'g':
+			if (str[i + 1] != 'b')
+				goto out_err;
+giga:
+			unit = K * K * K;
+			break;
+		case 'T':
+			if (str[i + 1] != 'B')
+				goto out_err;
+			else
+				goto tera;
+		case 't':
+			if (str[i + 1] != 'b')
+				goto out_err;
+tera:
+			unit = K * K * K * K;
+			break;
+		case '\0':	/* only specified figures */
+			unit = 1;
+			break;
+		default:
+			if (!isdigit(str[i]))
+				goto out_err;
+			break;
+		}
+	}
+
+	length = atoll(str) * unit;
+	goto out;
+
+out_err:
+	length = -1;
+out:
+	return length;
+}
+
+/*
+ * Helper function for splitting a string into an argv-like array.
+ * originaly copied from lib/argv_split.c
+ */
+static const char *skip_sep(const char *cp)
+{
+	while (*cp && isspace(*cp))
+		cp++;
+
+	return cp;
+}
+
+static const char *skip_arg(const char *cp)
+{
+	while (*cp && !isspace(*cp))
+		cp++;
+
+	return cp;
+}
+
+static int count_argc(const char *str)
+{
+	int count = 0;
+
+	while (*str) {
+		str = skip_sep(str);
+		if (*str) {
+			count++;
+			str = skip_arg(str);
+		}
+	}
+
+	return count;
+}
+
+/**
+ * argv_free - free an argv
+ * @argv - the argument vector to be freed
+ *
+ * Frees an argv and the strings it points to.
+ */
+void argv_free(char **argv)
+{
+	char **p;
+	for (p = argv; *p; p++)
+		free(*p);
+
+	free(argv);
+}
+
+/**
+ * argv_split - split a string at whitespace, returning an argv
+ * @str: the string to be split
+ * @argcp: returned argument count
+ *
+ * Returns an array of pointers to strings which are split out from
+ * @str.  This is performed by strictly splitting on white-space; no
+ * quote processing is performed.  Multiple whitespace characters are
+ * considered to be a single argument separator.  The returned array
+ * is always NULL-terminated.  Returns NULL on memory allocation
+ * failure.
+ */
+char **argv_split(const char *str, int *argcp)
+{
+	int argc = count_argc(str);
+	char **argv = zalloc(sizeof(*argv) * (argc+1));
+	char **argvp;
+
+	if (argv == NULL)
+		goto out;
+
+	if (argcp)
+		*argcp = argc;
+
+	argvp = argv;
+
+	while (*str) {
+		str = skip_sep(str);
+
+		if (*str) {
+			const char *p = str;
+			char *t;
+
+			str = skip_arg(str);
+
+			t = strndup(p, str-p);
+			if (t == NULL)
+				goto fail;
+			*argvp++ = t;
+		}
+	}
+	*argvp = NULL;
+
+out:
+	return argv;
+
+fail:
+	argv_free(argv);
+	return NULL;
+}
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h
index bf39dfa..bfecec2 100644
--- a/tools/perf/util/string.h
+++ b/tools/perf/util/string.h
@@ -1,11 +1,15 @@
-#ifndef _PERF_STRING_H_
-#define _PERF_STRING_H_
+#ifndef __PERF_STRING_H_
+#define __PERF_STRING_H_
 
 #include "types.h"
 
 int hex2u64(const char *ptr, u64 *val);
+char *strxfrchar(char *s, char from, char to);
+s64 perf_atoll(const char *str);
+char **argv_split(const char *str, int *argcp);
+void argv_free(char **argv);
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
 
-#endif
+#endif /* __PERF_STRING_H */
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index 921818e..cb46593 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -1,5 +1,5 @@
-#ifndef STRLIST_H_
-#define STRLIST_H_
+#ifndef __PERF_STRLIST_H
+#define __PERF_STRLIST_H
 
 #include <linux/rbtree.h>
 #include <stdbool.h>
@@ -36,4 +36,4 @@
 }
 
 int strlist__parse_list(struct strlist *self, const char *s);
-#endif /* STRLIST_H_ */
+#endif /* __PERF_STRLIST_H */
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
index cd93195..e078198 100644
--- a/tools/perf/util/svghelper.h
+++ b/tools/perf/util/svghelper.h
@@ -1,5 +1,5 @@
-#ifndef _INCLUDE_GUARD_SVG_HELPER_
-#define _INCLUDE_GUARD_SVG_HELPER_
+#ifndef __PERF_SVGHELPER_H
+#define __PERF_SVGHELPER_H
 
 #include "types.h"
 
@@ -25,4 +25,4 @@
 
 extern int svg_page_width;
 
-#endif
+#endif /* __PERF_SVGHELPER_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 226f44a..fffcb93 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -2,14 +2,20 @@
 #include "../perf.h"
 #include "string.h"
 #include "symbol.h"
+#include "thread.h"
 
 #include "debug.h"
 
+#include <asm/bug.h>
 #include <libelf.h>
 #include <gelf.h>
 #include <elf.h>
+#include <limits.h>
+#include <sys/utsname.h>
 
-const char *sym_hist_filter;
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
 
 enum dso_origin {
 	DSO__ORIG_KERNEL = 0,
@@ -18,94 +24,189 @@
 	DSO__ORIG_UBUNTU,
 	DSO__ORIG_BUILDID,
 	DSO__ORIG_DSO,
+	DSO__ORIG_KMODULE,
 	DSO__ORIG_NOT_FOUND,
 };
 
-static struct symbol *symbol__new(u64 start, u64 len,
-				  const char *name, unsigned int priv_size,
-				  u64 obj_start, int v)
+static void dsos__add(struct list_head *head, struct dso *dso);
+static struct map *thread__find_map_by_name(struct thread *self, char *name);
+static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
+struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
+static int dso__load_kernel_sym(struct dso *self, struct map *map,
+				struct thread *thread, symbol_filter_t filter);
+unsigned int symbol__priv_size;
+static int vmlinux_path__nr_entries;
+static char **vmlinux_path;
+
+static struct symbol_conf symbol_conf__defaults = {
+	.use_modules	  = true,
+	.try_vmlinux_path = true,
+};
+
+static struct thread kthread_mem;
+struct thread *kthread = &kthread_mem;
+
+bool dso__loaded(const struct dso *self, enum map_type type)
+{
+	return self->loaded & (1 << type);
+}
+
+static void dso__set_loaded(struct dso *self, enum map_type type)
+{
+	self->loaded |= (1 << type);
+}
+
+static void symbols__fixup_end(struct rb_root *self)
+{
+	struct rb_node *nd, *prevnd = rb_first(self);
+	struct symbol *curr, *prev;
+
+	if (prevnd == NULL)
+		return;
+
+	curr = rb_entry(prevnd, struct symbol, rb_node);
+
+	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+		prev = curr;
+		curr = rb_entry(nd, struct symbol, rb_node);
+
+		if (prev->end == prev->start)
+			prev->end = curr->start - 1;
+	}
+
+	/* Last entry */
+	if (curr->end == curr->start)
+		curr->end = roundup(curr->start, 4096);
+}
+
+static void __thread__fixup_maps_end(struct thread *self, enum map_type type)
+{
+	struct map *prev, *curr;
+	struct rb_node *nd, *prevnd = rb_first(&self->maps[type]);
+
+	if (prevnd == NULL)
+		return;
+
+	curr = rb_entry(prevnd, struct map, rb_node);
+
+	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+		prev = curr;
+		curr = rb_entry(nd, struct map, rb_node);
+		prev->end = curr->start - 1;
+	}
+
+	/*
+	 * We still haven't the actual symbols, so guess the
+	 * last map final address.
+	 */
+	curr->end = ~0UL;
+}
+
+static void thread__fixup_maps_end(struct thread *self)
+{
+	int i;
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		__thread__fixup_maps_end(self, i);
+}
+
+static struct symbol *symbol__new(u64 start, u64 len, const char *name)
 {
 	size_t namelen = strlen(name) + 1;
-	struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen);
-
-	if (!self)
+	struct symbol *self = zalloc(symbol__priv_size +
+				     sizeof(*self) + namelen);
+	if (self == NULL)
 		return NULL;
 
-	if (v >= 2)
-		printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n",
-			(u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start);
+	if (symbol__priv_size)
+		self = ((void *)self) + symbol__priv_size;
 
-	self->obj_start= obj_start;
-	self->hist = NULL;
-	self->hist_sum = 0;
-
-	if (sym_hist_filter && !strcmp(name, sym_hist_filter))
-		self->hist = calloc(sizeof(u64), len);
-
-	if (priv_size) {
-		memset(self, 0, priv_size);
-		self = ((void *)self) + priv_size;
-	}
 	self->start = start;
 	self->end   = len ? start + len - 1 : start;
+
+	pr_debug3("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end);
+
 	memcpy(self->name, name, namelen);
 
 	return self;
 }
 
-static void symbol__delete(struct symbol *self, unsigned int priv_size)
+static void symbol__delete(struct symbol *self)
 {
-	free(((void *)self) - priv_size);
+	free(((void *)self) - symbol__priv_size);
 }
 
 static size_t symbol__fprintf(struct symbol *self, FILE *fp)
 {
-	if (!self->module)
-		return fprintf(fp, " %llx-%llx %s\n",
+	return fprintf(fp, " %llx-%llx %s\n",
 		       self->start, self->end, self->name);
-	else
-		return fprintf(fp, " %llx-%llx %s \t[%s]\n",
-		       self->start, self->end, self->name, self->module->name);
 }
 
-struct dso *dso__new(const char *name, unsigned int sym_priv_size)
+static void dso__set_long_name(struct dso *self, char *name)
+{
+	if (name == NULL)
+		return;
+	self->long_name = name;
+	self->long_name_len = strlen(name);
+}
+
+static void dso__set_basename(struct dso *self)
+{
+	self->short_name = basename(self->long_name);
+}
+
+struct dso *dso__new(const char *name)
 {
 	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
 
 	if (self != NULL) {
+		int i;
 		strcpy(self->name, name);
-		self->syms = RB_ROOT;
-		self->sym_priv_size = sym_priv_size;
+		dso__set_long_name(self, self->name);
+		self->short_name = self->name;
+		for (i = 0; i < MAP__NR_TYPES; ++i)
+			self->symbols[i] = RB_ROOT;
 		self->find_symbol = dso__find_symbol;
 		self->slen_calculated = 0;
 		self->origin = DSO__ORIG_NOT_FOUND;
+		self->loaded = 0;
+		self->has_build_id = 0;
 	}
 
 	return self;
 }
 
-static void dso__delete_symbols(struct dso *self)
+static void symbols__delete(struct rb_root *self)
 {
 	struct symbol *pos;
-	struct rb_node *next = rb_first(&self->syms);
+	struct rb_node *next = rb_first(self);
 
 	while (next) {
 		pos = rb_entry(next, struct symbol, rb_node);
 		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, &self->syms);
-		symbol__delete(pos, self->sym_priv_size);
+		rb_erase(&pos->rb_node, self);
+		symbol__delete(pos);
 	}
 }
 
 void dso__delete(struct dso *self)
 {
-	dso__delete_symbols(self);
+	int i;
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		symbols__delete(&self->symbols[i]);
+	if (self->long_name != self->name)
+		free(self->long_name);
 	free(self);
 }
 
-static void dso__insert_symbol(struct dso *self, struct symbol *sym)
+void dso__set_build_id(struct dso *self, void *build_id)
 {
-	struct rb_node **p = &self->syms.rb_node;
+	memcpy(self->build_id, build_id, sizeof(self->build_id));
+	self->has_build_id = 1;
+}
+
+static void symbols__insert(struct rb_root *self, struct symbol *sym)
+{
+	struct rb_node **p = &self->rb_node;
 	struct rb_node *parent = NULL;
 	const u64 ip = sym->start;
 	struct symbol *s;
@@ -119,17 +220,17 @@
 			p = &(*p)->rb_right;
 	}
 	rb_link_node(&sym->rb_node, parent, p);
-	rb_insert_color(&sym->rb_node, &self->syms);
+	rb_insert_color(&sym->rb_node, self);
 }
 
-struct symbol *dso__find_symbol(struct dso *self, u64 ip)
+static struct symbol *symbols__find(struct rb_root *self, u64 ip)
 {
 	struct rb_node *n;
 
 	if (self == NULL)
 		return NULL;
 
-	n = self->syms.rb_node;
+	n = self->rb_node;
 
 	while (n) {
 		struct symbol *s = rb_entry(n, struct symbol, rb_node);
@@ -145,12 +246,42 @@
 	return NULL;
 }
 
-size_t dso__fprintf(struct dso *self, FILE *fp)
+struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr)
 {
-	size_t ret = fprintf(fp, "dso: %s\n", self->name);
+	return symbols__find(&self->symbols[type], addr);
+}
 
+int build_id__sprintf(u8 *self, int len, char *bf)
+{
+	char *bid = bf;
+	u8 *raw = self;
+	int i;
+
+	for (i = 0; i < len; ++i) {
+		sprintf(bid, "%02x", *raw);
+		++raw;
+		bid += 2;
+	}
+
+	return raw - self;
+}
+
+size_t dso__fprintf_buildid(struct dso *self, FILE *fp)
+{
+	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+	build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id);
+	return fprintf(fp, "%s", sbuild_id);
+}
+
+size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
+{
 	struct rb_node *nd;
-	for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
+	size_t ret = fprintf(fp, "dso: %s (", self->short_name);
+
+	ret += dso__fprintf_buildid(self, fp);
+	ret += fprintf(fp, ")\n");
+	for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) {
 		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
 		ret += symbol__fprintf(pos, fp);
 	}
@@ -158,13 +289,17 @@
 	return ret;
 }
 
-static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v)
+/*
+ * Loads the function entries in /proc/kallsyms into kernel_map->dso,
+ * so that we can in the next step set the symbol ->end address and then
+ * call kernel_maps__split_kallsyms.
+ */
+static int dso__load_all_kallsyms(struct dso *self, struct map *map)
 {
-	struct rb_node *nd, *prevnd;
 	char *line = NULL;
 	size_t n;
+	struct rb_root *root = &self->symbols[map->type];
 	FILE *file = fopen("/proc/kallsyms", "r");
-	int count = 0;
 
 	if (file == NULL)
 		goto out_failure;
@@ -174,6 +309,7 @@
 		struct symbol *sym;
 		int line_len, len;
 		char symbol_type;
+		char *symbol_name;
 
 		line_len = getline(&line, &n, file);
 		if (line_len < 0)
@@ -196,44 +332,26 @@
 		 */
 		if (symbol_type != 'T' && symbol_type != 'W')
 			continue;
+
+		symbol_name = line + len + 2;
 		/*
-		 * Well fix up the end later, when we have all sorted.
+		 * Will fix up the end later, when we have all symbols sorted.
 		 */
-		sym = symbol__new(start, 0xdead, line + len + 2,
-				  self->sym_priv_size, 0, v);
+		sym = symbol__new(start, 0, symbol_name);
 
 		if (sym == NULL)
 			goto out_delete_line;
-
-		if (filter && filter(self, sym))
-			symbol__delete(sym, self->sym_priv_size);
-		else {
-			dso__insert_symbol(self, sym);
-			count++;
-		}
-	}
-
-	/*
-	 * Now that we have all sorted out, just set the ->end of all
-	 * symbols
-	 */
-	prevnd = rb_first(&self->syms);
-
-	if (prevnd == NULL)
-		goto out_delete_line;
-
-	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
-		struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
-			      *curr = rb_entry(nd, struct symbol, rb_node);
-
-		prev->end = curr->start - 1;
-		prevnd = nd;
+		/*
+		 * We will pass the symbols to the filter later, in
+		 * map__split_kallsyms, when we have split the maps per module
+		 */
+		symbols__insert(root, sym);
 	}
 
 	free(line);
 	fclose(file);
 
-	return count;
+	return 0;
 
 out_delete_line:
 	free(line);
@@ -241,14 +359,114 @@
 	return -1;
 }
 
-static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int v)
+/*
+ * Split the symbols into maps, making sure there are no overlaps, i.e. the
+ * kernel range is broken in several maps, named [kernel].N, as we don't have
+ * the original ELF section names vmlinux have.
+ */
+static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread *thread,
+			       symbol_filter_t filter)
+{
+	struct map *curr_map = map;
+	struct symbol *pos;
+	int count = 0;
+	struct rb_root *root = &self->symbols[map->type];
+	struct rb_node *next = rb_first(root);
+	int kernel_range = 0;
+
+	while (next) {
+		char *module;
+
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+
+		module = strchr(pos->name, '\t');
+		if (module) {
+			if (!thread->use_modules)
+				goto discard_symbol;
+
+			*module++ = '\0';
+
+			if (strcmp(self->name, module)) {
+				curr_map = thread__find_map_by_name(thread, module);
+				if (curr_map == NULL) {
+					pr_debug("/proc/{kallsyms,modules} "
+					         "inconsistency!\n");
+					return -1;
+				}
+			}
+			/*
+			 * So that we look just like we get from .ko files,
+			 * i.e. not prelinked, relative to map->start.
+			 */
+			pos->start = curr_map->map_ip(curr_map, pos->start);
+			pos->end   = curr_map->map_ip(curr_map, pos->end);
+		} else if (curr_map != map) {
+			char dso_name[PATH_MAX];
+			struct dso *dso;
+
+			snprintf(dso_name, sizeof(dso_name), "[kernel].%d",
+				 kernel_range++);
+
+			dso = dso__new(dso_name);
+			if (dso == NULL)
+				return -1;
+
+			curr_map = map__new2(pos->start, dso, map->type);
+			if (map == NULL) {
+				dso__delete(dso);
+				return -1;
+			}
+
+			curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+			__thread__insert_map(thread, curr_map);
+			++kernel_range;
+		}
+
+		if (filter && filter(curr_map, pos)) {
+discard_symbol:		rb_erase(&pos->rb_node, root);
+			symbol__delete(pos);
+		} else {
+			if (curr_map != map) {
+				rb_erase(&pos->rb_node, root);
+				symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+			}
+			count++;
+		}
+	}
+
+	return count;
+}
+
+
+static int dso__load_kallsyms(struct dso *self, struct map *map,
+			      struct thread *thread, symbol_filter_t filter)
+{
+	if (dso__load_all_kallsyms(self, map) < 0)
+		return -1;
+
+	symbols__fixup_end(&self->symbols[map->type]);
+	self->origin = DSO__ORIG_KERNEL;
+
+	return dso__split_kallsyms(self, map, thread, filter);
+}
+
+size_t kernel_maps__fprintf(FILE *fp)
+{
+	size_t printed = fprintf(fp, "Kernel maps:\n");
+	printed += thread__fprintf_maps(kthread, fp);
+	return printed + fprintf(fp, "END kernel maps\n");
+}
+
+static int dso__load_perf_map(struct dso *self, struct map *map,
+			      symbol_filter_t filter)
 {
 	char *line = NULL;
 	size_t n;
 	FILE *file;
 	int nr_syms = 0;
 
-	file = fopen(self->name, "r");
+	file = fopen(self->long_name, "r");
 	if (file == NULL)
 		goto out_failure;
 
@@ -278,16 +496,15 @@
 		if (len + 2 >= line_len)
 			continue;
 
-		sym = symbol__new(start, size, line + len,
-				  self->sym_priv_size, start, v);
+		sym = symbol__new(start, size, line + len);
 
 		if (sym == NULL)
 			goto out_delete_line;
 
-		if (filter && filter(self, sym))
-			symbol__delete(sym, self->sym_priv_size);
+		if (filter && filter(map, sym))
+			symbol__delete(sym);
 		else {
-			dso__insert_symbol(self, sym);
+			symbols__insert(&self->symbols[map->type], sym);
 			nr_syms++;
 		}
 	}
@@ -393,7 +610,8 @@
  * And always look at the original dso, not at debuginfo packages, that
  * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
  */
-static int dso__synthesize_plt_symbols(struct  dso *self, int v)
+static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
+				       symbol_filter_t filter)
 {
 	uint32_t nr_rel_entries, idx;
 	GElf_Sym sym;
@@ -409,7 +627,7 @@
 	Elf *elf;
 	int nr = 0, symidx, fd, err = 0;
 
-	fd = open(self->name, O_RDONLY);
+	fd = open(self->long_name, O_RDONLY);
 	if (fd < 0)
 		goto out;
 
@@ -477,12 +695,16 @@
 				 "%s@plt", elf_sym__name(&sym, symstrs));
 
 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
-					sympltname, self->sym_priv_size, 0, v);
+					sympltname);
 			if (!f)
 				goto out_elf_end;
 
-			dso__insert_symbol(self, f);
-			++nr;
+			if (filter && filter(map, f))
+				symbol__delete(f);
+			else {
+				symbols__insert(&self->symbols[map->type], f);
+				++nr;
+			}
 		}
 	} else if (shdr_rel_plt.sh_type == SHT_REL) {
 		GElf_Rel pos_mem, *pos;
@@ -495,12 +717,16 @@
 				 "%s@plt", elf_sym__name(&sym, symstrs));
 
 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
-					sympltname, self->sym_priv_size, 0, v);
+					sympltname);
 			if (!f)
 				goto out_elf_end;
 
-			dso__insert_symbol(self, f);
-			++nr;
+			if (filter && filter(map, f))
+				symbol__delete(f);
+			else {
+				symbols__insert(&self->symbols[map->type], f);
+				++nr;
+			}
 		}
 	}
 
@@ -513,14 +739,18 @@
 	if (err == 0)
 		return nr;
 out:
-	fprintf(stderr, "%s: problems reading %s PLT info.\n",
-		__func__, self->name);
+	pr_warning("%s: problems reading %s PLT info.\n",
+		   __func__, self->long_name);
 	return 0;
 }
 
-static int dso__load_sym(struct dso *self, int fd, const char *name,
-			 symbol_filter_t filter, int v, struct module *mod)
+static int dso__load_sym(struct dso *self, struct map *map,
+			 struct thread *thread, const char *name, int fd,
+			 symbol_filter_t filter, int kernel, int kmodule)
 {
+	struct map *curr_map = map;
+	struct dso *curr_dso = self;
+	size_t dso_name_len = strlen(self->short_name);
 	Elf_Data *symstrs, *secstrs;
 	uint32_t nr_syms;
 	int err = -1;
@@ -531,19 +761,16 @@
 	GElf_Sym sym;
 	Elf_Scn *sec, *sec_strndx;
 	Elf *elf;
-	int nr = 0, kernel = !strcmp("[kernel]", self->name);
+	int nr = 0;
 
 	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
 	if (elf == NULL) {
-		if (v)
-			fprintf(stderr, "%s: cannot read %s ELF file.\n",
-				__func__, name);
+		pr_err("%s: cannot read %s ELF file.\n", __func__, name);
 		goto out_close;
 	}
 
 	if (gelf_getehdr(elf, &ehdr) == NULL) {
-		if (v)
-			fprintf(stderr, "%s: cannot get elf header.\n", __func__);
+		pr_err("%s: cannot get elf header.\n", __func__);
 		goto out_elf_end;
 	}
 
@@ -587,9 +814,7 @@
 	elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
 		struct symbol *f;
 		const char *elf_name;
-		char *demangled;
-		u64 obj_start;
-		struct section *section = NULL;
+		char *demangled = NULL;
 		int is_label = elf_sym__is_label(&sym);
 		const char *section_name;
 
@@ -605,52 +830,85 @@
 		if (is_label && !elf_sec__is_text(&shdr, secstrs))
 			continue;
 
+		elf_name = elf_sym__name(&sym, symstrs);
 		section_name = elf_sec__name(&shdr, secstrs);
-		obj_start = sym.st_value;
 
-		if (self->adjust_symbols) {
-			if (v >= 2)
-				printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n",
-					(u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset);
+		if (kernel || kmodule) {
+			char dso_name[PATH_MAX];
 
-			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+			if (strcmp(section_name,
+				   curr_dso->short_name + dso_name_len) == 0)
+				goto new_symbol;
+
+			if (strcmp(section_name, ".text") == 0) {
+				curr_map = map;
+				curr_dso = self;
+				goto new_symbol;
+			}
+
+			snprintf(dso_name, sizeof(dso_name),
+				 "%s%s", self->short_name, section_name);
+
+			curr_map = thread__find_map_by_name(thread, dso_name);
+			if (curr_map == NULL) {
+				u64 start = sym.st_value;
+
+				if (kmodule)
+					start += map->start + shdr.sh_offset;
+
+				curr_dso = dso__new(dso_name);
+				if (curr_dso == NULL)
+					goto out_elf_end;
+				curr_map = map__new2(start, curr_dso,
+						     MAP__FUNCTION);
+				if (curr_map == NULL) {
+					dso__delete(curr_dso);
+					goto out_elf_end;
+				}
+				curr_map->map_ip = identity__map_ip;
+				curr_map->unmap_ip = identity__map_ip;
+				curr_dso->origin = DSO__ORIG_KERNEL;
+				__thread__insert_map(kthread, curr_map);
+				dsos__add(&dsos__kernel, curr_dso);
+			} else
+				curr_dso = curr_map->dso;
+
+			goto new_symbol;
 		}
 
-		if (mod) {
-			section = mod->sections->find_section(mod->sections, section_name);
-			if (section)
-				sym.st_value += section->vma;
-			else {
-				fprintf(stderr, "dso__load_sym() module %s lookup of %s failed\n",
-					mod->name, section_name);
-				goto out_elf_end;
-			}
+		if (curr_dso->adjust_symbols) {
+			pr_debug2("adjusting symbol: st_value: %Lx sh_addr: "
+				  "%Lx sh_offset: %Lx\n", (u64)sym.st_value,
+				  (u64)shdr.sh_addr, (u64)shdr.sh_offset);
+			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 		}
 		/*
 		 * We need to figure out if the object was created from C++ sources
 		 * DWARF DW_compile_unit has this, but we don't always have access
 		 * to it...
 		 */
-		elf_name = elf_sym__name(&sym, symstrs);
 		demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI);
 		if (demangled != NULL)
 			elf_name = demangled;
-
-		f = symbol__new(sym.st_value, sym.st_size, elf_name,
-				self->sym_priv_size, obj_start, v);
+new_symbol:
+		f = symbol__new(sym.st_value, sym.st_size, elf_name);
 		free(demangled);
 		if (!f)
 			goto out_elf_end;
 
-		if (filter && filter(self, f))
-			symbol__delete(f, self->sym_priv_size);
+		if (filter && filter(curr_map, f))
+			symbol__delete(f);
 		else {
-			f->module = mod;
-			dso__insert_symbol(self, f);
+			symbols__insert(&curr_dso->symbols[curr_map->type], f);
 			nr++;
 		}
 	}
 
+	/*
+	 * For misannotated, zeroed, ASM function sizes.
+	 */
+	if (nr > 0)
+		symbols__fixup_end(&self->symbols[map->type]);
 	err = nr;
 out_elf_end:
 	elf_end(elf);
@@ -658,63 +916,153 @@
 	return err;
 }
 
-#define BUILD_ID_SIZE 128
-
-static char *dso__read_build_id(struct dso *self, int v)
+static bool dso__build_id_equal(const struct dso *self, u8 *build_id)
 {
-	int i;
+	return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0;
+}
+
+static bool __dsos__read_build_ids(struct list_head *head)
+{
+	bool have_build_id = false;
+	struct dso *pos;
+
+	list_for_each_entry(pos, head, node)
+		if (filename__read_build_id(pos->long_name, pos->build_id,
+					    sizeof(pos->build_id)) > 0) {
+			have_build_id	  = true;
+			pos->has_build_id = true;
+		}
+
+	return have_build_id;
+}
+
+bool dsos__read_build_ids(void)
+{
+	return __dsos__read_build_ids(&dsos__kernel) ||
+	       __dsos__read_build_ids(&dsos__user);
+}
+
+/*
+ * Align offset to 4 bytes as needed for note name and descriptor data.
+ */
+#define NOTE_ALIGN(n) (((n) + 3) & -4U)
+
+int filename__read_build_id(const char *filename, void *bf, size_t size)
+{
+	int fd, err = -1;
 	GElf_Ehdr ehdr;
 	GElf_Shdr shdr;
-	Elf_Data *build_id_data;
+	Elf_Data *data;
 	Elf_Scn *sec;
-	char *build_id = NULL, *bid;
-	unsigned char *raw;
+	Elf_Kind ek;
+	void *ptr;
 	Elf *elf;
-	int fd = open(self->name, O_RDONLY);
 
+	if (size < BUILD_ID_SIZE)
+		goto out;
+
+	fd = open(filename, O_RDONLY);
 	if (fd < 0)
 		goto out;
 
 	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
 	if (elf == NULL) {
-		if (v)
-			fprintf(stderr, "%s: cannot read %s ELF file.\n",
-				__func__, self->name);
+		pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
 		goto out_close;
 	}
 
+	ek = elf_kind(elf);
+	if (ek != ELF_K_ELF)
+		goto out_elf_end;
+
 	if (gelf_getehdr(elf, &ehdr) == NULL) {
-		if (v)
-			fprintf(stderr, "%s: cannot get elf header.\n", __func__);
+		pr_err("%s: cannot get elf header.\n", __func__);
 		goto out_elf_end;
 	}
 
-	sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL);
-	if (sec == NULL)
-		goto out_elf_end;
-
-	build_id_data = elf_getdata(sec, NULL);
-	if (build_id_data == NULL)
-		goto out_elf_end;
-	build_id = malloc(BUILD_ID_SIZE);
-	if (build_id == NULL)
-		goto out_elf_end;
-	raw = build_id_data->d_buf + 16;
-	bid = build_id;
-
-	for (i = 0; i < 20; ++i) {
-		sprintf(bid, "%02x", *raw);
-		++raw;
-		bid += 2;
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+				  ".note.gnu.build-id", NULL);
+	if (sec == NULL) {
+		sec = elf_section_by_name(elf, &ehdr, &shdr,
+					  ".notes", NULL);
+		if (sec == NULL)
+			goto out_elf_end;
 	}
-	if (v >= 2)
-		printf("%s(%s): %s\n", __func__, self->name, build_id);
+
+	data = elf_getdata(sec, NULL);
+	if (data == NULL)
+		goto out_elf_end;
+
+	ptr = data->d_buf;
+	while (ptr < (data->d_buf + data->d_size)) {
+		GElf_Nhdr *nhdr = ptr;
+		int namesz = NOTE_ALIGN(nhdr->n_namesz),
+		    descsz = NOTE_ALIGN(nhdr->n_descsz);
+		const char *name;
+
+		ptr += sizeof(*nhdr);
+		name = ptr;
+		ptr += namesz;
+		if (nhdr->n_type == NT_GNU_BUILD_ID &&
+		    nhdr->n_namesz == sizeof("GNU")) {
+			if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+				memcpy(bf, ptr, BUILD_ID_SIZE);
+				err = BUILD_ID_SIZE;
+				break;
+			}
+		}
+		ptr += descsz;
+	}
 out_elf_end:
 	elf_end(elf);
 out_close:
 	close(fd);
 out:
-	return build_id;
+	return err;
+}
+
+int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+{
+	int fd, err = -1;
+
+	if (size < BUILD_ID_SIZE)
+		goto out;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	while (1) {
+		char bf[BUFSIZ];
+		GElf_Nhdr nhdr;
+		int namesz, descsz;
+
+		if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr))
+			break;
+
+		namesz = NOTE_ALIGN(nhdr.n_namesz);
+		descsz = NOTE_ALIGN(nhdr.n_descsz);
+		if (nhdr.n_type == NT_GNU_BUILD_ID &&
+		    nhdr.n_namesz == sizeof("GNU")) {
+			if (read(fd, bf, namesz) != namesz)
+				break;
+			if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
+				if (read(fd, build_id,
+				    BUILD_ID_SIZE) == BUILD_ID_SIZE) {
+					err = 0;
+					break;
+				}
+			} else if (read(fd, bf, descsz) != descsz)
+				break;
+		} else {
+			int n = namesz + descsz;
+			if (read(fd, bf, n) != n)
+				break;
+		}
+	}
+	close(fd);
+out:
+	return err;
 }
 
 char dso__symtab_origin(const struct dso *self)
@@ -726,6 +1074,7 @@
 		[DSO__ORIG_UBUNTU] =   'u',
 		[DSO__ORIG_BUILDID] =  'b',
 		[DSO__ORIG_DSO] =      'd',
+		[DSO__ORIG_KMODULE] =  'K',
 	};
 
 	if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND)
@@ -733,20 +1082,27 @@
 	return origin[self->origin];
 }
 
-int dso__load(struct dso *self, symbol_filter_t filter, int v)
+int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
 {
 	int size = PATH_MAX;
-	char *name = malloc(size), *build_id = NULL;
+	char *name;
+	u8 build_id[BUILD_ID_SIZE];
 	int ret = -1;
 	int fd;
 
+	dso__set_loaded(self, map->type);
+
+	if (self->kernel)
+		return dso__load_kernel_sym(self, map, kthread, filter);
+
+	name = malloc(size);
 	if (!name)
 		return -1;
 
 	self->adjust_symbols = 0;
 
 	if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
-		ret = dso__load_perf_map(self, filter, v);
+		ret = dso__load_perf_map(self, map, filter);
 		self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT :
 					 DSO__ORIG_NOT_FOUND;
 		return ret;
@@ -759,34 +1115,50 @@
 		self->origin++;
 		switch (self->origin) {
 		case DSO__ORIG_FEDORA:
-			snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
+			snprintf(name, size, "/usr/lib/debug%s.debug",
+				 self->long_name);
 			break;
 		case DSO__ORIG_UBUNTU:
-			snprintf(name, size, "/usr/lib/debug%s", self->name);
+			snprintf(name, size, "/usr/lib/debug%s",
+				 self->long_name);
 			break;
 		case DSO__ORIG_BUILDID:
-			build_id = dso__read_build_id(self, v);
-			if (build_id != NULL) {
+			if (filename__read_build_id(self->long_name, build_id,
+						    sizeof(build_id))) {
+				char build_id_hex[BUILD_ID_SIZE * 2 + 1];
+
+				build_id__sprintf(build_id, sizeof(build_id),
+						  build_id_hex);
 				snprintf(name, size,
 					 "/usr/lib/debug/.build-id/%.2s/%s.debug",
-					build_id, build_id + 2);
-				free(build_id);
+					build_id_hex, build_id_hex + 2);
+				if (self->has_build_id)
+					goto compare_build_id;
 				break;
 			}
 			self->origin++;
 			/* Fall thru */
 		case DSO__ORIG_DSO:
-			snprintf(name, size, "%s", self->name);
+			snprintf(name, size, "%s", self->long_name);
 			break;
 
 		default:
 			goto out;
 		}
 
+		if (self->has_build_id) {
+			if (filename__read_build_id(name, build_id,
+						    sizeof(build_id)) < 0)
+				goto more;
+compare_build_id:
+			if (!dso__build_id_equal(self, build_id))
+				goto more;
+		}
+
 		fd = open(name, O_RDONLY);
 	} while (fd < 0);
 
-	ret = dso__load_sym(self, fd, name, filter, v, NULL);
+	ret = dso__load_sym(self, map, NULL, name, fd, filter, 0, 0);
 	close(fd);
 
 	/*
@@ -796,7 +1168,7 @@
 		goto more;
 
 	if (ret > 0) {
-		int nr_plt = dso__synthesize_plt_symbols(self, v);
+		int nr_plt = dso__synthesize_plt_symbols(self, map, filter);
 		if (nr_plt > 0)
 			ret += nr_plt;
 	}
@@ -807,151 +1179,279 @@
 	return ret;
 }
 
-static int dso__load_module(struct dso *self, struct mod_dso *mods, const char *name,
-			     symbol_filter_t filter, int v)
+static struct map *thread__find_map_by_name(struct thread *self, char *name)
 {
-	struct module *mod = mod_dso__find_module(mods, name);
-	int err = 0, fd;
-
-	if (mod == NULL || !mod->active)
-		return err;
-
-	fd = open(mod->path, O_RDONLY);
-
-	if (fd < 0)
-		return err;
-
-	err = dso__load_sym(self, fd, name, filter, v, mod);
-	close(fd);
-
-	return err;
-}
-
-int dso__load_modules(struct dso *self, symbol_filter_t filter, int v)
-{
-	struct mod_dso *mods = mod_dso__new_dso("modules");
-	struct module *pos;
-	struct rb_node *next;
-	int err, count = 0;
-
-	err = mod_dso__load_modules(mods);
-
-	if (err <= 0)
-		return err;
-
-	/*
-	 * Iterate over modules, and load active symbols.
-	 */
-	next = rb_first(&mods->mods);
-	while (next) {
-		pos = rb_entry(next, struct module, rb_node);
-		err = dso__load_module(self, mods, pos->name, filter, v);
-
-		if (err < 0)
-			break;
-
-		next = rb_next(&pos->rb_node);
-		count += err;
-	}
-
-	if (err < 0) {
-		mod_dso__delete_modules(mods);
-		mod_dso__delete_self(mods);
-		return err;
-	}
-
-	return count;
-}
-
-static inline void dso__fill_symbol_holes(struct dso *self)
-{
-	struct symbol *prev = NULL;
 	struct rb_node *nd;
 
-	for (nd = rb_last(&self->syms); nd; nd = rb_prev(nd)) {
-		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+	for (nd = rb_first(&self->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
+		struct map *map = rb_entry(nd, struct map, rb_node);
 
-		if (prev) {
-			u64 hole = 0;
-			int alias = pos->start == prev->start;
-
-			if (!alias)
-				hole = prev->start - pos->end - 1;
-
-			if (hole || alias) {
-				if (alias)
-					pos->end = prev->end;
-				else if (hole)
-					pos->end = prev->start - 1;
-			}
-		}
-		prev = pos;
+		if (map->dso && strcmp(map->dso->name, name) == 0)
+			return map;
 	}
+
+	return NULL;
 }
 
-static int dso__load_vmlinux(struct dso *self, const char *vmlinux,
-			     symbol_filter_t filter, int v)
+static int dsos__set_modules_path_dir(char *dirname)
 {
-	int err, fd = open(vmlinux, O_RDONLY);
+	struct dirent *dent;
+	DIR *dir = opendir(dirname);
 
+	if (!dir) {
+		pr_debug("%s: cannot open %s dir\n", __func__, dirname);
+		return -1;
+	}
+
+	while ((dent = readdir(dir)) != NULL) {
+		char path[PATH_MAX];
+
+		if (dent->d_type == DT_DIR) {
+			if (!strcmp(dent->d_name, ".") ||
+			    !strcmp(dent->d_name, ".."))
+				continue;
+
+			snprintf(path, sizeof(path), "%s/%s",
+				 dirname, dent->d_name);
+			if (dsos__set_modules_path_dir(path) < 0)
+				goto failure;
+		} else {
+			char *dot = strrchr(dent->d_name, '.'),
+			     dso_name[PATH_MAX];
+			struct map *map;
+			char *long_name;
+
+			if (dot == NULL || strcmp(dot, ".ko"))
+				continue;
+			snprintf(dso_name, sizeof(dso_name), "[%.*s]",
+				 (int)(dot - dent->d_name), dent->d_name);
+
+			strxfrchar(dso_name, '-', '_');
+			map = thread__find_map_by_name(kthread, dso_name);
+			if (map == NULL)
+				continue;
+
+			snprintf(path, sizeof(path), "%s/%s",
+				 dirname, dent->d_name);
+
+			long_name = strdup(path);
+			if (long_name == NULL)
+				goto failure;
+			dso__set_long_name(map->dso, long_name);
+		}
+	}
+
+	return 0;
+failure:
+	closedir(dir);
+	return -1;
+}
+
+static int dsos__set_modules_path(void)
+{
+	struct utsname uts;
+	char modules_path[PATH_MAX];
+
+	if (uname(&uts) < 0)
+		return -1;
+
+	snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel",
+		 uts.release);
+
+	return dsos__set_modules_path_dir(modules_path);
+}
+
+/*
+ * Constructor variant for modules (where we know from /proc/modules where
+ * they are loaded) and for vmlinux, where only after we load all the
+ * symbols we'll know where it starts and ends.
+ */
+static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
+{
+	struct map *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		/*
+		 * ->end will be filled after we load all the symbols
+		 */
+		map__init(self, type, start, 0, 0, dso);
+	}
+
+	return self;
+}
+
+static int thread__create_module_maps(struct thread *self)
+{
+	char *line = NULL;
+	size_t n;
+	FILE *file = fopen("/proc/modules", "r");
+	struct map *map;
+
+	if (file == NULL)
+		return -1;
+
+	while (!feof(file)) {
+		char name[PATH_MAX];
+		u64 start;
+		struct dso *dso;
+		char *sep;
+		int line_len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		line[--line_len] = '\0'; /* \n */
+
+		sep = strrchr(line, 'x');
+		if (sep == NULL)
+			continue;
+
+		hex2u64(sep + 1, &start);
+
+		sep = strchr(line, ' ');
+		if (sep == NULL)
+			continue;
+
+		*sep = '\0';
+
+		snprintf(name, sizeof(name), "[%s]", line);
+		dso = dso__new(name);
+
+		if (dso == NULL)
+			goto out_delete_line;
+
+		map = map__new2(start, dso, MAP__FUNCTION);
+		if (map == NULL) {
+			dso__delete(dso);
+			goto out_delete_line;
+		}
+
+		snprintf(name, sizeof(name),
+			 "/sys/module/%s/notes/.note.gnu.build-id", line);
+		if (sysfs__read_build_id(name, dso->build_id,
+					 sizeof(dso->build_id)) == 0)
+			dso->has_build_id = true;
+
+		dso->origin = DSO__ORIG_KMODULE;
+		__thread__insert_map(self, map);
+		dsos__add(&dsos__kernel, dso);
+	}
+
+	free(line);
+	fclose(file);
+
+	return dsos__set_modules_path();
+
+out_delete_line:
+	free(line);
+out_failure:
+	return -1;
+}
+
+static int dso__load_vmlinux(struct dso *self, struct map *map, struct thread *thread,
+			     const char *vmlinux, symbol_filter_t filter)
+{
+	int err = -1, fd;
+
+	if (self->has_build_id) {
+		u8 build_id[BUILD_ID_SIZE];
+
+		if (filename__read_build_id(vmlinux, build_id,
+					    sizeof(build_id)) < 0) {
+			pr_debug("No build_id in %s, ignoring it\n", vmlinux);
+			return -1;
+		}
+		if (!dso__build_id_equal(self, build_id)) {
+			char expected_build_id[BUILD_ID_SIZE * 2 + 1],
+			     vmlinux_build_id[BUILD_ID_SIZE * 2 + 1];
+
+			build_id__sprintf(self->build_id,
+					  sizeof(self->build_id),
+					  expected_build_id);
+			build_id__sprintf(build_id, sizeof(build_id),
+					  vmlinux_build_id);
+			pr_debug("build_id in %s is %s while expected is %s, "
+				 "ignoring it\n", vmlinux, vmlinux_build_id,
+				 expected_build_id);
+			return -1;
+		}
+	}
+
+	fd = open(vmlinux, O_RDONLY);
 	if (fd < 0)
 		return -1;
 
-	err = dso__load_sym(self, fd, vmlinux, filter, v, NULL);
-
-	if (err > 0)
-		dso__fill_symbol_holes(self);
-
+	dso__set_loaded(self, map->type);
+	err = dso__load_sym(self, map, thread, self->long_name, fd, filter, 1, 0);
 	close(fd);
 
 	return err;
 }
 
-int dso__load_kernel(struct dso *self, const char *vmlinux,
-		     symbol_filter_t filter, int v, int use_modules)
+static int dso__load_kernel_sym(struct dso *self, struct map *map,
+				struct thread *thread, symbol_filter_t filter)
 {
-	int err = -1;
+	int err;
+	bool is_kallsyms;
 
-	if (vmlinux) {
-		err = dso__load_vmlinux(self, vmlinux, filter, v);
-		if (err > 0 && use_modules) {
-			int syms = dso__load_modules(self, filter, v);
-
-			if (syms < 0) {
-				fprintf(stderr, "dso__load_modules failed!\n");
-				return syms;
+	if (vmlinux_path != NULL) {
+		int i;
+		pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+			 vmlinux_path__nr_entries);
+		for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+			err = dso__load_vmlinux(self, map, thread,
+						vmlinux_path[i], filter);
+			if (err > 0) {
+				pr_debug("Using %s for symbols\n",
+					 vmlinux_path[i]);
+				dso__set_long_name(self,
+						   strdup(vmlinux_path[i]));
+				goto out_fixup;
 			}
-			err += syms;
 		}
 	}
 
-	if (err <= 0)
-		err = dso__load_kallsyms(self, filter, v);
+	is_kallsyms = self->long_name[0] == '[';
+	if (is_kallsyms)
+		goto do_kallsyms;
 
-	if (err > 0)
-		self->origin = DSO__ORIG_KERNEL;
+	err = dso__load_vmlinux(self, map, thread, self->long_name, filter);
+	if (err <= 0) {
+		pr_info("The file %s cannot be used, "
+			"trying to use /proc/kallsyms...", self->long_name);
+do_kallsyms:
+		err = dso__load_kallsyms(self, map, thread, filter);
+		if (err > 0 && !is_kallsyms)
+                        dso__set_long_name(self, strdup("[kernel.kallsyms]"));
+	}
+
+	if (err > 0) {
+out_fixup:
+		map__fixup_start(map);
+		map__fixup_end(map);
+	}
 
 	return err;
 }
 
-LIST_HEAD(dsos);
-struct dso	*kernel_dso;
-struct dso	*vdso;
-struct dso	*hypervisor_dso;
+LIST_HEAD(dsos__user);
+LIST_HEAD(dsos__kernel);
+struct dso *vdso;
 
-const char	*vmlinux_name = "vmlinux";
-int		modules;
-
-static void dsos__add(struct dso *dso)
+static void dsos__add(struct list_head *head, struct dso *dso)
 {
-	list_add_tail(&dso->node, &dsos);
+	list_add_tail(&dso->node, head);
 }
 
-static struct dso *dsos__find(const char *name)
+static struct dso *dsos__find(struct list_head *head, const char *name)
 {
 	struct dso *pos;
 
-	list_for_each_entry(pos, &dsos, node)
+	list_for_each_entry(pos, head, node)
 		if (strcmp(pos->name, name) == 0)
 			return pos;
 	return NULL;
@@ -959,79 +1459,170 @@
 
 struct dso *dsos__findnew(const char *name)
 {
-	struct dso *dso = dsos__find(name);
-	int nr;
+	struct dso *dso = dsos__find(&dsos__user, name);
 
-	if (dso)
-		return dso;
-
-	dso = dso__new(name, 0);
-	if (!dso)
-		goto out_delete_dso;
-
-	nr = dso__load(dso, NULL, verbose);
-	if (nr < 0) {
-		eprintf("Failed to open: %s\n", name);
-		goto out_delete_dso;
+	if (!dso) {
+		dso = dso__new(name);
+		if (dso != NULL) {
+			dsos__add(&dsos__user, dso);
+			dso__set_basename(dso);
+		}
 	}
-	if (!nr)
-		eprintf("No symbols found in: %s, maybe install a debug package?\n", name);
-
-	dsos__add(dso);
 
 	return dso;
+}
 
-out_delete_dso:
-	dso__delete(dso);
-	return NULL;
+static void __dsos__fprintf(struct list_head *head, FILE *fp)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, head, node) {
+		int i;
+		for (i = 0; i < MAP__NR_TYPES; ++i)
+			dso__fprintf(pos, i, fp);
+	}
 }
 
 void dsos__fprintf(FILE *fp)
 {
+	__dsos__fprintf(&dsos__kernel, fp);
+	__dsos__fprintf(&dsos__user, fp);
+}
+
+static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp)
+{
 	struct dso *pos;
+	size_t ret = 0;
 
-	list_for_each_entry(pos, &dsos, node)
-		dso__fprintf(pos, fp);
+	list_for_each_entry(pos, head, node) {
+		ret += dso__fprintf_buildid(pos, fp);
+		ret += fprintf(fp, " %s\n", pos->long_name);
+	}
+	return ret;
 }
 
-static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip)
+size_t dsos__fprintf_buildid(FILE *fp)
 {
-	return dso__find_symbol(dso, ip);
+	return (__dsos__fprintf_buildid(&dsos__kernel, fp) +
+		__dsos__fprintf_buildid(&dsos__user, fp));
 }
 
-int load_kernel(void)
+static int thread__create_kernel_map(struct thread *self, const char *vmlinux)
 {
-	int err;
+	struct map *kmap;
+	struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]");
 
-	kernel_dso = dso__new("[kernel]", 0);
-	if (!kernel_dso)
+	if (kernel == NULL)
 		return -1;
 
-	err = dso__load_kernel(kernel_dso, vmlinux_name, NULL, verbose, modules);
-	if (err <= 0) {
-		dso__delete(kernel_dso);
-		kernel_dso = NULL;
-	} else
-		dsos__add(kernel_dso);
+	kmap = map__new2(0, kernel, MAP__FUNCTION);
+	if (kmap == NULL)
+		goto out_delete_kernel_dso;
 
-	vdso = dso__new("[vdso]", 0);
-	if (!vdso)
-		return -1;
+	kmap->map_ip	   = kmap->unmap_ip = identity__map_ip;
+	kernel->short_name = "[kernel]";
+	kernel->kernel	   = 1;
 
-	vdso->find_symbol = vdso__find_symbol;
+	vdso = dso__new("[vdso]");
+	if (vdso == NULL)
+		goto out_delete_kernel_map;
+	dso__set_loaded(vdso, MAP__FUNCTION);
 
-	dsos__add(vdso);
+	if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id,
+				 sizeof(kernel->build_id)) == 0)
+		kernel->has_build_id = true;
 
-	hypervisor_dso = dso__new("[hypervisor]", 0);
-	if (!hypervisor_dso)
-		return -1;
-	dsos__add(hypervisor_dso);
+	__thread__insert_map(self, kmap);
+	dsos__add(&dsos__kernel, kernel);
+	dsos__add(&dsos__user, vdso);
 
-	return err;
+	return 0;
+
+out_delete_kernel_map:
+	map__delete(kmap);
+out_delete_kernel_dso:
+	dso__delete(kernel);
+	return -1;
 }
 
-
-void symbol__init(void)
+static void vmlinux_path__exit(void)
 {
+	while (--vmlinux_path__nr_entries >= 0) {
+		free(vmlinux_path[vmlinux_path__nr_entries]);
+		vmlinux_path[vmlinux_path__nr_entries] = NULL;
+	}
+
+	free(vmlinux_path);
+	vmlinux_path = NULL;
+}
+
+static int vmlinux_path__init(void)
+{
+	struct utsname uts;
+	char bf[PATH_MAX];
+
+	if (uname(&uts) < 0)
+		return -1;
+
+	vmlinux_path = malloc(sizeof(char *) * 5);
+	if (vmlinux_path == NULL)
+		return -1;
+
+	vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux");
+	if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+		goto out_fail;
+	++vmlinux_path__nr_entries;
+	vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux");
+	if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+		goto out_fail;
+	++vmlinux_path__nr_entries;
+	snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
+	vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+	if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+		goto out_fail;
+	++vmlinux_path__nr_entries;
+	snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", uts.release);
+	vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+	if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+		goto out_fail;
+	++vmlinux_path__nr_entries;
+	snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux",
+		 uts.release);
+	vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+	if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+		goto out_fail;
+	++vmlinux_path__nr_entries;
+
+	return 0;
+
+out_fail:
+	vmlinux_path__exit();
+	return -1;
+}
+
+int symbol__init(struct symbol_conf *conf)
+{
+	const struct symbol_conf *pconf = conf ?: &symbol_conf__defaults;
+
 	elf_version(EV_CURRENT);
+	symbol__priv_size = pconf->priv_size;
+	thread__init(kthread, 0);
+
+	if (pconf->try_vmlinux_path && vmlinux_path__init() < 0)
+		return -1;
+
+	if (thread__create_kernel_map(kthread, pconf->vmlinux_name) < 0) {
+		vmlinux_path__exit();
+		return -1;
+	}
+
+	kthread->use_modules = pconf->use_modules;
+	if (pconf->use_modules && thread__create_module_maps(kthread) < 0)
+		pr_debug("Failed to load list of modules in use, "
+			 "continuing...\n");
+	/*
+	 * Now that we have all the maps created, just set the ->end of them:
+	 */
+	thread__fixup_maps_end(kthread);
+	return 0;
 }
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 829da9e..17003ef 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -1,11 +1,11 @@
-#ifndef _PERF_SYMBOL_
-#define _PERF_SYMBOL_ 1
+#ifndef __PERF_SYMBOL
+#define __PERF_SYMBOL 1
 
 #include <linux/types.h>
+#include <stdbool.h>
 #include "types.h"
 #include <linux/list.h>
 #include <linux/rbtree.h>
-#include "module.h"
 #include "event.h"
 
 #ifdef HAVE_CPLUS_DEMANGLE
@@ -46,57 +46,75 @@
 	struct rb_node	rb_node;
 	u64		start;
 	u64		end;
-	u64		obj_start;
-	u64		hist_sum;
-	u64		*hist;
-	struct module	*module;
-	void		*priv;
 	char		name[0];
 };
 
+struct symbol_conf {
+	unsigned short	priv_size;
+	bool		try_vmlinux_path,
+			use_modules;
+	const char	*vmlinux_name;
+};
+
+extern unsigned int symbol__priv_size;
+
+static inline void *symbol__priv(struct symbol *self)
+{
+	return ((void *)self) - symbol__priv_size;
+}
+
+struct addr_location {
+	struct thread *thread;
+	struct map    *map;
+	struct symbol *sym;
+	u64	      addr;
+	char	      level;
+};
+
 struct dso {
 	struct list_head node;
-	struct rb_root	 syms;
-	struct symbol    *(*find_symbol)(struct dso *, u64 ip);
-	unsigned int	 sym_priv_size;
-	unsigned char	 adjust_symbols;
-	unsigned char	 slen_calculated;
+	struct rb_root	 symbols[MAP__NR_TYPES];
+	struct symbol    *(*find_symbol)(struct dso *self,
+					 enum map_type type, u64 addr);
+	u8		 adjust_symbols:1;
+	u8		 slen_calculated:1;
+	u8		 has_build_id:1;
+	u8		 kernel:1;
 	unsigned char	 origin;
+	u8		 loaded;
+	u8		 build_id[BUILD_ID_SIZE];
+	u16		 long_name_len;
+	const char	 *short_name;
+	char	 	 *long_name;
 	char		 name[0];
 };
 
-extern const char *sym_hist_filter;
-
-typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym);
-
-struct dso *dso__new(const char *name, unsigned int sym_priv_size);
+struct dso *dso__new(const char *name);
 void dso__delete(struct dso *self);
 
-static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
-{
-	return ((void *)sym) - self->sym_priv_size;
-}
+bool dso__loaded(const struct dso *self, enum map_type type);
 
-struct symbol *dso__find_symbol(struct dso *self, u64 ip);
-
-int dso__load_kernel(struct dso *self, const char *vmlinux,
-		     symbol_filter_t filter, int verbose, int modules);
-int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose);
-int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
 struct dso *dsos__findnew(const char *name);
+int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
 void dsos__fprintf(FILE *fp);
+size_t dsos__fprintf_buildid(FILE *fp);
 
-size_t dso__fprintf(struct dso *self, FILE *fp);
+size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
+size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
 char dso__symtab_origin(const struct dso *self);
+void dso__set_build_id(struct dso *self, void *build_id);
 
-int load_kernel(void);
+int filename__read_build_id(const char *filename, void *bf, size_t size);
+int sysfs__read_build_id(const char *filename, void *bf, size_t size);
+bool dsos__read_build_ids(void);
+int build_id__sprintf(u8 *self, int len, char *bf);
 
-void symbol__init(void);
+size_t kernel_maps__fprintf(FILE *fp);
 
-extern struct list_head dsos;
-extern struct dso *kernel_dso;
+int symbol__init(struct symbol_conf *conf);
+
+struct thread;
+struct thread *kthread;
+extern struct list_head dsos__user, dsos__kernel;
 extern struct dso *vdso;
-extern struct dso *hypervisor_dso;
-extern const char *vmlinux_name;
-extern int   modules;
-#endif /* _PERF_SYMBOL_ */
+#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 45efb5d..603f561 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -6,16 +6,29 @@
 #include "util.h"
 #include "debug.h"
 
+static struct rb_root threads;
+static struct thread *last_match;
+
+void thread__init(struct thread *self, pid_t pid)
+{
+	int i;
+	self->pid = pid;
+	self->comm = NULL;
+	for (i = 0; i < MAP__NR_TYPES; ++i) {
+		self->maps[i] = RB_ROOT;
+		INIT_LIST_HEAD(&self->removed_maps[i]);
+	}
+}
+
 static struct thread *thread__new(pid_t pid)
 {
-	struct thread *self = calloc(1, sizeof(*self));
+	struct thread *self = zalloc(sizeof(*self));
 
 	if (self != NULL) {
-		self->pid = pid;
+		thread__init(self, pid);
 		self->comm = malloc(32);
 		if (self->comm)
 			snprintf(self->comm, 32, ":%d", self->pid);
-		INIT_LIST_HEAD(&self->maps);
 	}
 
 	return self;
@@ -29,21 +42,84 @@
 	return self->comm ? 0 : -ENOMEM;
 }
 
-static size_t thread__fprintf(struct thread *self, FILE *fp)
+int thread__comm_len(struct thread *self)
 {
-	struct map *pos;
-	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+	if (!self->comm_len) {
+		if (!self->comm)
+			return 0;
+		self->comm_len = strlen(self->comm);
+	}
 
-	list_for_each_entry(pos, &self->maps, node)
-		ret += map__fprintf(pos, fp);
-
-	return ret;
+	return self->comm_len;
 }
 
-struct thread *
-threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
+static const char *map_type__name[MAP__NR_TYPES] = {
+	[MAP__FUNCTION] = "Functions",
+};
+
+static size_t __thread__fprintf_maps(struct thread *self,
+				     enum map_type type, FILE *fp)
 {
-	struct rb_node **p = &threads->rb_node;
+	size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
+	struct rb_node *nd;
+
+	for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
+		struct map *pos = rb_entry(nd, struct map, rb_node);
+		printed += fprintf(fp, "Map:");
+		printed += map__fprintf(pos, fp);
+		if (verbose > 1) {
+			printed += dso__fprintf(pos->dso, type, fp);
+			printed += fprintf(fp, "--\n");
+		}
+	}
+
+	return printed;
+}
+
+size_t thread__fprintf_maps(struct thread *self, FILE *fp)
+{
+	size_t printed = 0, i;
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		printed += __thread__fprintf_maps(self, i, fp);
+	return printed;
+}
+
+static size_t __thread__fprintf_removed_maps(struct thread *self,
+					     enum map_type type, FILE *fp)
+{
+	struct map *pos;
+	size_t printed = 0;
+
+	list_for_each_entry(pos, &self->removed_maps[type], node) {
+		printed += fprintf(fp, "Map:");
+		printed += map__fprintf(pos, fp);
+		if (verbose > 1) {
+			printed += dso__fprintf(pos->dso, type, fp);
+			printed += fprintf(fp, "--\n");
+		}
+	}
+	return printed;
+}
+
+static size_t thread__fprintf_removed_maps(struct thread *self, FILE *fp)
+{
+	size_t printed = 0, i;
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		printed += __thread__fprintf_removed_maps(self, i, fp);
+	return printed;
+}
+
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	size_t printed = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+	printed += thread__fprintf_removed_maps(self, fp);
+	printed += fprintf(fp, "Removed maps:\n");
+	return printed + thread__fprintf_removed_maps(self, fp);
+}
+
+struct thread *threads__findnew(pid_t pid)
+{
+	struct rb_node **p = &threads.rb_node;
 	struct rb_node *parent = NULL;
 	struct thread *th;
 
@@ -52,15 +128,15 @@
 	 * so most of the time we dont have to look up
 	 * the full rbtree:
 	 */
-	if (*last_match && (*last_match)->pid == pid)
-		return *last_match;
+	if (last_match && last_match->pid == pid)
+		return last_match;
 
 	while (*p != NULL) {
 		parent = *p;
 		th = rb_entry(parent, struct thread, rb_node);
 
 		if (th->pid == pid) {
-			*last_match = th;
+			last_match = th;
 			return th;
 		}
 
@@ -73,17 +149,16 @@
 	th = thread__new(pid);
 	if (th != NULL) {
 		rb_link_node(&th->rb_node, parent, p);
-		rb_insert_color(&th->rb_node, threads);
-		*last_match = th;
+		rb_insert_color(&th->rb_node, &threads);
+		last_match = th;
 	}
 
 	return th;
 }
 
-struct thread *
-register_idle_thread(struct rb_root *threads, struct thread **last_match)
+struct thread *register_idle_thread(void)
 {
-	struct thread *thread = threads__findnew(0, threads, last_match);
+	struct thread *thread = threads__findnew(0);
 
 	if (!thread || thread__set_comm(thread, "swapper")) {
 		fprintf(stderr, "problem inserting idle task.\n");
@@ -93,42 +168,97 @@
 	return thread;
 }
 
-void thread__insert_map(struct thread *self, struct map *map)
+static void thread__remove_overlappings(struct thread *self, struct map *map)
 {
-	struct map *pos, *tmp;
+	struct rb_root *root = &self->maps[map->type];
+	struct rb_node *next = rb_first(root);
 
-	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
-		if (map__overlap(pos, map)) {
-			if (verbose >= 2) {
-				printf("overlapping maps:\n");
-				map__fprintf(map, stdout);
-				map__fprintf(pos, stdout);
-			}
+	while (next) {
+		struct map *pos = rb_entry(next, struct map, rb_node);
+		next = rb_next(&pos->rb_node);
 
-			if (map->start <= pos->start && map->end > pos->start)
-				pos->start = map->end;
+		if (!map__overlap(pos, map))
+			continue;
 
-			if (map->end >= pos->end && map->start < pos->end)
-				pos->end = map->start;
-
-			if (verbose >= 2) {
-				printf("after collision:\n");
-				map__fprintf(pos, stdout);
-			}
-
-			if (pos->start >= pos->end) {
-				list_del_init(&pos->node);
-				free(pos);
-			}
+		if (verbose >= 2) {
+			fputs("overlapping maps:\n", stderr);
+			map__fprintf(map, stderr);
+			map__fprintf(pos, stderr);
 		}
+
+		rb_erase(&pos->rb_node, root);
+		/*
+		 * We may have references to this map, for instance in some
+		 * hist_entry instances, so just move them to a separate
+		 * list.
+		 */
+		list_add_tail(&pos->node, &self->removed_maps[map->type]);
+	}
+}
+
+void maps__insert(struct rb_root *maps, struct map *map)
+{
+	struct rb_node **p = &maps->rb_node;
+	struct rb_node *parent = NULL;
+	const u64 ip = map->start;
+	struct map *m;
+
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct map, rb_node);
+		if (ip < m->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
 	}
 
-	list_add_tail(&map->node, &self->maps);
+	rb_link_node(&map->rb_node, parent, p);
+	rb_insert_color(&map->rb_node, maps);
+}
+
+struct map *maps__find(struct rb_root *maps, u64 ip)
+{
+	struct rb_node **p = &maps->rb_node;
+	struct rb_node *parent = NULL;
+	struct map *m;
+
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct map, rb_node);
+		if (ip < m->start)
+			p = &(*p)->rb_left;
+		else if (ip > m->end)
+			p = &(*p)->rb_right;
+		else
+			return m;
+	}
+
+	return NULL;
+}
+
+void thread__insert_map(struct thread *self, struct map *map)
+{
+	thread__remove_overlappings(self, map);
+	maps__insert(&self->maps[map->type], map);
+}
+
+static int thread__clone_maps(struct thread *self, struct thread *parent,
+			      enum map_type type)
+{
+	struct rb_node *nd;
+	for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
+		struct map *map = rb_entry(nd, struct map, rb_node);
+		struct map *new = map__clone(map);
+		if (new == NULL)
+			return -ENOMEM;
+		thread__insert_map(self, new);
+	}
+	return 0;
 }
 
 int thread__fork(struct thread *self, struct thread *parent)
 {
-	struct map *map;
+	int i;
 
 	if (self->comm)
 		free(self->comm);
@@ -136,36 +266,18 @@
 	if (!self->comm)
 		return -ENOMEM;
 
-	list_for_each_entry(map, &parent->maps, node) {
-		struct map *new = map__clone(map);
-		if (!new)
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		if (thread__clone_maps(self, parent, i) < 0)
 			return -ENOMEM;
-		thread__insert_map(self, new);
-	}
-
 	return 0;
 }
 
-struct map *thread__find_map(struct thread *self, u64 ip)
-{
-	struct map *pos;
-
-	if (self == NULL)
-		return NULL;
-
-	list_for_each_entry(pos, &self->maps, node)
-		if (ip >= pos->start && ip <= pos->end)
-			return pos;
-
-	return NULL;
-}
-
-size_t threads__fprintf(FILE *fp, struct rb_root *threads)
+size_t threads__fprintf(FILE *fp)
 {
 	size_t ret = 0;
 	struct rb_node *nd;
 
-	for (nd = rb_first(threads); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
 		struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
 		ret += thread__fprintf(pos, fp);
@@ -173,3 +285,15 @@
 
 	return ret;
 }
+
+struct symbol *thread__find_symbol(struct thread *self,
+				   enum map_type type, u64 addr,
+				   symbol_filter_t filter)
+{
+	struct map *map = thread__find_map(self, type, addr);
+
+	if (map != NULL)
+		return map__find_symbol(map, map->map_ip(map, addr), filter);
+
+	return NULL;
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 32aea3c..686d6e9 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -1,22 +1,56 @@
+#ifndef __PERF_THREAD_H
+#define __PERF_THREAD_H
+
 #include <linux/rbtree.h>
-#include <linux/list.h>
 #include <unistd.h>
 #include "symbol.h"
 
 struct thread {
 	struct rb_node		rb_node;
-	struct list_head	maps;
+	struct rb_root		maps[MAP__NR_TYPES];
+	struct list_head	removed_maps[MAP__NR_TYPES];
 	pid_t			pid;
+	bool			use_modules;
 	char			shortname[3];
 	char			*comm;
+	int			comm_len;
 };
 
+void thread__init(struct thread *self, pid_t pid);
 int thread__set_comm(struct thread *self, const char *comm);
-struct thread *
-threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match);
-struct thread *
-register_idle_thread(struct rb_root *threads, struct thread **last_match);
+int thread__comm_len(struct thread *self);
+struct thread *threads__findnew(pid_t pid);
+struct thread *register_idle_thread(void);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent);
-struct map *thread__find_map(struct thread *self, u64 ip);
-size_t threads__fprintf(FILE *fp, struct rb_root *threads);
+size_t thread__fprintf_maps(struct thread *self, FILE *fp);
+size_t threads__fprintf(FILE *fp);
+
+void maps__insert(struct rb_root *maps, struct map *map);
+struct map *maps__find(struct rb_root *maps, u64 addr);
+
+static inline struct map *thread__find_map(struct thread *self,
+					   enum map_type type, u64 addr)
+{
+	return self ? maps__find(&self->maps[type], addr) : NULL;
+}
+
+static inline void __thread__insert_map(struct thread *self, struct map *map)
+{
+	 maps__insert(&self->maps[map->type], map);
+}
+
+void thread__find_addr_location(struct thread *self, u8 cpumode,
+				enum map_type type, u64 addr,
+				struct addr_location *al,
+				symbol_filter_t filter);
+struct symbol *thread__find_symbol(struct thread *self,
+				   enum map_type type, u64 addr,
+				   symbol_filter_t filter);
+
+static inline struct symbol *
+thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter)
+{
+	return thread__find_symbol(self, MAP__FUNCTION, addr, filter);
+}
+#endif	/* __PERF_THREAD_H */
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index af4b057..cace355 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -33,11 +33,11 @@
 #include <ctype.h>
 #include <errno.h>
 #include <stdbool.h>
+#include <linux/kernel.h>
 
 #include "../perf.h"
 #include "trace-event.h"
 
-
 #define VERSION "0.5"
 
 #define _STR(x) #x
@@ -483,27 +483,33 @@
 get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
 {
 	struct tracepoint_path path, *ppath = &path;
-	int i;
+	int i, nr_tracepoints = 0;
 
 	for (i = 0; i < nb_events; i++) {
 		if (pattrs[i].type != PERF_TYPE_TRACEPOINT)
 			continue;
+		++nr_tracepoints;
 		ppath->next = tracepoint_id_to_path(pattrs[i].config);
 		if (!ppath->next)
 			die("%s\n", "No memory to alloc tracepoints list");
 		ppath = ppath->next;
 	}
 
-	return path.next;
+	return nr_tracepoints > 0 ? path.next : NULL;
 }
-void read_tracing_data(struct perf_event_attr *pattrs, int nb_events)
+
+int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
 {
 	char buf[BUFSIZ];
-	struct tracepoint_path *tps;
+	struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events);
 
-	output_fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
-	if (output_fd < 0)
-		die("creating file '%s'", output_file);
+	/*
+	 * What? No tracepoints? No sense writing anything here, bail out.
+	 */
+	if (tps == NULL)
+		return -1;
+
+	output_fd = fd;
 
 	buf[0] = 23;
 	buf[1] = 8;
@@ -530,11 +536,11 @@
 	page_size = getpagesize();
 	write_or_die(&page_size, 4);
 
-	tps = get_tracepoints_path(pattrs, nb_events);
-
 	read_header_files();
 	read_ftrace_files(tps);
 	read_event_files(tps);
 	read_proc_kallsyms();
 	read_ftrace_printk();
+
+	return 0;
 }
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 55c9659..0302405 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -40,12 +40,19 @@
 int header_page_data_offset;
 int header_page_data_size;
 
+int latency_format;
+
 static char *input_buf;
 static unsigned long long input_buf_ptr;
 static unsigned long long input_buf_siz;
 
 static int cpus;
 static int long_size;
+static int is_flag_field;
+static int is_symbolic_field;
+
+static struct format_field *
+find_any_field(struct event *event, const char *name);
 
 static void init_input_buf(char *buf, unsigned long long size)
 {
@@ -284,18 +291,19 @@
 	char *line;
 	char *next = NULL;
 	char *addr_str;
-	int ret;
 	int i;
 
 	line = strtok_r(file, "\n", &next);
 	while (line) {
+		addr_str = strsep(&line, ":");
+		if (!line) {
+			warning("error parsing print strings");
+			break;
+		}
 		item = malloc_or_die(sizeof(*item));
-		ret = sscanf(line, "%as : %as",
-			     (float *)(void *)&addr_str, /* workaround gcc warning */
-			     (float *)(void *)&item->printk);
 		item->addr = strtoull(addr_str, NULL, 16);
-		free(addr_str);
-
+		/* fmt still has a space, skip it */
+		item->printk = strdup(line+1);
 		item->next = list;
 		list = item;
 		line = strtok_r(NULL, "\n", &next);
@@ -522,7 +530,10 @@
 			last_ch = ch;
 			ch = __read_char();
 			buf[i++] = ch;
-		} while (ch != quote_ch && last_ch != '\\');
+			/* the '\' '\' will cancel itself */
+			if (ch == '\\' && last_ch == '\\')
+				last_ch = 0;
+		} while (ch != quote_ch || last_ch == '\\');
 		/* remove the last quote */
 		i--;
 		goto out;
@@ -610,7 +621,7 @@
 static int test_type(enum event_type type, enum event_type expect)
 {
 	if (type != expect) {
-		die("Error: expected type %d but read %d",
+		warning("Error: expected type %d but read %d",
 		    expect, type);
 		return -1;
 	}
@@ -621,13 +632,13 @@
 		    enum event_type expect, const char *expect_tok)
 {
 	if (type != expect) {
-		die("Error: expected type %d but read %d",
+		warning("Error: expected type %d but read %d",
 		    expect, type);
 		return -1;
 	}
 
 	if (strcmp(token, expect_tok) != 0) {
-		die("Error: expected '%s' but read '%s'",
+		warning("Error: expected '%s' but read '%s'",
 		    expect_tok, token);
 		return -1;
 	}
@@ -665,7 +676,7 @@
 
 	free_token(token);
 
-	return 0;
+	return ret;
 }
 
 static int read_expected(enum event_type expect, const char *str)
@@ -682,10 +693,10 @@
 {
 	char *token;
 
-	if (read_expected(EVENT_ITEM, (char *)"name") < 0)
+	if (read_expected(EVENT_ITEM, "name") < 0)
 		return NULL;
 
-	if (read_expected(EVENT_OP, (char *)":") < 0)
+	if (read_expected(EVENT_OP, ":") < 0)
 		return NULL;
 
 	if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -703,10 +714,10 @@
 	char *token;
 	int id;
 
-	if (read_expected_item(EVENT_ITEM, (char *)"ID") < 0)
+	if (read_expected_item(EVENT_ITEM, "ID") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, (char *)":") < 0)
+	if (read_expected(EVENT_OP, ":") < 0)
 		return -1;
 
 	if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -721,6 +732,24 @@
 	return -1;
 }
 
+static int field_is_string(struct format_field *field)
+{
+	if ((field->flags & FIELD_IS_ARRAY) &&
+	    (!strstr(field->type, "char") || !strstr(field->type, "u8") ||
+	     !strstr(field->type, "s8")))
+		return 1;
+
+	return 0;
+}
+
+static int field_is_dynamic(struct format_field *field)
+{
+	if (!strcmp(field->type, "__data_loc"))
+		return 1;
+
+	return 0;
+}
+
 static int event_read_fields(struct event *event, struct format_field **fields)
 {
 	struct format_field *field = NULL;
@@ -738,7 +767,7 @@
 
 		count++;
 
-		if (test_type_token(type, token, EVENT_ITEM, (char *)"field"))
+		if (test_type_token(type, token, EVENT_ITEM, "field"))
 			goto fail;
 		free_token(token);
 
@@ -753,7 +782,7 @@
 			type = read_token(&token);
 		}
 
-		if (test_type_token(type, token, EVENT_OP, (char *)":") < 0)
+		if (test_type_token(type, token, EVENT_OP, ":") < 0)
 			return -1;
 
 		if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -865,14 +894,20 @@
 			free(brackets);
 		}
 
-		if (test_type_token(type, token,  EVENT_OP, (char *)";"))
+		if (field_is_string(field)) {
+			field->flags |= FIELD_IS_STRING;
+			if (field_is_dynamic(field))
+				field->flags |= FIELD_IS_DYNAMIC;
+		}
+
+		if (test_type_token(type, token,  EVENT_OP, ";"))
 			goto fail;
 		free_token(token);
 
-		if (read_expected(EVENT_ITEM, (char *)"offset") < 0)
+		if (read_expected(EVENT_ITEM, "offset") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_OP, (char *)":") < 0)
+		if (read_expected(EVENT_OP, ":") < 0)
 			goto fail_expect;
 
 		if (read_expect_type(EVENT_ITEM, &token))
@@ -880,13 +915,13 @@
 		field->offset = strtoul(token, NULL, 0);
 		free_token(token);
 
-		if (read_expected(EVENT_OP, (char *)";") < 0)
+		if (read_expected(EVENT_OP, ";") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_ITEM, (char *)"size") < 0)
+		if (read_expected(EVENT_ITEM, "size") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_OP, (char *)":") < 0)
+		if (read_expected(EVENT_OP, ":") < 0)
 			goto fail_expect;
 
 		if (read_expect_type(EVENT_ITEM, &token))
@@ -894,11 +929,34 @@
 		field->size = strtoul(token, NULL, 0);
 		free_token(token);
 
-		if (read_expected(EVENT_OP, (char *)";") < 0)
+		if (read_expected(EVENT_OP, ";") < 0)
 			goto fail_expect;
 
-		if (read_expect_type(EVENT_NEWLINE, &token) < 0)
-			goto fail;
+		type = read_token(&token);
+		if (type != EVENT_NEWLINE) {
+			/* newer versions of the kernel have a "signed" type */
+			if (test_type_token(type, token, EVENT_ITEM, "signed"))
+				goto fail;
+
+			free_token(token);
+
+			if (read_expected(EVENT_OP, ":") < 0)
+				goto fail_expect;
+
+			if (read_expect_type(EVENT_ITEM, &token))
+				goto fail;
+
+			if (strtoul(token, NULL, 0))
+				field->flags |= FIELD_IS_SIGNED;
+
+			free_token(token);
+			if (read_expected(EVENT_OP, ";") < 0)
+				goto fail_expect;
+
+			if (read_expect_type(EVENT_NEWLINE, &token))
+				goto fail;
+		}
+
 		free_token(token);
 
 		*fields = field;
@@ -921,10 +979,10 @@
 	char *token;
 	int ret;
 
-	if (read_expected_item(EVENT_ITEM, (char *)"format") < 0)
+	if (read_expected_item(EVENT_ITEM, "format") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, (char *)":") < 0)
+	if (read_expected(EVENT_OP, ":") < 0)
 		return -1;
 
 	if (read_expect_type(EVENT_NEWLINE, &token))
@@ -984,7 +1042,7 @@
 
 	*tok = NULL;
 	type = process_arg(event, left, &token);
-	if (test_type_token(type, token, EVENT_OP, (char *)":"))
+	if (test_type_token(type, token, EVENT_OP, ":"))
 		goto out_free;
 
 	arg->op.op = token;
@@ -1004,6 +1062,35 @@
 	return EVENT_ERROR;
 }
 
+static enum event_type
+process_array(struct event *event, struct print_arg *top, char **tok)
+{
+	struct print_arg *arg;
+	enum event_type type;
+	char *token = NULL;
+
+	arg = malloc_or_die(sizeof(*arg));
+	memset(arg, 0, sizeof(*arg));
+
+	*tok = NULL;
+	type = process_arg(event, arg, &token);
+	if (test_type_token(type, token, EVENT_OP, "]"))
+		goto out_free;
+
+	top->op.right = arg;
+
+	free_token(token);
+	type = read_token_item(&token);
+	*tok = token;
+
+	return type;
+
+out_free:
+	free_token(*tok);
+	free_arg(arg);
+	return EVENT_ERROR;
+}
+
 static int get_op_prio(char *op)
 {
 	if (!op[1]) {
@@ -1128,6 +1215,8 @@
 		   strcmp(token, "*") == 0 ||
 		   strcmp(token, "^") == 0 ||
 		   strcmp(token, "/") == 0 ||
+		   strcmp(token, "<") == 0 ||
+		   strcmp(token, ">") == 0 ||
 		   strcmp(token, "==") == 0 ||
 		   strcmp(token, "!=") == 0) {
 
@@ -1144,17 +1233,46 @@
 
 		right = malloc_or_die(sizeof(*right));
 
-		type = process_arg(event, right, tok);
+		type = read_token_item(&token);
+		*tok = token;
+
+		/* could just be a type pointer */
+		if ((strcmp(arg->op.op, "*") == 0) &&
+		    type == EVENT_DELIM && (strcmp(token, ")") == 0)) {
+			if (left->type != PRINT_ATOM)
+				die("bad pointer type");
+			left->atom.atom = realloc(left->atom.atom,
+					    sizeof(left->atom.atom) + 3);
+			strcat(left->atom.atom, " *");
+			*arg = *left;
+			free(arg);
+
+			return type;
+		}
+
+		type = process_arg_token(event, right, tok, type);
 
 		arg->op.right = right;
 
+	} else if (strcmp(token, "[") == 0) {
+
+		left = malloc_or_die(sizeof(*left));
+		*left = *arg;
+
+		arg->type = PRINT_OP;
+		arg->op.op = token;
+		arg->op.left = left;
+
+		arg->op.prio = 0;
+		type = process_array(event, arg, tok);
+
 	} else {
-		die("unknown op '%s'", token);
+		warning("unknown op '%s'", token);
+		event->flags |= EVENT_FL_FAILED;
 		/* the arg is now the left side */
 		return EVENT_NONE;
 	}
 
-
 	if (type == EVENT_OP) {
 		int prio;
 
@@ -1178,7 +1296,7 @@
 	char *field;
 	char *token;
 
-	if (read_expected(EVENT_OP, (char *)"->") < 0)
+	if (read_expected(EVENT_OP, "->") < 0)
 		return EVENT_ERROR;
 
 	if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -1188,6 +1306,16 @@
 	arg->type = PRINT_FIELD;
 	arg->field.name = field;
 
+	if (is_flag_field) {
+		arg->field.field = find_any_field(event, arg->field.name);
+		arg->field.field->flags |= FIELD_IS_FLAG;
+		is_flag_field = 0;
+	} else if (is_symbolic_field) {
+		arg->field.field = find_any_field(event, arg->field.name);
+		arg->field.field->flags |= FIELD_IS_SYMBOLIC;
+		is_symbolic_field = 0;
+	}
+
 	type = read_token(&token);
 	*tok = token;
 
@@ -1338,14 +1466,14 @@
 	do {
 		free_token(token);
 		type = read_token_item(&token);
-		if (test_type_token(type, token, EVENT_OP, (char *)"{"))
+		if (test_type_token(type, token, EVENT_OP, "{"))
 			break;
 
 		arg = malloc_or_die(sizeof(*arg));
 
 		free_token(token);
 		type = process_arg(event, arg, &token);
-		if (test_type_token(type, token, EVENT_DELIM, (char *)","))
+		if (test_type_token(type, token, EVENT_DELIM, ","))
 			goto out_free;
 
 		field = malloc_or_die(sizeof(*field));
@@ -1356,7 +1484,7 @@
 
 		free_token(token);
 		type = process_arg(event, arg, &token);
-		if (test_type_token(type, token, EVENT_OP, (char *)"}"))
+		if (test_type_token(type, token, EVENT_OP, "}"))
 			goto out_free;
 
 		value = arg_eval(arg);
@@ -1391,13 +1519,13 @@
 	memset(arg, 0, sizeof(*arg));
 	arg->type = PRINT_FLAGS;
 
-	if (read_expected_item(EVENT_DELIM, (char *)"(") < 0)
+	if (read_expected_item(EVENT_DELIM, "(") < 0)
 		return EVENT_ERROR;
 
 	field = malloc_or_die(sizeof(*field));
 
 	type = process_arg(event, field, &token);
-	if (test_type_token(type, token, EVENT_DELIM, (char *)","))
+	if (test_type_token(type, token, EVENT_DELIM, ","))
 		goto out_free;
 
 	arg->flags.field = field;
@@ -1408,11 +1536,11 @@
 		type = read_token_item(&token);
 	}
 
-	if (test_type_token(type, token, EVENT_DELIM, (char *)","))
+	if (test_type_token(type, token, EVENT_DELIM, ","))
 		goto out_free;
 
 	type = process_fields(event, &arg->flags.flags, &token);
-	if (test_type_token(type, token, EVENT_DELIM, (char *)")"))
+	if (test_type_token(type, token, EVENT_DELIM, ")"))
 		goto out_free;
 
 	free_token(token);
@@ -1434,19 +1562,19 @@
 	memset(arg, 0, sizeof(*arg));
 	arg->type = PRINT_SYMBOL;
 
-	if (read_expected_item(EVENT_DELIM, (char *)"(") < 0)
+	if (read_expected_item(EVENT_DELIM, "(") < 0)
 		return EVENT_ERROR;
 
 	field = malloc_or_die(sizeof(*field));
 
 	type = process_arg(event, field, &token);
-	if (test_type_token(type, token, EVENT_DELIM, (char *)","))
+	if (test_type_token(type, token, EVENT_DELIM, ","))
 		goto out_free;
 
 	arg->symbol.field = field;
 
 	type = process_fields(event, &arg->symbol.symbols, &token);
-	if (test_type_token(type, token, EVENT_DELIM, (char *)")"))
+	if (test_type_token(type, token, EVENT_DELIM, ")"))
 		goto out_free;
 
 	free_token(token);
@@ -1463,7 +1591,6 @@
 {
 	struct print_arg *item_arg;
 	enum event_type type;
-	int ptr_cast = 0;
 	char *token;
 
 	type = process_arg(event, arg, &token);
@@ -1471,28 +1598,13 @@
 	if (type == EVENT_ERROR)
 		return EVENT_ERROR;
 
-	if (type == EVENT_OP) {
-		/* handle the ptr casts */
-		if (!strcmp(token, "*")) {
-			/*
-			 * FIXME: should we zapp whitespaces before ')' ?
-			 * (may require a peek_token_item())
-			 */
-			if (__peek_char() == ')') {
-				ptr_cast = 1;
-				free_token(token);
-				type = read_token_item(&token);
-			}
-		}
-		if (!ptr_cast) {
-			type = process_op(event, arg, &token);
+	if (type == EVENT_OP)
+		type = process_op(event, arg, &token);
 
-			if (type == EVENT_ERROR)
-				return EVENT_ERROR;
-		}
-	}
+	if (type == EVENT_ERROR)
+		return EVENT_ERROR;
 
-	if (test_type_token(type, token, EVENT_DELIM, (char *)")")) {
+	if (test_type_token(type, token, EVENT_DELIM, ")")) {
 		free_token(token);
 		return EVENT_ERROR;
 	}
@@ -1516,13 +1628,6 @@
 		item_arg = malloc_or_die(sizeof(*item_arg));
 
 		arg->type = PRINT_TYPE;
-		if (ptr_cast) {
-			char *old = arg->atom.atom;
-
-			arg->atom.atom = malloc_or_die(strlen(old + 3));
-			sprintf(arg->atom.atom, "%s *", old);
-			free(old);
-		}
 		arg->typecast.type = arg->atom.atom;
 		arg->typecast.item = item_arg;
 		type = process_arg_token(event, item_arg, &token, type);
@@ -1540,7 +1645,7 @@
 	enum event_type type;
 	char *token;
 
-	if (read_expected(EVENT_DELIM, (char *)"(") < 0)
+	if (read_expected(EVENT_DELIM, "(") < 0)
 		return EVENT_ERROR;
 
 	if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -1550,7 +1655,7 @@
 	arg->string.string = token;
 	arg->string.offset = -1;
 
-	if (read_expected(EVENT_DELIM, (char *)")") < 0)
+	if (read_expected(EVENT_DELIM, ")") < 0)
 		return EVENT_ERROR;
 
 	type = read_token(&token);
@@ -1578,9 +1683,11 @@
 			type = process_entry(event, arg, &token);
 		} else if (strcmp(token, "__print_flags") == 0) {
 			free_token(token);
+			is_flag_field = 1;
 			type = process_flags(event, arg, &token);
 		} else if (strcmp(token, "__print_symbolic") == 0) {
 			free_token(token);
+			is_symbolic_field = 1;
 			type = process_symbols(event, arg, &token);
 		} else if (strcmp(token, "__get_str") == 0) {
 			free_token(token);
@@ -1637,12 +1744,18 @@
 
 static int event_read_print_args(struct event *event, struct print_arg **list)
 {
-	enum event_type type;
+	enum event_type type = EVENT_ERROR;
 	struct print_arg *arg;
 	char *token;
 	int args = 0;
 
 	do {
+		if (type == EVENT_NEWLINE) {
+			free_token(token);
+			type = read_token_item(&token);
+			continue;
+		}
+
 		arg = malloc_or_die(sizeof(*arg));
 		memset(arg, 0, sizeof(*arg));
 
@@ -1683,18 +1796,19 @@
 	char *token;
 	int ret;
 
-	if (read_expected_item(EVENT_ITEM, (char *)"print") < 0)
+	if (read_expected_item(EVENT_ITEM, "print") < 0)
 		return -1;
 
-	if (read_expected(EVENT_ITEM, (char *)"fmt") < 0)
+	if (read_expected(EVENT_ITEM, "fmt") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, (char *)":") < 0)
+	if (read_expected(EVENT_OP, ":") < 0)
 		return -1;
 
 	if (read_expect_type(EVENT_DQUOTE, &token) < 0)
 		goto fail;
 
+ concat:
 	event->print_fmt.format = token;
 	event->print_fmt.args = NULL;
 
@@ -1704,7 +1818,22 @@
 	if (type == EVENT_NONE)
 		return 0;
 
-	if (test_type_token(type, token, EVENT_DELIM, (char *)","))
+	/* Handle concatination of print lines */
+	if (type == EVENT_DQUOTE) {
+		char *cat;
+
+		cat = malloc_or_die(strlen(event->print_fmt.format) +
+				    strlen(token) + 1);
+		strcpy(cat, event->print_fmt.format);
+		strcat(cat, token);
+		free_token(token);
+		free_token(event->print_fmt.format);
+		event->print_fmt.format = NULL;
+		token = cat;
+		goto concat;
+	}
+
+	if (test_type_token(type, token, EVENT_DELIM, ","))
 		goto fail;
 
 	free_token(token);
@@ -1713,7 +1842,7 @@
 	if (ret < 0)
 		return -1;
 
-	return 0;
+	return ret;
 
  fail:
 	free_token(token);
@@ -1759,7 +1888,7 @@
 	return find_field(event, name);
 }
 
-static unsigned long long read_size(void *ptr, int size)
+unsigned long long read_size(void *ptr, int size)
 {
 	switch (size) {
 	case 1:
@@ -1822,37 +1951,67 @@
 	return 0;
 }
 
+static int __parse_common(void *data, int *size, int *offset,
+			  const char *name)
+{
+	int ret;
+
+	if (!*size) {
+		ret = get_common_info(name, offset, size);
+		if (ret < 0)
+			return ret;
+	}
+	return read_size(data + *offset, *size);
+}
+
 int trace_parse_common_type(void *data)
 {
 	static int type_offset;
 	static int type_size;
-	int ret;
 
-	if (!type_size) {
-		ret = get_common_info("common_type",
-				      &type_offset,
-				      &type_size);
-		if (ret < 0)
-			return ret;
-	}
-	return read_size(data + type_offset, type_size);
+	return __parse_common(data, &type_size, &type_offset,
+			      "common_type");
 }
 
-static int parse_common_pid(void *data)
+int trace_parse_common_pid(void *data)
 {
 	static int pid_offset;
 	static int pid_size;
+
+	return __parse_common(data, &pid_size, &pid_offset,
+			      "common_pid");
+}
+
+int parse_common_pc(void *data)
+{
+	static int pc_offset;
+	static int pc_size;
+
+	return __parse_common(data, &pc_size, &pc_offset,
+			      "common_preempt_count");
+}
+
+int parse_common_flags(void *data)
+{
+	static int flags_offset;
+	static int flags_size;
+
+	return __parse_common(data, &flags_size, &flags_offset,
+			      "common_flags");
+}
+
+int parse_common_lock_depth(void *data)
+{
+	static int ld_offset;
+	static int ld_size;
 	int ret;
 
-	if (!pid_size) {
-		ret = get_common_info("common_pid",
-				      &pid_offset,
-				      &pid_size);
-		if (ret < 0)
-			return ret;
-	}
+	ret = __parse_common(data, &ld_size, &ld_offset,
+			     "common_lock_depth");
+	if (ret < 0)
+		return -1;
 
-	return read_size(data + pid_offset, pid_size);
+	return ret;
 }
 
 struct event *trace_find_event(int id)
@@ -1866,11 +2025,20 @@
 	return event;
 }
 
+struct event *trace_find_next_event(struct event *event)
+{
+	if (!event)
+		return event_list;
+
+	return event->next;
+}
+
 static unsigned long long eval_num_arg(void *data, int size,
 				   struct event *event, struct print_arg *arg)
 {
 	unsigned long long val = 0;
 	unsigned long long left, right;
+	struct print_arg *larg;
 
 	switch (arg->type) {
 	case PRINT_NULL:
@@ -1897,6 +2065,26 @@
 		return 0;
 		break;
 	case PRINT_OP:
+		if (strcmp(arg->op.op, "[") == 0) {
+			/*
+			 * Arrays are special, since we don't want
+			 * to read the arg as is.
+			 */
+			if (arg->op.left->type != PRINT_FIELD)
+				goto default_op; /* oops, all bets off */
+			larg = arg->op.left;
+			if (!larg->field.field) {
+				larg->field.field =
+					find_any_field(event, larg->field.name);
+				if (!larg->field.field)
+					die("field %s not found", larg->field.name);
+			}
+			right = eval_num_arg(data, size, event, arg->op.right);
+			val = read_size(data + larg->field.field->offset +
+					right * long_size, long_size);
+			break;
+		}
+ default_op:
 		left = eval_num_arg(data, size, event, arg->op.left);
 		right = eval_num_arg(data, size, event, arg->op.right);
 		switch (arg->op.op[0]) {
@@ -1947,6 +2135,12 @@
 				die("unknown op '%s'", arg->op.op);
 			val = left == right;
 			break;
+		case '-':
+			val = left - right;
+			break;
+		case '+':
+			val = left + right;
+			break;
 		default:
 			die("unknown op '%s'", arg->op.op);
 		}
@@ -1978,7 +2172,7 @@
 	{ "HRTIMER_RESTART", 1 },
 };
 
-static unsigned long long eval_flag(const char *flag)
+unsigned long long eval_flag(const char *flag)
 {
 	int i;
 
@@ -2145,8 +2339,9 @@
 			case 'u':
 			case 'x':
 			case 'i':
-				bptr = (void *)(((unsigned long)bptr + (long_size - 1)) &
-						~(long_size - 1));
+				/* the pointers are always 4 bytes aligned */
+				bptr = (void *)(((unsigned long)bptr + 3) &
+						~3);
 				switch (ls) {
 				case 0:
 				case 1:
@@ -2270,7 +2465,27 @@
 
 	for (; *ptr; ptr++) {
 		ls = 0;
-		if (*ptr == '%') {
+		if (*ptr == '\\') {
+			ptr++;
+			switch (*ptr) {
+			case 'n':
+				printf("\n");
+				break;
+			case 't':
+				printf("\t");
+				break;
+			case 'r':
+				printf("\r");
+				break;
+			case '\\':
+				printf("\\");
+				break;
+			default:
+				printf("%c", *ptr);
+				break;
+			}
+
+		} else if (*ptr == '%') {
 			saveptr = ptr;
 			show_func = 0;
  cont_process:
@@ -2377,6 +2592,41 @@
 	return 1;
 }
 
+static void print_lat_fmt(void *data, int size __unused)
+{
+	unsigned int lat_flags;
+	unsigned int pc;
+	int lock_depth;
+	int hardirq;
+	int softirq;
+
+	lat_flags = parse_common_flags(data);
+	pc = parse_common_pc(data);
+	lock_depth = parse_common_lock_depth(data);
+
+	hardirq = lat_flags & TRACE_FLAG_HARDIRQ;
+	softirq = lat_flags & TRACE_FLAG_SOFTIRQ;
+
+	printf("%c%c%c",
+	       (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+	       (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
+	       'X' : '.',
+	       (lat_flags & TRACE_FLAG_NEED_RESCHED) ?
+	       'N' : '.',
+	       (hardirq && softirq) ? 'H' :
+	       hardirq ? 'h' : softirq ? 's' : '.');
+
+	if (pc)
+		printf("%x", pc);
+	else
+		printf(".");
+
+	if (lock_depth < 0)
+		printf(".");
+	else
+		printf("%d", lock_depth);
+}
+
 /* taken from Linux, written by Frederic Weisbecker */
 static void print_graph_cpu(int cpu)
 {
@@ -2452,7 +2702,7 @@
 	if (!(event->flags & EVENT_FL_ISFUNCRET))
 		return NULL;
 
-	pid = parse_common_pid(next->data);
+	pid = trace_parse_common_pid(next->data);
 	field = find_field(event, "func");
 	if (!field)
 		die("function return does not have field func");
@@ -2620,6 +2870,11 @@
 
 	printf(" | ");
 
+	if (latency_format) {
+		print_lat_fmt(data, size);
+		printf(" | ");
+	}
+
 	field = find_field(event, "func");
 	if (!field)
 		die("function entry does not have func field");
@@ -2663,6 +2918,11 @@
 
 	printf(" | ");
 
+	if (latency_format) {
+		print_lat_fmt(data, size);
+		printf(" | ");
+	}
+
 	field = find_field(event, "rettime");
 	if (!field)
 		die("can't find rettime in return graph");
@@ -2724,19 +2984,30 @@
 
 	event = trace_find_event(type);
 	if (!event) {
-		printf("ug! no event found for type %d\n", type);
+		warning("ug! no event found for type %d", type);
 		return;
 	}
 
-	pid = parse_common_pid(data);
+	pid = trace_parse_common_pid(data);
 
 	if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET))
 		return pretty_print_func_graph(data, size, event, cpu,
 					       pid, comm, secs, usecs);
 
-	printf("%16s-%-5d [%03d] %5lu.%09Lu: %s: ",
-	       comm, pid,  cpu,
-	       secs, nsecs, event->name);
+	if (latency_format) {
+		printf("%8.8s-%-5d %3d",
+		       comm, pid, cpu);
+		print_lat_fmt(data, size);
+	} else
+		printf("%16s-%-5d [%03d]", comm, pid,  cpu);
+
+	printf(" %5lu.%06lu: %s: ", secs, usecs, event->name);
+
+	if (event->flags & EVENT_FL_FAILED) {
+		printf("EVENT '%s' FAILED TO PARSE\n",
+		       event->name);
+		return;
+	}
 
 	pretty_print(data, size, event);
 	printf("\n");
@@ -2807,46 +3078,71 @@
 	}
 }
 
-static void parse_header_field(char *type,
+static void parse_header_field(const char *field,
 			       int *offset, int *size)
 {
 	char *token;
+	int type;
 
-	if (read_expected(EVENT_ITEM, (char *)"field") < 0)
+	if (read_expected(EVENT_ITEM, "field") < 0)
 		return;
-	if (read_expected(EVENT_OP, (char *)":") < 0)
+	if (read_expected(EVENT_OP, ":") < 0)
 		return;
+
 	/* type */
 	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		return;
+		goto fail;
 	free_token(token);
 
-	if (read_expected(EVENT_ITEM, type) < 0)
+	if (read_expected(EVENT_ITEM, field) < 0)
 		return;
-	if (read_expected(EVENT_OP, (char *)";") < 0)
+	if (read_expected(EVENT_OP, ";") < 0)
 		return;
-	if (read_expected(EVENT_ITEM, (char *)"offset") < 0)
+	if (read_expected(EVENT_ITEM, "offset") < 0)
 		return;
-	if (read_expected(EVENT_OP, (char *)":") < 0)
+	if (read_expected(EVENT_OP, ":") < 0)
 		return;
 	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		return;
+		goto fail;
 	*offset = atoi(token);
 	free_token(token);
-	if (read_expected(EVENT_OP, (char *)";") < 0)
+	if (read_expected(EVENT_OP, ";") < 0)
 		return;
-	if (read_expected(EVENT_ITEM, (char *)"size") < 0)
+	if (read_expected(EVENT_ITEM, "size") < 0)
 		return;
-	if (read_expected(EVENT_OP, (char *)":") < 0)
+	if (read_expected(EVENT_OP, ":") < 0)
 		return;
 	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		return;
+		goto fail;
 	*size = atoi(token);
 	free_token(token);
-	if (read_expected(EVENT_OP, (char *)";") < 0)
+	if (read_expected(EVENT_OP, ";") < 0)
 		return;
-	if (read_expect_type(EVENT_NEWLINE, &token) < 0)
-		return;
+	type = read_token(&token);
+	if (type != EVENT_NEWLINE) {
+		/* newer versions of the kernel have a "signed" type */
+		if (type != EVENT_ITEM)
+			goto fail;
+
+		if (strcmp(token, "signed") != 0)
+			goto fail;
+
+		free_token(token);
+
+		if (read_expected(EVENT_OP, ":") < 0)
+			return;
+
+		if (read_expect_type(EVENT_ITEM, &token))
+			goto fail;
+
+		free_token(token);
+		if (read_expected(EVENT_OP, ";") < 0)
+			return;
+
+		if (read_expect_type(EVENT_NEWLINE, &token))
+			goto fail;
+	}
+ fail:
 	free_token(token);
 }
 
@@ -2854,11 +3150,11 @@
 {
 	init_input_buf(buf, size);
 
-	parse_header_field((char *)"timestamp", &header_page_ts_offset,
+	parse_header_field("timestamp", &header_page_ts_offset,
 			   &header_page_ts_size);
-	parse_header_field((char *)"commit", &header_page_size_offset,
+	parse_header_field("commit", &header_page_size_offset,
 			   &header_page_size_size);
-	parse_header_field((char *)"data", &header_page_data_offset,
+	parse_header_field("data", &header_page_data_offset,
 			   &header_page_data_size);
 
 	return 0;
@@ -2909,6 +3205,9 @@
 	if (ret < 0)
 		die("failed to read ftrace event print fmt");
 
+	/* New ftrace handles args */
+	if (ret > 0)
+		return 0;
 	/*
 	 * The arguments for ftrace files are parsed by the fields.
 	 * Set up the fields as their arguments.
@@ -2926,7 +3225,7 @@
 	return 0;
 }
 
-int parse_event_file(char *buf, unsigned long size, char *system__unused __unused)
+int parse_event_file(char *buf, unsigned long size, char *sys)
 {
 	struct event *event;
 	int ret;
@@ -2946,12 +3245,18 @@
 		die("failed to read event id");
 
 	ret = event_read_format(event);
-	if (ret < 0)
-		die("failed to read event format");
+	if (ret < 0) {
+		warning("failed to read event format for %s", event->name);
+		goto event_failed;
+	}
 
 	ret = event_read_print(event);
-	if (ret < 0)
-		die("failed to read event print fmt");
+	if (ret < 0) {
+		warning("failed to read event print fmt for %s", event->name);
+		goto event_failed;
+	}
+
+	event->system = strdup(sys);
 
 #define PRINT_ARGS 0
 	if (PRINT_ARGS && event->print_fmt.args)
@@ -2959,6 +3264,12 @@
 
 	add_event(event);
 	return 0;
+
+ event_failed:
+	event->flags |= EVENT_FL_FAILED;
+	/* still add it even if it failed */
+	add_event(event);
+	return -1;
 }
 
 void parse_set_info(int nr_cpus, int long_sz)
diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c
new file mode 100644
index 0000000..51e833f
--- /dev/null
+++ b/tools/perf/util/trace-event-perl.c
@@ -0,0 +1,598 @@
+/*
+ * trace-event-perl.  Feed perf trace events to an embedded Perl interpreter.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "trace-event.h"
+#include "trace-event-perl.h"
+
+void xs_init(pTHX);
+
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
+
+void xs_init(pTHX)
+{
+	const char *file = __FILE__;
+	dXSUB_SYS;
+
+	newXS("Perf::Trace::Context::bootstrap", boot_Perf__Trace__Context,
+	      file);
+	newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file);
+}
+
+INTERP my_perl;
+
+#define FTRACE_MAX_EVENT				\
+	((1 << (sizeof(unsigned short) * 8)) - 1)
+
+struct event *events[FTRACE_MAX_EVENT];
+
+static struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+static void define_symbolic_value(const char *ev_name,
+				  const char *field_name,
+				  const char *field_value,
+				  const char *field_str)
+{
+	unsigned long long value;
+	dSP;
+
+	value = eval_flag(field_value);
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVuv(value)));
+	XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+	PUTBACK;
+	if (get_cv("main::define_symbolic_value", 0))
+		call_pv("main::define_symbolic_value", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void define_symbolic_values(struct print_flag_sym *field,
+				   const char *ev_name,
+				   const char *field_name)
+{
+	define_symbolic_value(ev_name, field_name, field->value, field->str);
+	if (field->next)
+		define_symbolic_values(field->next, ev_name, field_name);
+}
+
+static void define_symbolic_field(const char *ev_name,
+				  const char *field_name)
+{
+	dSP;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+
+	PUTBACK;
+	if (get_cv("main::define_symbolic_field", 0))
+		call_pv("main::define_symbolic_field", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void define_flag_value(const char *ev_name,
+			      const char *field_name,
+			      const char *field_value,
+			      const char *field_str)
+{
+	unsigned long long value;
+	dSP;
+
+	value = eval_flag(field_value);
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVuv(value)));
+	XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+	PUTBACK;
+	if (get_cv("main::define_flag_value", 0))
+		call_pv("main::define_flag_value", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void define_flag_values(struct print_flag_sym *field,
+			       const char *ev_name,
+			       const char *field_name)
+{
+	define_flag_value(ev_name, field_name, field->value, field->str);
+	if (field->next)
+		define_flag_values(field->next, ev_name, field_name);
+}
+
+static void define_flag_field(const char *ev_name,
+			      const char *field_name,
+			      const char *delim)
+{
+	dSP;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(delim, 0)));
+
+	PUTBACK;
+	if (get_cv("main::define_flag_field", 0))
+		call_pv("main::define_flag_field", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void define_event_symbols(struct event *event,
+				 const char *ev_name,
+				 struct print_arg *args)
+{
+	switch (args->type) {
+	case PRINT_NULL:
+		break;
+	case PRINT_ATOM:
+		define_flag_value(ev_name, cur_field_name, "0",
+				  args->atom.atom);
+		zero_flag_atom = 0;
+		break;
+	case PRINT_FIELD:
+		if (cur_field_name)
+			free(cur_field_name);
+		cur_field_name = strdup(args->field.name);
+		break;
+	case PRINT_FLAGS:
+		define_event_symbols(event, ev_name, args->flags.field);
+		define_flag_field(ev_name, cur_field_name, args->flags.delim);
+		define_flag_values(args->flags.flags, ev_name, cur_field_name);
+		break;
+	case PRINT_SYMBOL:
+		define_event_symbols(event, ev_name, args->symbol.field);
+		define_symbolic_field(ev_name, cur_field_name);
+		define_symbolic_values(args->symbol.symbols, ev_name,
+				       cur_field_name);
+		break;
+	case PRINT_STRING:
+		break;
+	case PRINT_TYPE:
+		define_event_symbols(event, ev_name, args->typecast.item);
+		break;
+	case PRINT_OP:
+		if (strcmp(args->op.op, ":") == 0)
+			zero_flag_atom = 1;
+		define_event_symbols(event, ev_name, args->op.left);
+		define_event_symbols(event, ev_name, args->op.right);
+		break;
+	default:
+		/* we should warn... */
+		return;
+	}
+
+	if (args->next)
+		define_event_symbols(event, ev_name, args->next);
+}
+
+static inline struct event *find_cache_event(int type)
+{
+	static char ev_name[256];
+	struct event *event;
+
+	if (events[type])
+		return events[type];
+
+	events[type] = event = trace_find_event(type);
+	if (!event)
+		return NULL;
+
+	sprintf(ev_name, "%s::%s", event->system, event->name);
+
+	define_event_symbols(event, ev_name, event->print_fmt.args);
+
+	return event;
+}
+
+int common_pc(struct scripting_context *context)
+{
+	int pc;
+
+	pc = parse_common_pc(context->event_data);
+
+	return pc;
+}
+
+int common_flags(struct scripting_context *context)
+{
+	int flags;
+
+	flags = parse_common_flags(context->event_data);
+
+	return flags;
+}
+
+int common_lock_depth(struct scripting_context *context)
+{
+	int lock_depth;
+
+	lock_depth = parse_common_lock_depth(context->event_data);
+
+	return lock_depth;
+}
+
+static void perl_process_event(int cpu, void *data,
+			       int size __attribute((unused)),
+			       unsigned long long nsecs, char *comm)
+{
+	struct format_field *field;
+	static char handler[256];
+	unsigned long long val;
+	unsigned long s, ns;
+	struct event *event;
+	int type;
+	int pid;
+
+	dSP;
+
+	type = trace_parse_common_type(data);
+
+	event = find_cache_event(type);
+	if (!event)
+		die("ug! no event found for type %d", type);
+
+	pid = trace_parse_common_pid(data);
+
+	sprintf(handler, "%s::%s", event->system, event->name);
+
+	s = nsecs / NSECS_PER_SEC;
+	ns = nsecs - s * NSECS_PER_SEC;
+
+	scripting_context->event_data = data;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+	XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+	XPUSHs(sv_2mortal(newSVuv(cpu)));
+	XPUSHs(sv_2mortal(newSVuv(s)));
+	XPUSHs(sv_2mortal(newSVuv(ns)));
+	XPUSHs(sv_2mortal(newSViv(pid)));
+	XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+
+	/* common fields other than pid can be accessed via xsub fns */
+
+	for (field = event->format.fields; field; field = field->next) {
+		if (field->flags & FIELD_IS_STRING) {
+			int offset;
+			if (field->flags & FIELD_IS_DYNAMIC) {
+				offset = *(int *)(data + field->offset);
+				offset &= 0xffff;
+			} else
+				offset = field->offset;
+			XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
+		} else { /* FIELD_IS_NUMERIC */
+			val = read_size(data + field->offset, field->size);
+			if (field->flags & FIELD_IS_SIGNED) {
+				XPUSHs(sv_2mortal(newSViv(val)));
+			} else {
+				XPUSHs(sv_2mortal(newSVuv(val)));
+			}
+		}
+	}
+
+	PUTBACK;
+
+	if (get_cv(handler, 0))
+		call_pv(handler, G_SCALAR);
+	else if (get_cv("main::trace_unhandled", 0)) {
+		XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+		XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+		XPUSHs(sv_2mortal(newSVuv(cpu)));
+		XPUSHs(sv_2mortal(newSVuv(nsecs)));
+		XPUSHs(sv_2mortal(newSViv(pid)));
+		XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+		call_pv("main::trace_unhandled", G_SCALAR);
+	}
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void run_start_sub(void)
+{
+	dSP; /* access to Perl stack */
+	PUSHMARK(SP);
+
+	if (get_cv("main::trace_begin", 0))
+		call_pv("main::trace_begin", G_DISCARD | G_NOARGS);
+}
+
+/*
+ * Start trace script
+ */
+static int perl_start_script(const char *script)
+{
+	const char *command_line[2] = { "", NULL };
+
+	command_line[1] = script;
+
+	my_perl = perl_alloc();
+	perl_construct(my_perl);
+
+	if (perl_parse(my_perl, xs_init, 2, (char **)command_line,
+		       (char **)NULL))
+		return -1;
+
+	perl_run(my_perl);
+	if (SvTRUE(ERRSV))
+		return -1;
+
+	run_start_sub();
+
+	fprintf(stderr, "perf trace started with Perl script %s\n\n", script);
+
+	return 0;
+}
+
+/*
+ * Stop trace script
+ */
+static int perl_stop_script(void)
+{
+	dSP; /* access to Perl stack */
+	PUSHMARK(SP);
+
+	if (get_cv("main::trace_end", 0))
+		call_pv("main::trace_end", G_DISCARD | G_NOARGS);
+
+	perl_destruct(my_perl);
+	perl_free(my_perl);
+
+	fprintf(stderr, "\nperf trace Perl script stopped\n");
+
+	return 0;
+}
+
+static int perl_generate_script(const char *outfile)
+{
+	struct event *event = NULL;
+	struct format_field *f;
+	char fname[PATH_MAX];
+	int not_first, count;
+	FILE *ofp;
+
+	sprintf(fname, "%s.pl", outfile);
+	ofp = fopen(fname, "w");
+	if (ofp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", fname);
+		return -1;
+	}
+
+	fprintf(ofp, "# perf trace event handlers, "
+		"generated by perf trace -g perl\n");
+
+	fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+		" License version 2\n\n");
+
+	fprintf(ofp, "# The common_* event handler fields are the most useful "
+		"fields common to\n");
+
+	fprintf(ofp, "# all events.  They don't necessarily correspond to "
+		"the 'common_*' fields\n");
+
+	fprintf(ofp, "# in the format files.  Those fields not available as "
+		"handler params can\n");
+
+	fprintf(ofp, "# be retrieved using Perl functions of the form "
+		"common_*($context).\n");
+
+	fprintf(ofp, "# See Context.pm for the list of available "
+		"functions.\n\n");
+
+	fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/"
+		"Perf-Trace-Util/lib\";\n");
+
+	fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n");
+	fprintf(ofp, "use Perf::Trace::Core;\n");
+	fprintf(ofp, "use Perf::Trace::Context;\n");
+	fprintf(ofp, "use Perf::Trace::Util;\n\n");
+
+	fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
+	fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n");
+
+	while ((event = trace_find_next_event(event))) {
+		fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
+		fprintf(ofp, "\tmy (");
+
+		fprintf(ofp, "$event_name, ");
+		fprintf(ofp, "$context, ");
+		fprintf(ofp, "$common_cpu, ");
+		fprintf(ofp, "$common_secs, ");
+		fprintf(ofp, "$common_nsecs,\n");
+		fprintf(ofp, "\t    $common_pid, ");
+		fprintf(ofp, "$common_comm,\n\t    ");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t    ");
+
+			fprintf(ofp, "$%s", f->name);
+		}
+		fprintf(ofp, ") = @_;\n\n");
+
+		fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+			"$common_secs, $common_nsecs,\n\t             "
+			"$common_pid, $common_comm);\n\n");
+
+		fprintf(ofp, "\tprintf(\"");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (count && count % 4 == 0) {
+				fprintf(ofp, "\".\n\t       \"");
+			}
+			count++;
+
+			fprintf(ofp, "%s=", f->name);
+			if (f->flags & FIELD_IS_STRING ||
+			    f->flags & FIELD_IS_FLAG ||
+			    f->flags & FIELD_IS_SYMBOLIC)
+				fprintf(ofp, "%%s");
+			else if (f->flags & FIELD_IS_SIGNED)
+				fprintf(ofp, "%%d");
+			else
+				fprintf(ofp, "%%u");
+		}
+
+		fprintf(ofp, "\\n\",\n\t       ");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t       ");
+
+			if (f->flags & FIELD_IS_FLAG) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t       ");
+					count = 4;
+				}
+				fprintf(ofp, "flag_str(\"");
+				fprintf(ofp, "%s::%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", $%s)", f->name,
+					f->name);
+			} else if (f->flags & FIELD_IS_SYMBOLIC) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t       ");
+					count = 4;
+				}
+				fprintf(ofp, "symbol_str(\"");
+				fprintf(ofp, "%s::%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", $%s)", f->name,
+					f->name);
+			} else
+				fprintf(ofp, "$%s", f->name);
+		}
+
+		fprintf(ofp, ");\n");
+		fprintf(ofp, "}\n\n");
+	}
+
+	fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
+		"$common_cpu, $common_secs, $common_nsecs,\n\t    "
+		"$common_pid, $common_comm) = @_;\n\n");
+
+	fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+		"$common_secs, $common_nsecs,\n\t             $common_pid, "
+		"$common_comm);\n}\n\n");
+
+	fprintf(ofp, "sub print_header\n{\n"
+		"\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
+		"\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t       "
+		"$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}");
+
+	fclose(ofp);
+
+	fprintf(stderr, "generated Perl script: %s\n", fname);
+
+	return 0;
+}
+
+struct scripting_ops perl_scripting_ops = {
+	.name = "Perl",
+	.start_script = perl_start_script,
+	.stop_script = perl_stop_script,
+	.process_event = perl_process_event,
+	.generate_script = perl_generate_script,
+};
+
+#ifdef NO_LIBPERL
+void setup_perl_scripting(void)
+{
+	fprintf(stderr, "Perl scripting not supported."
+		"  Install libperl and rebuild perf to enable it.  e.g. "
+		"apt-get install libperl-dev (ubuntu), yum install "
+		"perl-ExtUtils-Embed (Fedora), etc.\n");
+}
+#else
+void setup_perl_scripting(void)
+{
+	int err;
+	err = script_spec_register("Perl", &perl_scripting_ops);
+	if (err)
+		die("error registering Perl script extension");
+
+	err = script_spec_register("pl", &perl_scripting_ops);
+	if (err)
+		die("error registering pl script extension");
+
+	scripting_context = malloc(sizeof(struct scripting_context));
+}
+#endif
diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h
new file mode 100644
index 0000000..8fe0d86
--- /dev/null
+++ b/tools/perf/util/trace-event-perl.h
@@ -0,0 +1,51 @@
+#ifndef __PERF_TRACE_EVENT_PERL_H
+#define __PERF_TRACE_EVENT_PERL_H
+#ifdef NO_LIBPERL
+typedef int INTERP;
+#define dSP
+#define ENTER
+#define SAVETMPS
+#define PUTBACK
+#define SPAGAIN
+#define FREETMPS
+#define LEAVE
+#define SP
+#define ERRSV
+#define G_SCALAR		(0)
+#define G_DISCARD		(0)
+#define G_NOARGS		(0)
+#define PUSHMARK(a)
+#define SvTRUE(a)		(0)
+#define XPUSHs(s)
+#define sv_2mortal(a)
+#define newSVpv(a,b)
+#define newSVuv(a)
+#define newSViv(a)
+#define get_cv(a,b)		(0)
+#define call_pv(a,b)		(0)
+#define perl_alloc()		(0)
+#define perl_construct(a)	(0)
+#define perl_parse(a,b,c,d,e)	(0)
+#define perl_run(a)		(0)
+#define perl_destruct(a)	(0)
+#define perl_free(a)		(0)
+#define pTHX			void
+#define CV			void
+#define dXSUB_SYS
+#define pTHX_
+static inline void newXS(const char *a, void *b, const char *c) {}
+#else
+#include <EXTERN.h>
+#include <perl.h>
+typedef PerlInterpreter * INTERP;
+#endif
+
+struct scripting_context {
+	void *event_data;
+};
+
+int common_pc(struct scripting_context *context);
+int common_flags(struct scripting_context *context);
+int common_lock_depth(struct scripting_context *context);
+
+#endif /* __PERF_TRACE_EVENT_PERL_H */
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 1b5c847..342dfdd 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -458,9 +458,8 @@
 	return data;
 }
 
-void trace_report(void)
+void trace_report(int fd)
 {
-	const char *input_file = "trace.info";
 	char buf[BUFSIZ];
 	char test[] = { 23, 8, 68 };
 	char *version;
@@ -468,17 +467,15 @@
 	int show_funcs = 0;
 	int show_printk = 0;
 
-	input_fd = open(input_file, O_RDONLY);
-	if (input_fd < 0)
-		die("opening '%s'\n", input_file);
+	input_fd = fd;
 
 	read_or_die(buf, 3);
 	if (memcmp(buf, test, 3) != 0)
-		die("not an trace data file");
+		die("no trace data in the file");
 
 	read_or_die(buf, 7);
 	if (memcmp(buf, "tracing", 7) != 0)
-		die("not a trace file (missing tracing)");
+		die("not a trace file (missing 'tracing' tag)");
 
 	version = read_string();
 	if (show_version)
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 693f815..81698d5 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -1,5 +1,5 @@
-#ifndef _TRACE_EVENTS_H
-#define _TRACE_EVENTS_H
+#ifndef __PERF_TRACE_EVENTS_H
+#define __PERF_TRACE_EVENTS_H
 
 #include "parse-events.h"
 
@@ -26,6 +26,11 @@
 enum format_flags {
 	FIELD_IS_ARRAY		= 1,
 	FIELD_IS_POINTER	= 2,
+	FIELD_IS_SIGNED		= 4,
+	FIELD_IS_STRING		= 8,
+	FIELD_IS_DYNAMIC	= 16,
+	FIELD_IS_FLAG		= 32,
+	FIELD_IS_SYMBOLIC	= 64,
 };
 
 struct format_field {
@@ -132,15 +137,18 @@
 	int			flags;
 	struct format		format;
 	struct print_fmt	print_fmt;
+	char			*system;
 };
 
 enum {
-	EVENT_FL_ISFTRACE	= 1,
-	EVENT_FL_ISPRINT	= 2,
-	EVENT_FL_ISBPRINT	= 4,
-	EVENT_FL_ISFUNC		= 8,
-	EVENT_FL_ISFUNCENT	= 16,
-	EVENT_FL_ISFUNCRET	= 32,
+	EVENT_FL_ISFTRACE	= 0x01,
+	EVENT_FL_ISPRINT	= 0x02,
+	EVENT_FL_ISBPRINT	= 0x04,
+	EVENT_FL_ISFUNC		= 0x08,
+	EVENT_FL_ISFUNCENT	= 0x10,
+	EVENT_FL_ISFUNCRET	= 0x20,
+
+	EVENT_FL_FAILED		= 0x80000000
 };
 
 struct record {
@@ -154,7 +162,7 @@
 
 void parse_set_info(int nr_cpus, int long_sz);
 
-void trace_report(void);
+void trace_report(int fd);
 
 void *malloc_or_die(unsigned int size);
 
@@ -166,7 +174,7 @@
 void print_printk(void);
 
 int parse_ftrace_file(char *buf, unsigned long size);
-int parse_event_file(char *buf, unsigned long size, char *system);
+int parse_event_file(char *buf, unsigned long size, char *sys);
 void print_event(int cpu, void *data, int size, unsigned long long nsecs,
 		  char *comm);
 
@@ -233,13 +241,45 @@
 extern int header_page_data_offset;
 extern int header_page_data_size;
 
+extern int latency_format;
+
 int parse_header_page(char *buf, unsigned long size);
 int trace_parse_common_type(void *data);
+int trace_parse_common_pid(void *data);
+int parse_common_pc(void *data);
+int parse_common_flags(void *data);
+int parse_common_lock_depth(void *data);
 struct event *trace_find_event(int id);
+struct event *trace_find_next_event(struct event *event);
+unsigned long long read_size(void *ptr, int size);
 unsigned long long
 raw_field_value(struct event *event, const char *name, void *data);
 void *raw_field_ptr(struct event *event, const char *name, void *data);
+unsigned long long eval_flag(const char *flag);
 
-void read_tracing_data(struct perf_event_attr *pattrs, int nb_events);
+int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
 
-#endif /* _TRACE_EVENTS_H */
+/* taken from kernel/trace/trace.h */
+enum trace_flag_type {
+	TRACE_FLAG_IRQS_OFF		= 0x01,
+	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
+	TRACE_FLAG_NEED_RESCHED		= 0x04,
+	TRACE_FLAG_HARDIRQ		= 0x08,
+	TRACE_FLAG_SOFTIRQ		= 0x10,
+};
+
+struct scripting_ops {
+	const char *name;
+	int (*start_script) (const char *);
+	int (*stop_script) (void);
+	void (*process_event) (int cpu, void *data, int size,
+			       unsigned long long nsecs, char *comm);
+	int (*generate_script) (const char *outfile);
+};
+
+int script_spec_register(const char *spec, struct scripting_ops *ops);
+
+extern struct scripting_ops perl_scripting_ops;
+void setup_perl_scripting(void);
+
+#endif /* __PERF_TRACE_EVENTS_H */
diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h
index 5e75f90..7d6b833 100644
--- a/tools/perf/util/types.h
+++ b/tools/perf/util/types.h
@@ -1,5 +1,5 @@
-#ifndef _PERF_TYPES_H
-#define _PERF_TYPES_H
+#ifndef __PERF_TYPES_H
+#define __PERF_TYPES_H
 
 /*
  * We define u64 as unsigned long long for every architecture
@@ -14,4 +14,4 @@
 typedef unsigned char	   u8;
 typedef signed char	   s8;
 
-#endif /* _PERF_TYPES_H */
+#endif /* __PERF_TYPES_H */
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9de2329..c673d88 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -84,6 +84,9 @@
 #include <iconv.h>
 #endif
 
+extern const char *graph_line;
+extern const char *graph_dotted_line;
+
 /* On most systems <limits.h> would have given us this, but
  * not on some systems (e.g. GNU/Hurd).
  */
@@ -134,6 +137,15 @@
 extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
 extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
 
+#include "../../../include/linux/stringify.h"
+
+#define DIE_IF(cnd)	\
+	do { if (cnd)	\
+		die(" at (" __FILE__ ":" __stringify(__LINE__) "): "	\
+		    __stringify(cnd) "\n");				\
+	} while (0)
+
+
 extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
 
 extern int prefixcmp(const char *str, const char *prefix);
@@ -278,17 +290,15 @@
  * Wrappers:
  */
 extern char *xstrdup(const char *str);
-extern void *xmalloc(size_t size);
+extern void *xmalloc(size_t size) __attribute__((weak));
 extern void *xmemdupz(const void *data, size_t len);
 extern char *xstrndup(const char *str, size_t len);
-extern void *xrealloc(void *ptr, size_t size);
-extern void *xcalloc(size_t nmemb, size_t size);
-extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
-extern ssize_t xread(int fd, void *buf, size_t len);
-extern ssize_t xwrite(int fd, const void *buf, size_t len);
-extern int xdup(int fd);
-extern FILE *xfdopen(int fd, const char *mode);
-extern int xmkstemp(char *template);
+extern void *xrealloc(void *ptr, size_t size) __attribute__((weak));
+
+static inline void *zalloc(size_t size)
+{
+	return calloc(1, size);
+}
 
 static inline size_t xsize_t(off_t len)
 {
@@ -306,6 +316,7 @@
 #undef isascii
 #undef isspace
 #undef isdigit
+#undef isxdigit
 #undef isalpha
 #undef isprint
 #undef isalnum
@@ -323,6 +334,8 @@
 #define isascii(x) (((x) & ~0x7f) == 0)
 #define isspace(x) sane_istest(x,GIT_SPACE)
 #define isdigit(x) sane_istest(x,GIT_DIGIT)
+#define isxdigit(x)	\
+	(sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G')
 #define isalpha(x) sane_istest(x,GIT_ALPHA)
 #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
 #define isprint(x) sane_istest(x,GIT_PRINT)
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
index cadf8cf..2fa967e 100644
--- a/tools/perf/util/values.h
+++ b/tools/perf/util/values.h
@@ -1,5 +1,5 @@
-#ifndef _PERF_VALUES_H
-#define _PERF_VALUES_H
+#ifndef __PERF_VALUES_H
+#define __PERF_VALUES_H
 
 #include "types.h"
 
@@ -24,4 +24,4 @@
 void perf_read_values_display(FILE *fp, struct perf_read_values *values,
 			      int raw);
 
-#endif /* _PERF_VALUES_H */
+#endif /* __PERF_VALUES_H */
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
index 4574ac2..bf44ca8 100644
--- a/tools/perf/util/wrapper.c
+++ b/tools/perf/util/wrapper.c
@@ -79,43 +79,12 @@
 	return ret;
 }
 
-void *xcalloc(size_t nmemb, size_t size)
-{
-	void *ret = calloc(nmemb, size);
-	if (!ret && (!nmemb || !size))
-		ret = calloc(1, 1);
-	if (!ret) {
-		release_pack_memory(nmemb * size, -1);
-		ret = calloc(nmemb, size);
-		if (!ret && (!nmemb || !size))
-			ret = calloc(1, 1);
-		if (!ret)
-			die("Out of memory, calloc failed");
-	}
-	return ret;
-}
-
-void *xmmap(void *start, size_t length,
-	int prot, int flags, int fd, off_t offset)
-{
-	void *ret = mmap(start, length, prot, flags, fd, offset);
-	if (ret == MAP_FAILED) {
-		if (!length)
-			return NULL;
-		release_pack_memory(length, fd);
-		ret = mmap(start, length, prot, flags, fd, offset);
-		if (ret == MAP_FAILED)
-			die("Out of memory? mmap failed: %s", strerror(errno));
-	}
-	return ret;
-}
-
 /*
  * xread() is the same a read(), but it automatically restarts read()
  * operations with a recoverable error (EAGAIN and EINTR). xread()
  * DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
  */
-ssize_t xread(int fd, void *buf, size_t len)
+static ssize_t xread(int fd, void *buf, size_t len)
 {
 	ssize_t nr;
 	while (1) {
@@ -131,7 +100,7 @@
  * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
  * GUARANTEE that "len" bytes is written even if the operation is successful.
  */
-ssize_t xwrite(int fd, const void *buf, size_t len)
+static ssize_t xwrite(int fd, const void *buf, size_t len)
 {
 	ssize_t nr;
 	while (1) {
@@ -179,29 +148,3 @@
 
 	return total;
 }
-
-int xdup(int fd)
-{
-	int ret = dup(fd);
-	if (ret < 0)
-		die("dup failed: %s", strerror(errno));
-	return ret;
-}
-
-FILE *xfdopen(int fd, const char *mode)
-{
-	FILE *stream = fdopen(fd, mode);
-	if (stream == NULL)
-		die("Out of memory? fdopen failed: %s", strerror(errno));
-	return stream;
-}
-
-int xmkstemp(char *template)
-{
-	int fd;
-
-	fd = mkstemp(template);
-	if (fd < 0)
-		die("Unable to create temporary file: %s", strerror(errno));
-	return fd;
-}