Merge tag 'perf-core-for-mingo-2' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - List perf probes to stdout. (Masami Hiramatsu)

  - Return error when none of the requested probes were
    installed. (Masami Hiramatsu)

  - Cut off the gcc optimization postfixes from
    function name in 'perf probe'. (Masami Hiramatsu)

  - Allow disabling/enabling events dynamicly in 'perf top':
    a 'perf top' session can instantly become a 'perf report'
    one, i.e. going from dynamic analysis to a static one,
    returning to a dynamic one is possible, to toogle the
    modes, just press CTRL+z. (Arnaldo Carvalho de Melo)

  - Greatly speed up 'perf probe --list' by caching debuginfo.
    (Masami Hiramatsu)

  - Fix 'perf trace' race condition at the end of started
    workloads. (Sukadev Bhattiprolu)

  - Fix a problem when opening old perf.data with different
    byte order. (Wang Nan)

Infrastructure changes:

  - Replace map->referenced & maps->removed_maps with
    map->refcnt. (Arnaldo Carvalho de Melo)

  - Introduce the xyarray__reset() function. (Jiri Olsa)

  - Add thread_map__(alloc|realloc)() helpers. (Jiri Olsa)

  - Move perf_evsel__(alloc|free|reset)_counts into stat object. (Jiri Olsa)

  - Introduce perf_counts__(new|delete|reset)() functions. (Jiri Olsa)

  - Ignore .config-detected in .gitignore. (Wang Nan)

  - Move libtraceevent dynamic list to separated LDFLAGS
    variable. (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 812f904..09db62b 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -28,3 +28,4 @@
 *-flex.*
 *.pyc
 *.pyo
+.config-detected
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index b1dfcd8..1af0cfe 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -174,7 +174,7 @@
 export LIBTRACEEVENT
 
 LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
-LDFLAGS += -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
+LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
 
 LIBAPI = $(LIB_PATH)libapi.a
 export LIBAPI
@@ -190,8 +190,9 @@
 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
 PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
 
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
-	$(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
+	$(QUIET_GEN)CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
+	  $(PYTHON_WORD) util/setup.py \
 	  --quiet build_ext; \
 	mkdir -p $(OUTPUT)python && \
 	cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
@@ -282,7 +283,8 @@
 	$(Q)$(MAKE) $(build)=perf
 
 $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
+		$(PERF_IN) $(LIBS) -o $@
 
 $(GTK_IN): FORCE
 	$(Q)$(MAKE) $(build)=gtk
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b24ecee..fcf99bd 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -178,24 +178,19 @@
 
 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
 {
-	void *addr;
-	size_t sz;
+	struct perf_counts *counts;
 
-	sz = sizeof(*evsel->counts) +
-	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
+	counts = perf_counts__new(perf_evsel__nr_cpus(evsel));
+	if (counts)
+		evsel->prev_raw_counts = counts;
 
-	addr = zalloc(sz);
-	if (!addr)
-		return -ENOMEM;
-
-	evsel->prev_raw_counts =  addr;
-
-	return 0;
+	return counts ? 0 : -ENOMEM;
 }
 
 static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 {
-	zfree(&evsel->prev_raw_counts);
+	perf_counts__delete(evsel->prev_raw_counts);
+	evsel->prev_raw_counts = NULL;
 }
 
 static void perf_evlist__free_stats(struct perf_evlist *evlist)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6b98742..72d8a7a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -235,10 +235,13 @@
 
 	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel,
 				       0, top->sym_pcnt_filter, top->print_entries, 4);
-	if (top->zero)
-		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
-	else
-		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+
+	if (top->evlist->enabled) {
+		if (top->zero)
+			symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
+		else
+			symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+	}
 	if (more != 0)
 		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
 out_unlock:
@@ -276,11 +279,13 @@
 		return;
 	}
 
-	if (top->zero) {
-		hists__delete_entries(hists);
-	} else {
-		hists__decay_entries(hists, top->hide_user_symbols,
-				     top->hide_kernel_symbols);
+	if (top->evlist->enabled) {
+		if (top->zero) {
+			hists__delete_entries(hists);
+		} else {
+			hists__decay_entries(hists, top->hide_user_symbols,
+					     top->hide_kernel_symbols);
+		}
 	}
 
 	hists__collapse_resort(hists, NULL);
@@ -545,11 +550,13 @@
 
 	hists = evsel__hists(t->sym_evsel);
 
-	if (t->zero) {
-		hists__delete_entries(hists);
-	} else {
-		hists__decay_entries(hists, t->hide_user_symbols,
-				     t->hide_kernel_symbols);
+	if (t->evlist->enabled) {
+		if (t->zero) {
+			hists__delete_entries(hists);
+		} else {
+			hists__decay_entries(hists, t->hide_user_symbols,
+					     t->hide_kernel_symbols);
+		}
 	}
 
 	hists__collapse_resort(hists, NULL);
@@ -579,8 +586,21 @@
 		hists->uid_filter_str = top->record_opts.target.uid_str;
 	}
 
-	perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
-				      &top->session->header.env);
+	while (true)  {
+		int key = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+							top->min_percent,
+							&top->session->header.env);
+
+		if (key != CTRL('z'))
+			break;
+
+		perf_evlist__toggle_enable(top->evlist);
+		/*
+		 * No need to refresh, resort/decay histogram entries
+		 * if we are not collecting samples:
+		 */
+		hbt.refresh = top->evlist->enabled ? top->delay_secs : 0;
+	}
 
 	done = 1;
 	return NULL;
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index e34dfdf..9a7a116 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -3,6 +3,7 @@
 #include "thread_map.h"
 #include "cpumap.h"
 #include "debug.h"
+#include "stat.h"
 
 int test__openat_syscall_event_on_all_cpus(void)
 {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e64893f..8f7c4d4 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1736,6 +1736,7 @@
 	"t             Zoom into current Thread\n"
 	"V             Verbose (DSO names in callchains, etc)\n"
 	"z             Toggle zeroing of samples\n"
+	"CTRL+z        Enable/Disable events\n"
 	"/             Filter symbol by name";
 
 	if (browser == NULL)
@@ -1900,6 +1901,7 @@
 			/* Fall thru */
 		case 'q':
 		case CTRL('c'):
+		case CTRL('z'):
 			goto out_free_stack;
 		default:
 			continue;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index dc1dc2c..8366511 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -297,6 +297,8 @@
 				      PERF_EVENT_IOC_DISABLE, 0);
 		}
 	}
+
+	evlist->enabled = false;
 }
 
 void perf_evlist__enable(struct perf_evlist *evlist)
@@ -316,6 +318,13 @@
 				      PERF_EVENT_IOC_ENABLE, 0);
 		}
 	}
+
+	evlist->enabled = true;
+}
+
+void perf_evlist__toggle_enable(struct perf_evlist *evlist)
+{
+	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
 }
 
 int perf_evlist__disable_event(struct perf_evlist *evlist,
@@ -634,11 +643,18 @@
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
 	struct perf_mmap *md = &evlist->mmap[idx];
-	u64 head = perf_mmap__read_head(md);
+	u64 head;
 	u64 old = md->prev;
 	unsigned char *data = md->base + page_size;
 	union perf_event *event = NULL;
 
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!atomic_read(&md->refcnt))
+		return NULL;
+
+	head = perf_mmap__read_head(md);
 	if (evlist->overwrite) {
 		/*
 		 * If we're further behind than half the buffer, there's a chance
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 955bf31..a8489b9 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -41,6 +41,7 @@
 	int		 nr_groups;
 	int		 nr_mmaps;
 	bool		 overwrite;
+	bool		 enabled;
 	size_t		 mmap_len;
 	int		 id_pos;
 	int		 is_pos;
@@ -139,6 +140,7 @@
 
 void perf_evlist__disable(struct perf_evlist *evlist);
 void perf_evlist__enable(struct perf_evlist *evlist);
+void perf_evlist__toggle_enable(struct perf_evlist *evlist);
 
 int perf_evlist__disable_event(struct perf_evlist *evlist,
 			       struct perf_evsel *evsel);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d4f9994..33449de 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -26,6 +26,7 @@
 #include "perf_regs.h"
 #include "debug.h"
 #include "trace-event.h"
+#include "stat.h"
 
 static struct {
 	bool sample_id_all;
@@ -851,19 +852,6 @@
 	return 0;
 }
 
-void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
-{
-	memset(evsel->counts, 0, (sizeof(*evsel->counts) +
-				 (ncpus * sizeof(struct perf_counts_values))));
-}
-
-int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
-{
-	evsel->counts = zalloc((sizeof(*evsel->counts) +
-				(ncpus * sizeof(struct perf_counts_values))));
-	return evsel->counts != NULL ? 0 : -ENOMEM;
-}
-
 static void perf_evsel__free_fd(struct perf_evsel *evsel)
 {
 	xyarray__delete(evsel->fd);
@@ -891,11 +879,6 @@
 		}
 }
 
-void perf_evsel__free_counts(struct perf_evsel *evsel)
-{
-	zfree(&evsel->counts);
-}
-
 void perf_evsel__exit(struct perf_evsel *evsel)
 {
 	assert(list_empty(&evsel->node));
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 21ec082..bb0579e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -170,9 +170,6 @@
 int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
-int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
-void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
-void perf_evsel__free_counts(struct perf_evsel *evsel);
 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f53d017..6f28d53 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -313,8 +313,7 @@
 				memset(&he->stat, 0, sizeof(he->stat));
 		}
 
-		if (he->ms.map)
-			he->ms.map->referenced = true;
+		map__get(he->ms.map);
 
 		if (he->branch_info) {
 			/*
@@ -324,6 +323,7 @@
 			 */
 			he->branch_info = malloc(sizeof(*he->branch_info));
 			if (he->branch_info == NULL) {
+				map__zput(he->ms.map);
 				free(he->stat_acc);
 				free(he);
 				return NULL;
@@ -332,17 +332,13 @@
 			memcpy(he->branch_info, template->branch_info,
 			       sizeof(*he->branch_info));
 
-			if (he->branch_info->from.map)
-				he->branch_info->from.map->referenced = true;
-			if (he->branch_info->to.map)
-				he->branch_info->to.map->referenced = true;
+			map__get(he->branch_info->from.map);
+			map__get(he->branch_info->to.map);
 		}
 
 		if (he->mem_info) {
-			if (he->mem_info->iaddr.map)
-				he->mem_info->iaddr.map->referenced = true;
-			if (he->mem_info->daddr.map)
-				he->mem_info->daddr.map->referenced = true;
+			map__get(he->mem_info->iaddr.map);
+			map__get(he->mem_info->daddr.map);
 		}
 
 		if (symbol_conf.use_callchain)
@@ -407,9 +403,8 @@
 			 * the history counter to increment.
 			 */
 			if (he->ms.map != entry->ms.map) {
-				he->ms.map = entry->ms.map;
-				if (he->ms.map)
-					he->ms.map->referenced = true;
+				map__put(he->ms.map);
+				he->ms.map = map__get(entry->ms.map);
 			}
 			goto out;
 		}
@@ -933,8 +928,20 @@
 void hist_entry__delete(struct hist_entry *he)
 {
 	thread__zput(he->thread);
-	zfree(&he->branch_info);
-	zfree(&he->mem_info);
+	map__zput(he->ms.map);
+
+	if (he->branch_info) {
+		map__zput(he->branch_info->from.map);
+		map__zput(he->branch_info->to.map);
+		zfree(&he->branch_info);
+	}
+
+	if (he->mem_info) {
+		map__zput(he->mem_info->iaddr.map);
+		map__zput(he->mem_info->daddr.map);
+		zfree(&he->mem_info);
+	}
+
 	zfree(&he->stat_acc);
 	free_srcline(he->srcline);
 	free_callchain(he->callchain);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 1241ab9..b5a5e9c 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -137,7 +137,6 @@
 	map->unmap_ip = map__unmap_ip;
 	RB_CLEAR_NODE(&map->rb_node);
 	map->groups   = NULL;
-	map->referenced = false;
 	map->erange_warned = false;
 	atomic_set(&map->refcnt, 1);
 }
@@ -439,7 +438,6 @@
 {
 	maps->entries = RB_ROOT;
 	pthread_rwlock_init(&maps->lock, NULL);
-	INIT_LIST_HEAD(&maps->removed_maps);
 }
 
 void map_groups__init(struct map_groups *mg, struct machine *machine)
@@ -466,21 +464,10 @@
 	}
 }
 
-static void __maps__purge_removed_maps(struct maps *maps)
-{
-	struct map *pos, *n;
-
-	list_for_each_entry_safe(pos, n, &maps->removed_maps, node) {
-		list_del_init(&pos->node);
-		map__put(pos);
-	}
-}
-
 static void maps__exit(struct maps *maps)
 {
 	pthread_rwlock_wrlock(&maps->lock);
 	__maps__purge(maps);
-	__maps__purge_removed_maps(maps);
 	pthread_rwlock_unlock(&maps->lock);
 }
 
@@ -499,8 +486,6 @@
 	for (i = 0; i < MAP__NR_TYPES; ++i) {
 		if (maps__first(&mg->maps[i]))
 			return false;
-		if (!list_empty(&mg->maps[i].removed_maps))
-			return false;
 	}
 
 	return true;
@@ -621,7 +606,7 @@
 	return printed += maps__fprintf(&mg->maps[type], fp);
 }
 
-static size_t map_groups__fprintf_maps(struct map_groups *mg, FILE *fp)
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
 {
 	size_t printed = 0, i;
 	for (i = 0; i < MAP__NR_TYPES; ++i)
@@ -629,39 +614,6 @@
 	return printed;
 }
 
-static size_t __map_groups__fprintf_removed_maps(struct map_groups *mg,
-						 enum map_type type, FILE *fp)
-{
-	struct map *pos;
-	size_t printed = 0;
-
-	list_for_each_entry(pos, &mg->maps[type].removed_maps, node) {
-		printed += fprintf(fp, "Map:");
-		printed += map__fprintf(pos, fp);
-		if (verbose > 1) {
-			printed += dso__fprintf(pos->dso, type, fp);
-			printed += fprintf(fp, "--\n");
-		}
-	}
-	return printed;
-}
-
-static size_t map_groups__fprintf_removed_maps(struct map_groups *mg,
-					       FILE *fp)
-{
-	size_t printed = 0, i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		printed += __map_groups__fprintf_removed_maps(mg, i, fp);
-	return printed;
-}
-
-size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
-{
-	size_t printed = map_groups__fprintf_maps(mg, fp);
-	printed += fprintf(fp, "Removed maps:\n");
-	return printed + map_groups__fprintf_removed_maps(mg, fp);
-}
-
 static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 {
 	struct rb_root *root;
@@ -719,13 +671,7 @@
 				map__fprintf(after, fp);
 		}
 put_map:
-		/*
-		 * If we have references, just move them to a separate list.
-		 */
-		if (pos->referenced)
-			list_add_tail(&pos->node, &maps->removed_maps);
-		else
-			map__put(pos);
+		map__put(pos);
 
 		if (err)
 			goto out;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index b8df09d..d73e687 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -34,7 +34,6 @@
 	u64			start;
 	u64			end;
 	u8 /* enum map_type */	type;
-	bool			referenced;
 	bool			erange_warned;
 	u32			priv;
 	u32			prot;
@@ -63,7 +62,6 @@
 struct maps {
 	struct rb_root	 entries;
 	pthread_rwlock_t lock;
-	struct list_head removed_maps;
 };
 
 struct map_groups {
@@ -161,6 +159,14 @@
 
 void map__put(struct map *map);
 
+static inline void __map__zput(struct map **map)
+{
+	map__put(*map);
+	*map = NULL;
+}
+
+#define map__zput(map) __map__zput(&map)
+
 int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *map, FILE *fp);
 size_t map__fprintf_dsoname(struct map *map, FILE *fp);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d4cf50b..076527b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -246,6 +246,20 @@
 		clear_probe_trace_event(tevs + i);
 }
 
+static bool kprobe_blacklist__listed(unsigned long address);
+static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
+{
+	/* Get the address of _etext for checking non-probable text symbol */
+	if (kernel_get_symbol_address_by_name("_etext", false) < address)
+		pr_warning("%s is out of .text, skip it.\n", symbol);
+	else if (kprobe_blacklist__listed(address))
+		pr_warning("%s is blacklisted function, skip it.\n", symbol);
+	else
+		return false;
+
+	return true;
+}
+
 #ifdef HAVE_DWARF_SUPPORT
 
 static int kernel_get_module_dso(const char *module, struct dso **pdso)
@@ -415,6 +429,41 @@
 	return ret;
 }
 
+/* For caching the last debuginfo */
+static struct debuginfo *debuginfo_cache;
+static char *debuginfo_cache_path;
+
+static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
+{
+	if ((debuginfo_cache_path && !strcmp(debuginfo_cache_path, module)) ||
+	    (!debuginfo_cache_path && !module && debuginfo_cache))
+		goto out;
+
+	/* Copy module path */
+	free(debuginfo_cache_path);
+	if (module) {
+		debuginfo_cache_path = strdup(module);
+		if (!debuginfo_cache_path) {
+			debuginfo__delete(debuginfo_cache);
+			debuginfo_cache = NULL;
+			goto out;
+		}
+	}
+
+	debuginfo_cache = open_debuginfo(module, silent);
+	if (!debuginfo_cache)
+		zfree(&debuginfo_cache_path);
+out:
+	return debuginfo_cache;
+}
+
+static void debuginfo_cache__exit(void)
+{
+	debuginfo__delete(debuginfo_cache);
+	debuginfo_cache = NULL;
+	zfree(&debuginfo_cache_path);
+}
+
 
 static int get_text_start_address(const char *exec, unsigned long *address)
 {
@@ -476,12 +525,11 @@
 	pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
 		 tp->module ? : "kernel");
 
-	dinfo = open_debuginfo(tp->module, verbose == 0);
-	if (dinfo) {
+	dinfo = debuginfo_cache__open(tp->module, verbose == 0);
+	if (dinfo)
 		ret = debuginfo__find_probe_point(dinfo,
 						 (unsigned long)addr, pp);
-		debuginfo__delete(dinfo);
-	} else
+	else
 		ret = -ENOENT;
 
 	if (ret > 0) {
@@ -559,7 +607,6 @@
 					   bool uprobe)
 {
 	struct ref_reloc_sym *reloc_sym;
-	u64 etext_addr;
 	char *tmp;
 	int i, skipped = 0;
 
@@ -575,31 +622,28 @@
 		pr_warning("Relocated base symbol is not found!\n");
 		return -EINVAL;
 	}
-	/* Get the address of _etext for checking non-probable text symbol */
-	etext_addr = kernel_get_symbol_address_by_name("_etext", false);
 
 	for (i = 0; i < ntevs; i++) {
-		if (tevs[i].point.address && !tevs[i].point.retprobe) {
-			/* If we found a wrong one, mark it by NULL symbol */
-			if (etext_addr < tevs[i].point.address) {
-				pr_warning("%s+%lu is out of .text, skip it.\n",
-				   tevs[i].point.symbol, tevs[i].point.offset);
-				tmp = NULL;
-				skipped++;
-			} else {
-				tmp = strdup(reloc_sym->name);
-				if (!tmp)
-					return -ENOMEM;
-			}
-			/* If we have no realname, use symbol for it */
-			if (!tevs[i].point.realname)
-				tevs[i].point.realname = tevs[i].point.symbol;
-			else
-				free(tevs[i].point.symbol);
-			tevs[i].point.symbol = tmp;
-			tevs[i].point.offset = tevs[i].point.address -
-					       reloc_sym->unrelocated_addr;
+		if (!tevs[i].point.address || tevs[i].point.retprobe)
+			continue;
+		/* If we found a wrong one, mark it by NULL symbol */
+		if (kprobe_warn_out_range(tevs[i].point.symbol,
+					  tevs[i].point.address)) {
+			tmp = NULL;
+			skipped++;
+		} else {
+			tmp = strdup(reloc_sym->name);
+			if (!tmp)
+				return -ENOMEM;
 		}
+		/* If we have no realname, use symbol for it */
+		if (!tevs[i].point.realname)
+			tevs[i].point.realname = tevs[i].point.symbol;
+		else
+			free(tevs[i].point.symbol);
+		tevs[i].point.symbol = tmp;
+		tevs[i].point.offset = tevs[i].point.address -
+				       reloc_sym->unrelocated_addr;
 	}
 	return skipped;
 }
@@ -920,6 +964,10 @@
 
 #else	/* !HAVE_DWARF_SUPPORT */
 
+static void debuginfo_cache__exit(void)
+{
+}
+
 static int
 find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
 				 struct perf_probe_point *pp __maybe_unused,
@@ -2126,9 +2174,31 @@
 	return NULL;
 }
 
-/* Show an event */
-static int show_perf_probe_event(struct perf_probe_event *pev,
-				 const char *module)
+static LIST_HEAD(kprobe_blacklist);
+
+static void kprobe_blacklist__init(void)
+{
+	if (!list_empty(&kprobe_blacklist))
+		return;
+
+	if (kprobe_blacklist__load(&kprobe_blacklist) < 0)
+		pr_debug("No kprobe blacklist support, ignored\n");
+}
+
+static void kprobe_blacklist__release(void)
+{
+	kprobe_blacklist__delete(&kprobe_blacklist);
+}
+
+static bool kprobe_blacklist__listed(unsigned long address)
+{
+	return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
+}
+
+static int perf_probe_event__sprintf(const char *group, const char *event,
+				     struct perf_probe_event *pev,
+				     const char *module,
+				     struct strbuf *result)
 {
 	int i, ret;
 	char buf[128];
@@ -2139,29 +2209,50 @@
 	if (!place)
 		return -EINVAL;
 
-	ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event);
+	ret = e_snprintf(buf, 128, "%s:%s", group, event);
 	if (ret < 0)
-		return ret;
+		goto out;
 
-	pr_info("  %-20s (on %s", buf, place);
+	strbuf_addf(result, "  %-20s (on %s", buf, place);
 	if (module)
-		pr_info(" in %s", module);
+		strbuf_addf(result, " in %s", module);
 
 	if (pev->nargs > 0) {
-		pr_info(" with");
+		strbuf_addstr(result, " with");
 		for (i = 0; i < pev->nargs; i++) {
 			ret = synthesize_perf_probe_arg(&pev->args[i],
 							buf, 128);
 			if (ret < 0)
-				break;
-			pr_info(" %s", buf);
+				goto out;
+			strbuf_addf(result, " %s", buf);
 		}
 	}
-	pr_info(")\n");
+	strbuf_addch(result, ')');
+out:
 	free(place);
 	return ret;
 }
 
+/* Show an event */
+static int show_perf_probe_event(const char *group, const char *event,
+				 struct perf_probe_event *pev,
+				 const char *module, bool use_stdout)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int ret;
+
+	ret = perf_probe_event__sprintf(group, event, pev, module, &buf);
+	if (ret >= 0) {
+		if (use_stdout)
+			printf("%s\n", buf.buf);
+		else
+			pr_info("%s\n", buf.buf);
+	}
+	strbuf_release(&buf);
+
+	return ret;
+}
+
 static bool filter_probe_trace_event(struct probe_trace_event *tev,
 				     struct strfilter *filter)
 {
@@ -2200,9 +2291,11 @@
 				goto next;
 			ret = convert_to_perf_probe_event(&tev, &pev,
 								is_kprobe);
-			if (ret >= 0)
-				ret = show_perf_probe_event(&pev,
-							    tev.point.module);
+			if (ret < 0)
+				goto next;
+			ret = show_perf_probe_event(pev.group, pev.event,
+						    &pev, tev.point.module,
+						    true);
 		}
 next:
 		clear_perf_probe_event(&pev);
@@ -2211,6 +2304,8 @@
 			break;
 	}
 	strlist__delete(rawlist);
+	/* Cleanup cached debuginfo if needed */
+	debuginfo_cache__exit();
 
 	return ret;
 }
@@ -2316,6 +2411,7 @@
 			      struct strlist *namelist, bool allow_suffix)
 {
 	int i, ret;
+	char *p;
 
 	if (*base == '.')
 		base++;
@@ -2326,6 +2422,10 @@
 		pr_debug("snprintf() failed: %d\n", ret);
 		return ret;
 	}
+	/* Cut off the postfixes (e.g. .const, .isra)*/
+	p = strchr(buf, '.');
+	if (p && p != buf)
+		*p = '\0';
 	if (!strlist__has_entry(namelist, buf))
 		return 0;
 
@@ -2381,10 +2481,8 @@
 	int i, fd, ret;
 	struct probe_trace_event *tev = NULL;
 	char buf[64];
-	const char *event, *group;
+	const char *event = NULL, *group = NULL;
 	struct strlist *namelist;
-	LIST_HEAD(blacklist);
-	struct kprobe_blacklist_node *node;
 	bool safename;
 
 	if (pev->uprobes)
@@ -2404,28 +2502,15 @@
 		ret = -ENOMEM;
 		goto close_out;
 	}
-	/* Get kprobe blacklist if exists */
-	if (!pev->uprobes) {
-		ret = kprobe_blacklist__load(&blacklist);
-		if (ret < 0)
-			pr_debug("No kprobe blacklist support, ignored\n");
-	}
 
 	safename = (pev->point.function && !strisglob(pev->point.function));
 	ret = 0;
 	pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
 	for (i = 0; i < ntevs; i++) {
 		tev = &tevs[i];
-		/* Skip if the symbol is out of .text (marked previously) */
+		/* Skip if the symbol is out of .text or blacklisted */
 		if (!tev->point.symbol)
 			continue;
-		/* Ensure that the address is NOT blacklisted */
-		node = kprobe_blacklist__find_by_address(&blacklist,
-							 tev->point.address);
-		if (node) {
-			pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol);
-			continue;
-		}
 
 		if (pev->event)
 			event = pev->event;
@@ -2458,15 +2543,12 @@
 		/* Add added event name to namelist */
 		strlist__add(namelist, event);
 
-		/* Trick here - save current event/group */
-		event = pev->event;
-		group = pev->group;
-		pev->event = tev->event;
-		pev->group = tev->group;
-		show_perf_probe_event(pev, tev->point.module);
-		/* Trick here - restore current event/group */
-		pev->event = (char *)event;
-		pev->group = (char *)group;
+		/* We use tev's name for showing new events */
+		show_perf_probe_event(tev->group, tev->event, pev,
+				      tev->point.module, false);
+		/* Save the last valid name */
+		event = tev->event;
+		group = tev->group;
 
 		/*
 		 * Probes after the first probe which comes from same
@@ -2480,14 +2562,12 @@
 		warn_uprobe_event_compat(tev);
 
 	/* Note that it is possible to skip all events because of blacklist */
-	if (ret >= 0 && tev->event) {
+	if (ret >= 0 && event) {
 		/* Show how to use the event. */
 		pr_info("\nYou can now use it in all perf tools, such as:\n\n");
-		pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
-			 tev->event);
+		pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
 	}
 
-	kprobe_blacklist__delete(&blacklist);
 	strlist__delete(namelist);
 close_out:
 	close(fd);
@@ -2537,7 +2617,7 @@
 	struct perf_probe_point *pp = &pev->point;
 	struct probe_trace_point *tp;
 	int num_matched_functions;
-	int ret, i, j;
+	int ret, i, j, skipped = 0;
 
 	map = get_target_map(pev->target, pev->uprobes);
 	if (!map) {
@@ -2605,7 +2685,12 @@
 		}
 		/* Add one probe point */
 		tp->address = map->unmap_ip(map, sym->start) + pp->offset;
-		if (reloc_sym) {
+		/* If we found a wrong one, mark it by NULL symbol */
+		if (!pev->uprobes &&
+		    kprobe_warn_out_range(sym->name, tp->address)) {
+			tp->symbol = NULL;	/* Skip it */
+			skipped++;
+		} else if (reloc_sym) {
 			tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
 			tp->offset = tp->address - reloc_sym->addr;
 		} else {
@@ -2641,6 +2726,10 @@
 		}
 		arch__fix_tev_from_maps(pev, tev, map);
 	}
+	if (ret == skipped) {
+		ret = -ENOENT;
+		goto err_out;
+	}
 
 out:
 	put_target_map(map, pev->uprobes);
@@ -2711,6 +2800,9 @@
 	/* Loop 1: convert all events */
 	for (i = 0; i < npevs; i++) {
 		pkgs[i].pev = &pevs[i];
+		/* Init kprobe blacklist if needed */
+		if (!pkgs[i].pev->uprobes)
+			kprobe_blacklist__init();
 		/* Convert with or without debuginfo */
 		ret  = convert_to_probe_trace_events(pkgs[i].pev,
 						     &pkgs[i].tevs);
@@ -2718,6 +2810,8 @@
 			goto end;
 		pkgs[i].ntevs = ret;
 	}
+	/* This just release blacklist only if allocated */
+	kprobe_blacklist__release();
 
 	/* Loop 2: add all events */
 	for (i = 0; i < npevs; i++) {
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 4d28624..5925fec 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -16,6 +16,7 @@
 util/xyarray.c
 util/cgroup.c
 util/rblist.c
+util/stat.c
 util/strlist.c
 util/trace-event.c
 ../../lib/rbtree.c
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f31e024..e1cd17c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -517,20 +517,42 @@
 {
 	attr->type		= bswap_32(attr->type);
 	attr->size		= bswap_32(attr->size);
-	attr->config		= bswap_64(attr->config);
-	attr->sample_period	= bswap_64(attr->sample_period);
-	attr->sample_type	= bswap_64(attr->sample_type);
-	attr->read_format	= bswap_64(attr->read_format);
-	attr->wakeup_events	= bswap_32(attr->wakeup_events);
-	attr->bp_type		= bswap_32(attr->bp_type);
-	attr->bp_addr		= bswap_64(attr->bp_addr);
-	attr->bp_len		= bswap_64(attr->bp_len);
-	attr->branch_sample_type = bswap_64(attr->branch_sample_type);
-	attr->sample_regs_user	 = bswap_64(attr->sample_regs_user);
-	attr->sample_stack_user  = bswap_32(attr->sample_stack_user);
-	attr->aux_watermark	 = bswap_32(attr->aux_watermark);
 
-	swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
+#define bswap_safe(f, n) 					\
+	(attr->size > (offsetof(struct perf_event_attr, f) + 	\
+		       sizeof(attr->f) * (n)))
+#define bswap_field(f, sz) 			\
+do { 						\
+	if (bswap_safe(f, 0))			\
+		attr->f = bswap_##sz(attr->f);	\
+} while(0)
+#define bswap_field_32(f) bswap_field(f, 32)
+#define bswap_field_64(f) bswap_field(f, 64)
+
+	bswap_field_64(config);
+	bswap_field_64(sample_period);
+	bswap_field_64(sample_type);
+	bswap_field_64(read_format);
+	bswap_field_32(wakeup_events);
+	bswap_field_32(bp_type);
+	bswap_field_64(bp_addr);
+	bswap_field_64(bp_len);
+	bswap_field_64(branch_sample_type);
+	bswap_field_64(sample_regs_user);
+	bswap_field_32(sample_stack_user);
+	bswap_field_32(aux_watermark);
+
+	/*
+	 * After read_format are bitfields. Check read_format because
+	 * we are unable to use offsetof on bitfield.
+	 */
+	if (bswap_safe(read_format, 1))
+		swap_bitfield((u8 *) (&attr->read_format + 1),
+			      sizeof(u64));
+#undef bswap_field_64
+#undef bswap_field_32
+#undef bswap_field
+#undef bswap_safe
 }
 
 static void perf_event__hdr_attr_swap(union perf_event *event,
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 60b9282..4014b70 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -94,3 +94,39 @@
 		}
 	}
 }
+
+struct perf_counts *perf_counts__new(int ncpus)
+{
+	int size = sizeof(struct perf_counts) +
+		   ncpus * sizeof(struct perf_counts_values);
+
+	return zalloc(size);
+}
+
+void perf_counts__delete(struct perf_counts *counts)
+{
+	free(counts);
+}
+
+static void perf_counts__reset(struct perf_counts *counts, int ncpus)
+{
+	memset(counts, 0, (sizeof(*counts) +
+	       (ncpus * sizeof(struct perf_counts_values))));
+}
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
+{
+	perf_counts__reset(evsel->counts, ncpus);
+}
+
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+	evsel->counts = perf_counts__new(ncpus);
+	return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+	perf_counts__delete(evsel->counts);
+	evsel->counts = NULL;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 615c779..093dc3c 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -62,4 +62,10 @@
 void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
 				   double avg, int cpu, enum aggr_mode aggr);
 
+struct perf_counts *perf_counts__new(int ncpus);
+void perf_counts__delete(struct perf_counts *counts);
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
 #endif
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index f93b973..f4822bd 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -20,6 +20,15 @@
 		return 1;
 }
 
+static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
+{
+	size_t size = sizeof(*map) + sizeof(pid_t) * nr;
+
+	return realloc(map, size);
+}
+
+#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr)
+
 struct thread_map *thread_map__new_by_pid(pid_t pid)
 {
 	struct thread_map *threads;
@@ -33,7 +42,7 @@
 	if (items <= 0)
 		return NULL;
 
-	threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+	threads = thread_map__alloc(items);
 	if (threads != NULL) {
 		for (i = 0; i < items; i++)
 			threads->map[i] = atoi(namelist[i]->d_name);
@@ -49,7 +58,7 @@
 
 struct thread_map *thread_map__new_by_tid(pid_t tid)
 {
-	struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+	struct thread_map *threads = thread_map__alloc(1);
 
 	if (threads != NULL) {
 		threads->map[0] = tid;
@@ -65,8 +74,8 @@
 	int max_threads = 32, items, i;
 	char path[256];
 	struct dirent dirent, *next, **namelist = NULL;
-	struct thread_map *threads = malloc(sizeof(*threads) +
-					    max_threads * sizeof(pid_t));
+	struct thread_map *threads = thread_map__alloc(max_threads);
+
 	if (threads == NULL)
 		goto out;
 
@@ -185,8 +194,7 @@
 			goto out_free_threads;
 
 		total_tasks += items;
-		nt = realloc(threads, (sizeof(*threads) +
-				       sizeof(pid_t) * total_tasks));
+		nt = thread_map__realloc(threads, total_tasks);
 		if (nt == NULL)
 			goto out_free_namelist;
 
@@ -216,7 +224,7 @@
 
 struct thread_map *thread_map__new_dummy(void)
 {
-	struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+	struct thread_map *threads = thread_map__alloc(1);
 
 	if (threads != NULL) {
 		threads->map[0]	= -1;
@@ -253,7 +261,7 @@
 			continue;
 
 		ntasks++;
-		nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
+		nt = thread_map__realloc(threads, ntasks);
 
 		if (nt == NULL)
 			goto out_free_threads;
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index f079b63..4c00507 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -360,7 +360,7 @@
 		unw_word_t base = is_exec ? 0 : map->start;
 
 		if (fd >= 0)
-			dso__data_put_fd(dso);
+			dso__data_put_fd(map->dso);
 
 		memset(&di, 0, sizeof(di));
 		if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name,
diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
index 22afbf6..c10ba41 100644
--- a/tools/perf/util/xyarray.c
+++ b/tools/perf/util/xyarray.c
@@ -9,11 +9,19 @@
 	if (xy != NULL) {
 		xy->entry_size = entry_size;
 		xy->row_size   = row_size;
+		xy->entries    = xlen * ylen;
 	}
 
 	return xy;
 }
 
+void xyarray__reset(struct xyarray *xy)
+{
+	size_t n = xy->entries * xy->entry_size;
+
+	memset(xy->contents, 0, n);
+}
+
 void xyarray__delete(struct xyarray *xy)
 {
 	free(xy);
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
index c488a07..7f30af3 100644
--- a/tools/perf/util/xyarray.h
+++ b/tools/perf/util/xyarray.h
@@ -6,11 +6,13 @@
 struct xyarray {
 	size_t row_size;
 	size_t entry_size;
+	size_t entries;
 	char contents[];
 };
 
 struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
 void xyarray__delete(struct xyarray *xy);
+void xyarray__reset(struct xyarray *xy);
 
 static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
 {