Merge tag 'perf-core-for-mingo-20160803' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Add --sample-cpu to 'perf record', to explicitely ask for sampling
  the CPU (Jiri Olsa)

Fixes:

- Fix processing of multi byte chunks in objdump output, fixing
  disassemble processing for annotation on at least ARM64 (Jan Stancek)

- Use SyS_epoll_wait in a BPF 'perf test' entry instead of sys_epoll_wait, that
  is not present in the DWARF info in vmlinux files (Arnaldo Carvalho de Melo)

- Add -wno-shadow when processing files using perl headers, fixing
  the build on Fedora Rawhide and Arch Linux (Namhyung Kim)

Infrastructure changes:

- Annotate prep work to better catch and report errors related to
  using objdump to disassemble DSOs (Arnaldo Carvalho de Melo)

- Add 'alloc', 'scnprintf' and 'and' methods for bitmap processing (Jiri Olsa)

- Add nested output resorting callback in hists processing (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 28f5493..43c1c50 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -3,6 +3,7 @@
 
 #include <string.h>
 #include <linux/bitops.h>
+#include <stdlib.h>
 
 #define DECLARE_BITMAP(name,bits) \
 	unsigned long name[BITS_TO_LONGS(bits)]
@@ -10,6 +11,8 @@
 int __bitmap_weight(const unsigned long *bitmap, int bits);
 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, int bits);
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+		 const unsigned long *bitmap2, unsigned int bits);
 
 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
 
@@ -65,4 +68,38 @@
 	return (old & mask) != 0;
 }
 
+/**
+ * bitmap_alloc - Allocate bitmap
+ * @nr: Bit to set
+ */
+static inline unsigned long *bitmap_alloc(int nbits)
+{
+	return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
+}
+
+/*
+ * bitmap_scnprintf - print bitmap list into buffer
+ * @bitmap: bitmap
+ * @nbits: size of bitmap
+ * @buf: buffer to store output
+ * @size: size of @buf
+ */
+size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
+			char *buf, size_t size);
+
+/**
+ * bitmap_and - Do logical and on bitmaps
+ * @dst: resulting bitmap
+ * @src1: operand 1
+ * @src2: operand 2
+ * @nbits: size of bitmap
+ */
+static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
+			     const unsigned long *src2, unsigned int nbits)
+{
+	if (small_const_nbits(nbits))
+		return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+	return __bitmap_and(dst, src1, src2, nbits);
+}
+
 #endif /* _PERF_BITOPS_H */
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
index 0a1adc1..38748b0 100644
--- a/tools/lib/bitmap.c
+++ b/tools/lib/bitmap.c
@@ -29,3 +29,47 @@
 	for (k = 0; k < nr; k++)
 		dst[k] = bitmap1[k] | bitmap2[k];
 }
+
+size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
+			char *buf, size_t size)
+{
+	/* current bit is 'cur', most recently seen range is [rbot, rtop] */
+	int cur, rbot, rtop;
+	bool first = true;
+	size_t ret = 0;
+
+	rbot = cur = find_first_bit(bitmap, nbits);
+	while (cur < nbits) {
+		rtop = cur;
+		cur = find_next_bit(bitmap, nbits, cur + 1);
+		if (cur < nbits && cur <= rtop + 1)
+			continue;
+
+		if (!first)
+			ret += scnprintf(buf + ret, size - ret, ",");
+
+		first = false;
+
+		ret += scnprintf(buf + ret, size - ret, "%d", rbot);
+		if (rbot < rtop)
+			ret += scnprintf(buf + ret, size - ret, "-%d", rtop);
+
+		rbot = cur;
+	}
+	return ret;
+}
+
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+		 const unsigned long *bitmap2, unsigned int bits)
+{
+	unsigned int k;
+	unsigned int lim = bits/BITS_PER_LONG;
+	unsigned long result = 0;
+
+	for (k = 0; k < lim; k++)
+		result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+	if (bits % BITS_PER_LONG)
+		result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+			   BITMAP_LAST_WORD_MASK(bits));
+	return result != 0;
+}
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
index 3c60335..9e9f25f 100644
--- a/tools/lib/traceevent/.gitignore
+++ b/tools/lib/traceevent/.gitignore
@@ -1,2 +1,3 @@
 TRACEEVENT-CFLAGS
 libtraceevent-dynamic-list
+libtraceevent.so.*
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 69966ab..379a2be 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -192,6 +192,9 @@
 --period::
 	Record the sample period.
 
+--sample-cpu::
+	Record the sample cpu.
+
 -n::
 --no-samples::
 	Don't sample.
diff --git a/tools/perf/config/Makefile b/tools/perf/Makefile.config
similarity index 100%
rename from tools/perf/config/Makefile
rename to tools/perf/Makefile.config
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 6641abb..2d90875 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -161,7 +161,7 @@
 BPF_DIR		= $(srctree)/tools/lib/bpf/
 SUBCMD_DIR	= $(srctree)/tools/lib/subcmd/
 
-# include config/Makefile by default and rule out
+# include Makefile.config by default and rule out
 # non-config cases
 config := 1
 
@@ -183,7 +183,7 @@
 FEATURE_TESTS := all
 endif
 endif
-include config/Makefile
+include Makefile.config
 endif
 
 ifeq ($(config),0)
@@ -706,7 +706,7 @@
 ### Cleaning rules
 
 #
-# This is here, not in config/Makefile, because config/Makefile does
+# This is here, not in Makefile.config, because Makefile.config does
 # not get included for the clean target:
 #
 config-clean:
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8f2c16d..6355902 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1434,6 +1434,7 @@
 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 		    "per thread counts"),
 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
 			&record.opts.sample_time_set,
 			"Record the sample timestamps"),
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index bd10868..418ed94 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -128,10 +128,14 @@
 		return err;
 	}
 
-	err = symbol__annotate(sym, map, 0);
+	err = symbol__disassemble(sym, map, 0);
 	if (err == 0) {
 out_assign:
 		top->sym_filter_entry = he;
+	} else {
+		char msg[BUFSIZ];
+		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+		pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
 	}
 
 	pthread_mutex_unlock(&notes->lock);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index a7e0f14..cb0f135 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -52,6 +52,7 @@
 	bool	     sample_weight;
 	bool	     sample_time;
 	bool	     sample_time_set;
+	bool	     sample_cpu;
 	bool	     period;
 	bool	     running_time;
 	bool	     full_auxtrace;
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build
index 928e110..34faecf 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Build
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build
@@ -1,3 +1,5 @@
 libperf-y += Context.o
 
-CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default
+CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes
+CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef
+CFLAGS_Context.o += -Wno-switch-default -Wno-shadow
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index cb20ae1..dc51bc5 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -41,6 +41,7 @@
 perf-y += backward-ring-buffer.o
 perf-y += sdt.o
 perf-y += is_printable_array.o
+perf-y += bitmap.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
new file mode 100644
index 0000000..9abe6c1
--- /dev/null
+++ b/tools/perf/tests/bitmap.c
@@ -0,0 +1,53 @@
+#include <linux/compiler.h>
+#include <linux/bitmap.h>
+#include "tests.h"
+#include "cpumap.h"
+#include "debug.h"
+
+#define NBITS 100
+
+static unsigned long *get_bitmap(const char *str, int nbits)
+{
+	struct cpu_map *map = cpu_map__new(str);
+	unsigned long *bm = NULL;
+	int i;
+
+	bm = bitmap_alloc(nbits);
+
+	if (map && bm) {
+		bitmap_zero(bm, nbits);
+
+		for (i = 0; i < map->nr; i++)
+			set_bit(map->map[i], bm);
+	}
+
+	if (map)
+		cpu_map__put(map);
+	return bm;
+}
+
+static int test_bitmap(const char *str)
+{
+	unsigned long *bm = get_bitmap(str, NBITS);
+	char buf[100];
+	int ret;
+
+	bitmap_scnprintf(bm, NBITS, buf, sizeof(buf));
+	pr_debug("bitmap: %s\n", buf);
+
+	ret = !strcmp(buf, str);
+	free(bm);
+	return ret;
+}
+
+int test__bitmap_print(int subtest __maybe_unused)
+{
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3,5,7,9,11,13,15,17,19,21-40"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("2-5"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3-6,8-10,24,35-37"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3-6,8-10,24,35-37"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1-10,12-20,22-30,32-40"));
+	return 0;
+}
diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c
index e53bc91..268e5f8 100644
--- a/tools/perf/tests/bpf-script-example.c
+++ b/tools/perf/tests/bpf-script-example.c
@@ -31,8 +31,8 @@
 	.max_entries = 1,
 };
 
-SEC("func=sys_epoll_wait")
-int bpf_func__sys_epoll_wait(void *ctx)
+SEC("func=SyS_epoll_wait")
+int bpf_func__SyS_epoll_wait(void *ctx)
 {
 	int ind =0;
 	int *flag = bpf_map_lookup_elem(&flip_table, &ind);
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 10eb306..778668a 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -226,6 +226,10 @@
 		.func = test__is_printable_array,
 	},
 	{
+		.desc = "Test bitmap print",
+		.func = test__bitmap_print,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 68a69a1..2af156a8 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -33,44 +33,86 @@
 	return c - 'A' + 10;
 }
 
-static size_t read_objdump_line(const char *line, size_t line_len, void *buf,
-			      size_t len)
+static size_t read_objdump_chunk(const char **line, unsigned char **buf,
+				 size_t *buf_len)
+{
+	size_t bytes_read = 0;
+	unsigned char *chunk_start = *buf;
+
+	/* Read bytes */
+	while (*buf_len > 0) {
+		char c1, c2;
+
+		/* Get 2 hex digits */
+		c1 = *(*line)++;
+		if (!isxdigit(c1))
+			break;
+		c2 = *(*line)++;
+		if (!isxdigit(c2))
+			break;
+
+		/* Store byte and advance buf */
+		**buf = (hex(c1) << 4) | hex(c2);
+		(*buf)++;
+		(*buf_len)--;
+		bytes_read++;
+
+		/* End of chunk? */
+		if (isspace(**line))
+			break;
+	}
+
+	/*
+	 * objdump will display raw insn as LE if code endian
+	 * is LE and bytes_per_chunk > 1. In that case reverse
+	 * the chunk we just read.
+	 *
+	 * see disassemble_bytes() at binutils/objdump.c for details
+	 * how objdump chooses display endian)
+	 */
+	if (bytes_read > 1 && !bigendian()) {
+		unsigned char *chunk_end = chunk_start + bytes_read - 1;
+		unsigned char tmp;
+
+		while (chunk_start < chunk_end) {
+			tmp = *chunk_start;
+			*chunk_start = *chunk_end;
+			*chunk_end = tmp;
+			chunk_start++;
+			chunk_end--;
+		}
+	}
+
+	return bytes_read;
+}
+
+static size_t read_objdump_line(const char *line, unsigned char *buf,
+				size_t buf_len)
 {
 	const char *p;
-	size_t i, j = 0;
+	size_t ret, bytes_read = 0;
 
 	/* Skip to a colon */
 	p = strchr(line, ':');
 	if (!p)
 		return 0;
-	i = p + 1 - line;
+	p++;
 
-	/* Read bytes */
-	while (j < len) {
-		char c1, c2;
-
-		/* Skip spaces */
-		for (; i < line_len; i++) {
-			if (!isspace(line[i]))
-				break;
-		}
-		/* Get 2 hex digits */
-		if (i >= line_len || !isxdigit(line[i]))
+	/* Skip initial spaces */
+	while (*p) {
+		if (!isspace(*p))
 			break;
-		c1 = line[i++];
-		if (i >= line_len || !isxdigit(line[i]))
-			break;
-		c2 = line[i++];
-		/* Followed by a space */
-		if (i < line_len && line[i] && !isspace(line[i]))
-			break;
-		/* Store byte */
-		*(unsigned char *)buf = (hex(c1) << 4) | hex(c2);
-		buf += 1;
-		j++;
+		p++;
 	}
+
+	do {
+		ret = read_objdump_chunk(&p, &buf, &buf_len);
+		bytes_read += ret;
+		p++;
+	} while (ret > 0);
+
 	/* return number of successfully read bytes */
-	return j;
+	return bytes_read;
 }
 
 static int read_objdump_output(FILE *f, void *buf, size_t *len, u64 start_addr)
@@ -95,7 +137,7 @@
 		}
 
 		/* read objdump data into temporary buffer */
-		read_bytes = read_objdump_line(line, ret, tmp, sizeof(tmp));
+		read_bytes = read_objdump_line(line, tmp, sizeof(tmp));
 		if (!read_bytes)
 			continue;
 
@@ -152,7 +194,7 @@
 
 	ret = read_objdump_output(f, buf, &len, addr);
 	if (len) {
-		pr_debug("objdump read too few bytes\n");
+		pr_debug("objdump read too few bytes: %zd\n", len);
 		if (!ret)
 			ret = len;
 	}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 9bfc0e0..7c196c5 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -90,6 +90,7 @@
 int test__cpu_map_print(int subtest);
 int test__sdt_event(int subtest);
 int test__is_printable_array(int subtest);
+int test__bitmap_print(int subtest);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 29dc6d2..2e2d100 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1026,7 +1026,7 @@
 			.use_navkeypressed = true,
 		},
 	};
-	int ret = -1;
+	int ret = -1, err;
 	int nr_pcnt = 1;
 	size_t sizeof_bdl = sizeof(struct browser_disasm_line);
 
@@ -1050,8 +1050,11 @@
 		  (nr_pcnt - 1);
 	}
 
-	if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
-		ui__error("%s", ui_helpline__last_msg);
+	err = symbol__disassemble(sym, map, sizeof_bdl);
+	if (err) {
+		char msg[BUFSIZ];
+		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+		ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
 		goto out_free_offsets;
 	}
 
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 9c7ff8d..42d3199 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -162,12 +162,16 @@
 	GtkWidget *notebook;
 	GtkWidget *scrolled_window;
 	GtkWidget *tab_label;
+	int err;
 
 	if (map->dso->annotate_warned)
 		return -1;
 
-	if (symbol__annotate(sym, map, 0) < 0) {
-		ui__error("%s", ui_helpline__current);
+	err = symbol__disassemble(sym, map, 0);
+	if (err) {
+		char msg[BUFSIZ];
+		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+		ui__error("Couldn't annotate %s: %s\n", sym->name, msg);
 		return -1;
 	}
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index e9825fe..4024d30 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1123,7 +1123,46 @@
 	}
 }
 
-int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
+int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map *map,
+			      int errnum, char *buf, size_t buflen)
+{
+	struct dso *dso = map->dso;
+
+	BUG_ON(buflen == 0);
+
+	if (errnum >= 0) {
+		str_error_r(errnum, buf, buflen);
+		return 0;
+	}
+
+	switch (errnum) {
+	case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
+		char bf[SBUILD_ID_SIZE + 15] = " with build id ";
+		char *build_id_msg = NULL;
+
+		if (dso->has_build_id) {
+			build_id__sprintf(dso->build_id,
+					  sizeof(dso->build_id), bf + 15);
+			build_id_msg = bf;
+		}
+		scnprintf(buf, buflen,
+			  "No vmlinux file%s\nwas found in the path.\n\n"
+			  "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
+			  "Please use:\n\n"
+			  "  perf buildid-cache -vu vmlinux\n\n"
+			  "or:\n\n"
+			  "  --vmlinux vmlinux\n", build_id_msg ?: "");
+	}
+		break;
+	default:
+		scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
+		break;
+	}
+
+	return 0;
+}
+
+int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize)
 {
 	struct dso *dso = map->dso;
 	char *filename = dso__build_id_filename(dso, NULL, 0);
@@ -1134,22 +1173,20 @@
 	char symfs_filename[PATH_MAX];
 	struct kcore_extract kce;
 	bool delete_extract = false;
+	int stdout_fd[2];
 	int lineno = 0;
 	int nline;
+	pid_t pid;
 
 	if (filename)
 		symbol__join_symfs(symfs_filename, filename);
 
 	if (filename == NULL) {
-		if (dso->has_build_id) {
-			pr_err("Can't annotate %s: not enough memory\n",
-			       sym->name);
-			return -ENOMEM;
-		}
+		if (dso->has_build_id)
+			return ENOMEM;
 		goto fallback;
-	} else if (dso__is_kcore(dso)) {
-		goto fallback;
-	} else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
+	} else if (dso__is_kcore(dso) ||
+		   readlink(symfs_filename, command, sizeof(command)) < 0 ||
 		   strstr(command, DSO__NAME_KALLSYMS) ||
 		   access(symfs_filename, R_OK)) {
 		free(filename);
@@ -1166,27 +1203,7 @@
 
 	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
 	    !dso__is_kcore(dso)) {
-		char bf[SBUILD_ID_SIZE + 15] = " with build id ";
-		char *build_id_msg = NULL;
-
-		if (dso->annotate_warned)
-			goto out_free_filename;
-
-		if (dso->has_build_id) {
-			build_id__sprintf(dso->build_id,
-					  sizeof(dso->build_id), bf + 15);
-			build_id_msg = bf;
-		}
-		err = -ENOENT;
-		dso->annotate_warned = 1;
-		pr_err("Can't annotate %s:\n\n"
-		       "No vmlinux file%s\nwas found in the path.\n\n"
-		       "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
-		       "Please use:\n\n"
-		       "  perf buildid-cache -vu vmlinux\n\n"
-		       "or:\n\n"
-		       "  --vmlinux vmlinux\n",
-		       sym->name, build_id_msg ?: "");
+		err = SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
 		goto out_free_filename;
 	}
 
@@ -1258,9 +1275,32 @@
 
 	pr_debug("Executing: %s\n", command);
 
-	file = popen(command, "r");
+	err = -1;
+	if (pipe(stdout_fd) < 0) {
+		pr_err("Failure creating the pipe to run %s\n", command);
+		goto out_remove_tmp;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		pr_err("Failure forking to run %s\n", command);
+		goto out_close_stdout;
+	}
+
+	if (pid == 0) {
+		close(stdout_fd[0]);
+		dup2(stdout_fd[1], 1);
+		close(stdout_fd[1]);
+		execl("/bin/sh", "sh", "-c", command, NULL);
+		perror(command);
+		exit(-1);
+	}
+
+	close(stdout_fd[1]);
+
+	file = fdopen(stdout_fd[0], "r");
 	if (!file) {
-		pr_err("Failure running %s\n", command);
+		pr_err("Failure creating FILE stream for %s\n", command);
 		/*
 		 * If we were using debug info should retry with
 		 * original binary.
@@ -1286,9 +1326,11 @@
 	if (dso__is_kcore(dso))
 		delete_last_nop(sym);
 
-	pclose(file);
-
+	fclose(file);
+	err = 0;
 out_remove_tmp:
+	close(stdout_fd[0]);
+
 	if (dso__needs_decompress(dso))
 		unlink(symfs_filename);
 out_free_filename:
@@ -1297,6 +1339,10 @@
 	if (free_filename)
 		free(filename);
 	return err;
+
+out_close_stdout:
+	close(stdout_fd[1]);
+	goto out_remove_tmp;
 }
 
 static void insert_source_line(struct rb_root *root, struct source_line *src_line)
@@ -1663,7 +1709,7 @@
 	struct rb_root source_line = RB_ROOT;
 	u64 len;
 
-	if (symbol__annotate(sym, map, 0) < 0)
+	if (symbol__disassemble(sym, map, 0) < 0)
 		return -1;
 
 	len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index a23084f..f67ccb0 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -155,7 +155,27 @@
 int symbol__alloc_hist(struct symbol *sym);
 void symbol__annotate_zero_histograms(struct symbol *sym);
 
-int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
+int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize);
+
+enum symbol_disassemble_errno {
+	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__SYMBOL_ANNOTATE_ERRNO__START		= -10000,
+
+	SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX	= __SYMBOL_ANNOTATE_ERRNO__START,
+
+	__SYMBOL_ANNOTATE_ERRNO__END,
+};
+
+int symbol__strerror_disassemble(struct symbol *sym, struct map *map,
+				 int errnum, char *buf, size_t buflen);
 
 int symbol__annotate_init(struct map *map, struct symbol *sym);
 int symbol__annotate_printf(struct symbol *sym, struct map *map,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 2a40b8e..097b3ed 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -239,31 +239,13 @@
 
 int perf_evlist__add_default(struct perf_evlist *evlist)
 {
-	struct perf_event_attr attr = {
-		.type = PERF_TYPE_HARDWARE,
-		.config = PERF_COUNT_HW_CPU_CYCLES,
-	};
-	struct perf_evsel *evsel;
+	struct perf_evsel *evsel = perf_evsel__new_cycles();
 
-	event_attr_init(&attr);
-
-	perf_event_attr__set_max_precise_ip(&attr);
-
-	evsel = perf_evsel__new(&attr);
 	if (evsel == NULL)
-		goto error;
-
-	/* use asprintf() because free(evsel) assumes name is allocated */
-	if (asprintf(&evsel->name, "cycles%.*s",
-		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
-		goto error_free;
+		return -ENOMEM;
 
 	perf_evlist__add(evlist, evsel);
 	return 0;
-error_free:
-	perf_evsel__delete(evsel);
-error:
-	return -ENOMEM;
 }
 
 int perf_evlist__add_dummy(struct perf_evlist *evlist)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8c54df6..d9b80ef 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -253,6 +253,34 @@
 	return evsel;
 }
 
+struct perf_evsel *perf_evsel__new_cycles(void)
+{
+	struct perf_event_attr attr = {
+		.type	= PERF_TYPE_HARDWARE,
+		.config	= PERF_COUNT_HW_CPU_CYCLES,
+	};
+	struct perf_evsel *evsel;
+
+	event_attr_init(&attr);
+
+	perf_event_attr__set_max_precise_ip(&attr);
+
+	evsel = perf_evsel__new(&attr);
+	if (evsel == NULL)
+		goto out;
+
+	/* use asprintf() because free(evsel) assumes name is allocated */
+	if (asprintf(&evsel->name, "cycles%.*s",
+		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
+		goto error_free;
+out:
+	return evsel;
+error_free:
+	perf_evsel__delete(evsel);
+	evsel = NULL;
+	goto out;
+}
+
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
@@ -854,7 +882,7 @@
 		perf_evsel__set_sample_bit(evsel, REGS_INTR);
 	}
 
-	if (target__has_cpu(&opts->target))
+	if (target__has_cpu(&opts->target) || opts->sample_cpu)
 		perf_evsel__set_sample_bit(evsel, CPU);
 
 	if (opts->period)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 8a4a6c9..4d44129 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -175,6 +175,8 @@
 	return perf_evsel__newtp_idx(sys, name, 0);
 }
 
+struct perf_evsel *perf_evsel__new_cycles(void);
+
 struct event_format *event_format__new(const char *sys, const char *name);
 
 void perf_evsel__init(struct perf_evsel *evsel,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index a18d142..de15dbc 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1672,7 +1672,7 @@
 }
 
 static void output_resort(struct hists *hists, struct ui_progress *prog,
-			  bool use_callchain)
+			  bool use_callchain, hists__resort_cb_t cb)
 {
 	struct rb_root *root;
 	struct rb_node *next;
@@ -1711,6 +1711,9 @@
 		n = rb_entry(next, struct hist_entry, rb_node_in);
 		next = rb_next(&n->rb_node_in);
 
+		if (cb && cb(n))
+			continue;
+
 		__hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
 		hists__inc_stats(hists, n);
 
@@ -1731,12 +1734,18 @@
 	else
 		use_callchain = symbol_conf.use_callchain;
 
-	output_resort(evsel__hists(evsel), prog, use_callchain);
+	output_resort(evsel__hists(evsel), prog, use_callchain, NULL);
 }
 
 void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 {
-	output_resort(hists, prog, symbol_conf.use_callchain);
+	output_resort(hists, prog, symbol_conf.use_callchain, NULL);
+}
+
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+			     hists__resort_cb_t cb)
+{
+	output_resort(hists, prog, symbol_conf.use_callchain, cb);
 }
 
 static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 49aa4fa..0a1edf1 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -153,8 +153,12 @@
 				   struct perf_hpp_fmt *fmt, int printed);
 void hist_entry__delete(struct hist_entry *he);
 
+typedef int (*hists__resort_cb_t)(struct hist_entry *he);
+
 void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
 void hists__output_resort(struct hists *hists, struct ui_progress *prog);
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+			     hists__resort_cb_t cb);
 int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
index 8cdcf46..21c4d9b 100644
--- a/tools/perf/util/target.c
+++ b/tools/perf/util/target.c
@@ -122,11 +122,7 @@
 	BUG_ON(buflen == 0);
 
 	if (errnum >= 0) {
-		const char *err = str_error_r(errnum, buf, buflen);
-
-		if (err != buf)
-			scnprintf(buf, buflen, "%s", err);
-
+		str_error_r(errnum, buf, buflen);
 		return 0;
 	}