Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Includes smaller fixes and improvements plus the exclude_{host,guest} feature
test and fallback to handle older kernels.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index ff9a66e..a5766b4 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -52,11 +52,11 @@
 
 -p::
 --pid=::
-	Record events on existing process ID.
+	Record events on existing process ID (comma separated list).
 
 -t::
 --tid=::
-        Record events on existing thread ID.
+        Record events on existing thread ID (comma separated list).
 
 -u::
 --uid=::
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 8966b9a..2fa173b 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -35,11 +35,11 @@
         child tasks do not inherit counters
 -p::
 --pid=<pid>::
-        stat events on existing process id
+        stat events on existing process id (comma separated list)
 
 -t::
 --tid=<tid>::
-        stat events on existing thread id
+        stat events on existing thread id (comma separated list)
 
 
 -a::
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index ab1454e..4a5680c 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -72,11 +72,11 @@
 
 -p <pid>::
 --pid=<pid>::
-	Profile events on existing Process ID.
+	Profile events on existing Process ID (comma separated list).
 
 -t <tid>::
 --tid=<tid>::
-        Profile events on existing thread ID.
+        Profile events on existing thread ID (comma separated list).
 
 -u::
 --uid=::
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 1078c5f..5476bc0 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -9,6 +9,7 @@
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
 arch/*/lib/memcpy*.S
+arch/*/lib/memset*.S
 include/linux/poison.h
 include/linux/magic.h
 include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 64df5de..e011b50 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -183,7 +183,10 @@
 grep-libs = $(filter -l%,$(1))
 strip-libs = $(filter-out -l%,$(1))
 
-$(OUTPUT)python/perf.so: $(PYRF_OBJS)
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
+
+$(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
 	$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
 	  --quiet build_ext; \
 	mkdir -p $(OUTPUT)python && \
@@ -256,6 +259,7 @@
 LIB_H += util/build-id.h
 LIB_H += util/debug.h
 LIB_H += util/debugfs.h
+LIB_H += util/sysfs.h
 LIB_H += util/event.h
 LIB_H += util/evsel.h
 LIB_H += util/evlist.h
@@ -302,6 +306,7 @@
 LIB_OBJS += $(OUTPUT)util/config.o
 LIB_OBJS += $(OUTPUT)util/ctype.o
 LIB_OBJS += $(OUTPUT)util/debugfs.o
+LIB_OBJS += $(OUTPUT)util/sysfs.o
 LIB_OBJS += $(OUTPUT)util/environment.o
 LIB_OBJS += $(OUTPUT)util/event.o
 LIB_OBJS += $(OUTPUT)util/evlist.o
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f8d9a54..75d230f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -205,8 +205,11 @@
 
 		if (opts->group && pos != first)
 			group_fd = first->fd;
+fallback_missing_features:
+		if (opts->exclude_guest_missing)
+			attr->exclude_guest = attr->exclude_host = 0;
 retry_sample_id:
-		attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
+		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
 try_again:
 		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
 				     opts->group, group_fd) < 0) {
@@ -218,15 +221,23 @@
 			} else if (err ==  ENODEV && opts->cpu_list) {
 				die("No such device - did you specify"
 					" an out-of-range profile CPU?\n");
-			} else if (err == EINVAL && opts->sample_id_all_avail) {
-				/*
-				 * Old kernel, no attr->sample_id_type_all field
-				 */
-				opts->sample_id_all_avail = false;
-				if (!opts->sample_time && !opts->raw_samples && !time_needed)
-					attr->sample_type &= ~PERF_SAMPLE_TIME;
+			} else if (err == EINVAL) {
+				if (!opts->exclude_guest_missing &&
+				    (attr->exclude_guest || attr->exclude_host)) {
+					pr_debug("Old kernel, cannot exclude "
+						 "guest or host samples.\n");
+					opts->exclude_guest_missing = true;
+					goto fallback_missing_features;
+				} else if (!opts->sample_id_all_missing) {
+					/*
+					 * Old kernel, no attr->sample_id_all field
+					 */
+					opts->sample_id_all_missing = true;
+					if (!opts->sample_time && !opts->raw_samples && !time_needed)
+						attr->sample_type &= ~PERF_SAMPLE_TIME;
 
-				goto retry_sample_id;
+					goto retry_sample_id;
+				}
 			}
 
 			/*
@@ -494,9 +505,9 @@
 			return err;
 	}
 
-	if (!!rec->no_buildid
+	if (!rec->no_buildid
 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
-		pr_err("Couldn't generating buildids. "
+		pr_err("Couldn't generate buildids. "
 		       "Use --no-buildid to profile anyway.\n");
 		return -1;
 	}
@@ -645,13 +656,10 @@
  */
 static struct perf_record record = {
 	.opts = {
-		.target_pid	     = -1,
-		.target_tid	     = -1,
 		.mmap_pages	     = UINT_MAX,
 		.user_freq	     = UINT_MAX,
 		.user_interval	     = ULLONG_MAX,
 		.freq		     = 1000,
-		.sample_id_all_avail = true,
 	},
 	.write_mode = WRITE_FORCE,
 	.file_new   = true,
@@ -670,9 +678,9 @@
 		     parse_events_option),
 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
 		     "event filter", parse_filter),
-	OPT_INTEGER('p', "pid", &record.opts.target_pid,
+	OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
 		    "record events on existing process id"),
-	OPT_INTEGER('t', "tid", &record.opts.target_tid,
+	OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
 		    "record events on existing thread id"),
 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
@@ -739,7 +747,7 @@
 
 	argc = parse_options(argc, argv, record_options, record_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
-	if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
+	if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
 		!rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
 		usage_with_options(record_usage, record_options);
 
@@ -785,7 +793,7 @@
 	if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
 		goto out_free_fd;
 
-	if (rec->opts.target_pid != -1)
+	if (rec->opts.target_pid)
 		rec->opts.target_tid = rec->opts.target_pid;
 
 	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d14b37a..ea40e4e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -182,8 +182,8 @@
 static bool			no_inherit			= false;
 static bool			scale				=  true;
 static bool			no_aggr				= false;
-static pid_t			target_pid			= -1;
-static pid_t			target_tid			= -1;
+static const char		*target_pid;
+static const char		*target_tid;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
@@ -296,7 +296,7 @@
 	if (system_wide)
 		return perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
 						group, group_fd);
-	if (target_pid == -1 && target_tid == -1) {
+	if (!target_pid && !target_tid) {
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
 	}
@@ -446,7 +446,7 @@
 			exit(-1);
 		}
 
-		if (target_tid == -1 && target_pid == -1 && !system_wide)
+		if (!target_tid && !target_pid && !system_wide)
 			evsel_list->threads->map[0] = child_pid;
 
 		/*
@@ -968,14 +968,14 @@
 	if (!csv_output) {
 		fprintf(output, "\n");
 		fprintf(output, " Performance counter stats for ");
-		if(target_pid == -1 && target_tid == -1) {
+		if (!target_pid && !target_tid) {
 			fprintf(output, "\'%s", argv[0]);
 			for (i = 1; i < argc; i++)
 				fprintf(output, " %s", argv[i]);
-		} else if (target_pid != -1)
-			fprintf(output, "process id \'%d", target_pid);
+		} else if (target_pid)
+			fprintf(output, "process id \'%s", target_pid);
 		else
-			fprintf(output, "thread id \'%d", target_tid);
+			fprintf(output, "thread id \'%s", target_tid);
 
 		fprintf(output, "\'");
 		if (run_count > 1)
@@ -1049,10 +1049,10 @@
 		     "event filter", parse_filter),
 	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
 		    "child tasks do not inherit counters"),
-	OPT_INTEGER('p', "pid", &target_pid,
-		    "stat events on existing process id"),
-	OPT_INTEGER('t', "tid", &target_tid,
-		    "stat events on existing thread id"),
+	OPT_STRING('p', "pid", &target_pid, "pid",
+		   "stat events on existing process id"),
+	OPT_STRING('t', "tid", &target_tid, "tid",
+		   "stat events on existing thread id"),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('g', "group", &group,
@@ -1190,7 +1190,7 @@
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
 		big_num = false;
 
-	if (!argc && target_pid == -1 && target_tid == -1)
+	if (!argc && !target_pid && !target_tid)
 		usage_with_options(stat_usage, options);
 	if (run_count <= 0)
 		usage_with_options(stat_usage, options);
@@ -1206,10 +1206,11 @@
 	if (add_default_attributes())
 		goto out;
 
-	if (target_pid != -1)
+	if (target_pid)
 		target_tid = target_pid;
 
-	evsel_list->threads = thread_map__new(target_pid, target_tid, UINT_MAX);
+	evsel_list->threads = thread_map__new_str(target_pid,
+						  target_tid, UINT_MAX);
 	if (evsel_list->threads == NULL) {
 		pr_err("Problems finding threads of monitor\n");
 		usage_with_options(stat_usage, options);
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 70c4eb2..3e087ce 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -1010,12 +1010,9 @@
 static int test__PERF_RECORD(void)
 {
 	struct perf_record_opts opts = {
-		.target_pid = -1,
-		.target_tid = -1,
 		.no_delay   = true,
 		.freq	    = 10,
 		.mmap_pages = 256,
-		.sample_id_all_avail = true,
 	};
 	cpu_set_t *cpu_mask = NULL;
 	size_t cpu_mask_size = 0;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index d869b21..e3c63ae 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -677,6 +677,12 @@
 		return;
 	}
 
+	if (!machine) {
+		pr_err("%u unprocessable samples recorded.",
+		       top->session->hists.stats.nr_unprocessable_samples++);
+		return;
+	}
+
 	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
 		top->exact_samples++;
 
@@ -866,8 +872,11 @@
 		attr->mmap = 1;
 		attr->comm = 1;
 		attr->inherit = top->inherit;
+fallback_missing_features:
+		if (top->exclude_guest_missing)
+			attr->exclude_guest = attr->exclude_host = 0;
 retry_sample_id:
-		attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
+		attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
 try_again:
 		if (perf_evsel__open(counter, top->evlist->cpus,
 				     top->evlist->threads, top->group,
@@ -877,12 +886,20 @@
 			if (err == EPERM || err == EACCES) {
 				ui__error_paranoid();
 				goto out_err;
-			} else if (err == EINVAL && top->sample_id_all_avail) {
-				/*
-				 * Old kernel, no attr->sample_id_type_all field
-				 */
-				top->sample_id_all_avail = false;
-				goto retry_sample_id;
+			} else if (err == EINVAL) {
+				if (!top->exclude_guest_missing &&
+				    (attr->exclude_guest || attr->exclude_host)) {
+					pr_debug("Old kernel, cannot exclude "
+						 "guest or host samples.\n");
+					top->exclude_guest_missing = true;
+					goto fallback_missing_features;
+				} else if (!top->sample_id_all_missing) {
+					/*
+					 * Old kernel, no attr->sample_id_all field
+					 */
+					top->sample_id_all_missing = true;
+					goto retry_sample_id;
+				}
 			}
 			/*
 			 * If it's cycles then fall back to hrtimer
@@ -965,7 +982,7 @@
 	if (ret)
 		goto out_delete;
 
-	if (top->target_tid != -1 || top->uid != UINT_MAX)
+	if (top->target_tid || top->uid != UINT_MAX)
 		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
 						  perf_event__process,
 						  &top->session->host_machine);
@@ -1103,11 +1120,8 @@
 	struct perf_top top = {
 		.count_filter	     = 5,
 		.delay_secs	     = 2,
-		.target_pid	     = -1,
-		.target_tid	     = -1,
 		.uid		     = UINT_MAX,
 		.freq		     = 1000, /* 1 KHz */
-		.sample_id_all_avail = true,
 		.mmap_pages	     = 128,
 		.sym_pcnt_filter     = 5,
 	};
@@ -1118,9 +1132,9 @@
 		     parse_events_option),
 	OPT_INTEGER('c', "count", &top.default_interval,
 		    "event period to sample"),
-	OPT_INTEGER('p', "pid", &top.target_pid,
+	OPT_STRING('p', "pid", &top.target_pid, "pid",
 		    "profile events on existing process id"),
-	OPT_INTEGER('t', "tid", &top.target_tid,
+	OPT_STRING('t', "tid", &top.target_tid, "tid",
 		    "profile events on existing thread id"),
 	OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
 			    "system-wide collection from all CPUs"),
@@ -1210,13 +1224,13 @@
 		goto out_delete_evlist;
 
 	/* CPU and PID are mutually exclusive */
-	if (top.target_tid > 0 && top.cpu_list) {
+	if (top.target_tid && top.cpu_list) {
 		printf("WARNING: PID switch overriding CPU\n");
 		sleep(1);
 		top.cpu_list = NULL;
 	}
 
-	if (top.target_pid != -1)
+	if (top.target_pid)
 		top.target_tid = top.target_pid;
 
 	if (perf_evlist__create_maps(top.evlist, top.target_pid,
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 92af168..f0227e9 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -167,7 +167,6 @@
 		      pid_t pid, int cpu, int group_fd,
 		      unsigned long flags)
 {
-	attr->size = sizeof(*attr);
 	return syscall(__NR_perf_event_open, attr, pid, cpu,
 		       group_fd, flags);
 }
@@ -186,8 +185,8 @@
 void pthread__unblock_sigwinch(void);
 
 struct perf_record_opts {
-	pid_t	     target_pid;
-	pid_t	     target_tid;
+	const char   *target_pid;
+	const char   *target_tid;
 	uid_t	     uid;
 	bool	     call_graph;
 	bool	     group;
@@ -199,7 +198,8 @@
 	bool	     raw_samples;
 	bool	     sample_address;
 	bool	     sample_time;
-	bool	     sample_id_all_avail;
+	bool	     sample_id_all_missing;
+	bool	     exclude_guest_missing;
 	bool	     system_wide;
 	bool	     period;
 	unsigned int freq;
diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c
index 5e230ac..0a1adc1 100644
--- a/tools/perf/util/bitmap.c
+++ b/tools/perf/util/bitmap.c
@@ -19,3 +19,13 @@
 
 	return w;
 }
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+		 const unsigned long *bitmap2, int bits)
+{
+	int k;
+	int nr = BITS_TO_LONGS(bits);
+
+	for (k = 0; k < nr; k++)
+		dst[k] = bitmap1[k] | bitmap2[k];
+}
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index 3507362..aada3ac 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -3,7 +3,7 @@
  *
  * No surprises, and works with signed and unsigned chars.
  */
-#include "cache.h"
+#include "util.h"
 
 enum {
 	S = GIT_SPACE,
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index ffc35e7..dd8b193 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -15,32 +15,6 @@
 	0,
 };
 
-/* use this to force a umount */
-void debugfs_force_cleanup(void)
-{
-	debugfs_find_mountpoint();
-	debugfs_premounted = 0;
-	debugfs_umount();
-}
-
-/* construct a full path to a debugfs element */
-int debugfs_make_path(const char *element, char *buffer, int size)
-{
-	int len;
-
-	if (strlen(debugfs_mountpoint) == 0) {
-		buffer[0] = '\0';
-		return -1;
-	}
-
-	len = strlen(debugfs_mountpoint) + strlen(element) + 1;
-	if (len >= size)
-		return len+1;
-
-	snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element);
-	return 0;
-}
-
 static int debugfs_found;
 
 /* find the path to the mounted debugfs */
@@ -97,17 +71,6 @@
 	return 0;
 }
 
-
-int debugfs_valid_entry(const char *path)
-{
-	struct stat st;
-
-	if (stat(path, &st))
-		return -errno;
-
-	return 0;
-}
-
 static void debugfs_set_tracing_events_path(const char *mountpoint)
 {
 	snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s",
@@ -149,107 +112,3 @@
 	snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint);
 	debugfs_set_tracing_events_path(mountpoint);
 }
-
-/* umount the debugfs */
-
-int debugfs_umount(void)
-{
-	char umountcmd[128];
-	int ret;
-
-	/* if it was already mounted, leave it */
-	if (debugfs_premounted)
-		return 0;
-
-	/* make sure it's a valid mount point */
-	ret = debugfs_valid_mountpoint(debugfs_mountpoint);
-	if (ret)
-		return ret;
-
-	snprintf(umountcmd, sizeof(umountcmd),
-		 "/bin/umount %s", debugfs_mountpoint);
-	return system(umountcmd);
-}
-
-int debugfs_write(const char *entry, const char *value)
-{
-	char path[PATH_MAX + 1];
-	int ret, count;
-	int fd;
-
-	/* construct the path */
-	snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
-
-	/* verify that it exists */
-	ret = debugfs_valid_entry(path);
-	if (ret)
-		return ret;
-
-	/* get how many chars we're going to write */
-	count = strlen(value);
-
-	/* open the debugfs entry */
-	fd = open(path, O_RDWR);
-	if (fd < 0)
-		return -errno;
-
-	while (count > 0) {
-		/* write it */
-		ret = write(fd, value, count);
-		if (ret <= 0) {
-			if (ret == EAGAIN)
-				continue;
-			close(fd);
-			return -errno;
-		}
-		count -= ret;
-	}
-
-	/* close it */
-	close(fd);
-
-	/* return success */
-	return 0;
-}
-
-/*
- * read a debugfs entry
- * returns the number of chars read or a negative errno
- */
-int debugfs_read(const char *entry, char *buffer, size_t size)
-{
-	char path[PATH_MAX + 1];
-	int ret;
-	int fd;
-
-	/* construct the path */
-	snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
-
-	/* verify that it exists */
-	ret = debugfs_valid_entry(path);
-	if (ret)
-		return ret;
-
-	/* open the debugfs entry */
-	fd = open(path, O_RDONLY);
-	if (fd < 0)
-		return -errno;
-
-	do {
-		/* read it */
-		ret = read(fd, buffer, size);
-		if (ret == 0) {
-			close(fd);
-			return EOF;
-		}
-	} while (ret < 0 && errno == EAGAIN);
-
-	/* close it */
-	close(fd);
-
-	/* make *sure* there's a null character at the end */
-	buffer[ret] = '\0';
-
-	/* return the number of chars read */
-	return ret;
-}
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 4a878f7..68f3e87 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -3,14 +3,8 @@
 
 const char *debugfs_find_mountpoint(void);
 int debugfs_valid_mountpoint(const char *debugfs);
-int debugfs_valid_entry(const char *path);
 char *debugfs_mount(const char *mountpoint);
-int debugfs_umount(void);
 void debugfs_set_path(const char *mountpoint);
-int debugfs_write(const char *entry, const char *value);
-int debugfs_read(const char *entry, char *buffer, size_t size);
-void debugfs_force_cleanup(void);
-int debugfs_make_path(const char *element, char *buffer, int size);
 
 extern char debugfs_mountpoint[];
 extern char tracing_events_path[];
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index a57a8cf..5c61dc5 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -593,15 +593,15 @@
 	return perf_evlist__mmap_per_cpu(evlist, prot, mask);
 }
 
-int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
-			     pid_t target_tid, uid_t uid, const char *cpu_list)
+int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
+			     const char *target_tid, uid_t uid, const char *cpu_list)
 {
-	evlist->threads = thread_map__new(target_pid, target_tid, uid);
+	evlist->threads = thread_map__new_str(target_pid, target_tid, uid);
 
 	if (evlist->threads == NULL)
 		return -1;
 
-	if (uid != UINT_MAX || (cpu_list == NULL && target_tid != -1))
+	if (uid != UINT_MAX || (cpu_list == NULL && target_tid))
 		evlist->cpus = cpu_map__dummy_new();
 	else
 		evlist->cpus = cpu_map__new(cpu_list);
@@ -820,7 +820,7 @@
 		exit(-1);
 	}
 
-	if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1)
+	if (!opts->system_wide && !opts->target_tid && !opts->target_pid)
 		evlist->threads->map[0] = evlist->workload.pid;
 
 	close(child_ready_pipe[1]);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 1b4282b..21f1c9e 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -106,8 +106,8 @@
 	evlist->threads	= threads;
 }
 
-int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
-			     pid_t tid, uid_t uid, const char *cpu_list);
+int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
+			     const char *tid, uid_t uid, const char *cpu_list);
 void perf_evlist__delete_maps(struct perf_evlist *evlist);
 int perf_evlist__set_filters(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9a11f9e..302d49a 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -68,7 +68,7 @@
 	struct perf_event_attr *attr = &evsel->attr;
 	int track = !evsel->idx; /* only the first counter needs these */
 
-	attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
+	attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
 	attr->inherit	    = !opts->no_inherit;
 	attr->read_format   = PERF_FORMAT_TOTAL_TIME_ENABLED |
 			      PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -111,7 +111,7 @@
 	if (opts->period)
 		attr->sample_type	|= PERF_SAMPLE_PERIOD;
 
-	if (opts->sample_id_all_avail &&
+	if (!opts->sample_id_all_missing &&
 	    (opts->sample_time || opts->system_wide ||
 	     !opts->no_inherit || opts->cpu_list))
 		attr->sample_type	|= PERF_SAMPLE_TIME;
@@ -130,7 +130,7 @@
 	attr->mmap = track;
 	attr->comm = track;
 
-	if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) {
+	if (!opts->target_pid && !opts->target_tid && !opts->system_wide) {
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
 	}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ecd7f4d..9f867d9 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -63,9 +63,20 @@
 	return NULL;
 }
 
-static const char *__perf_magic = "PERFFILE";
+/*
+ * magic2 = "PERFILE2"
+ * It must be a numerical value so that the endianness
+ * determines its memory layout. That way we are able
+ * to detect endianness when reading the perf.data file
+ * back.
+ *
+ * We also check for the legacy (PERFFILE) format.
+ */
+static const char *__perf_magic1 = "PERFFILE";
+static const u64 __perf_magic2    = 0x32454c4946524550ULL;
+static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
 
-#define PERF_MAGIC	(*(u64 *)__perf_magic)
+#define PERF_MAGIC	__perf_magic2
 
 struct perf_file_attr {
 	struct perf_event_attr	attr;
@@ -1305,25 +1316,198 @@
 	free(str);
 }
 
+static int __event_process_build_id(struct build_id_event *bev,
+				    char *filename,
+				    struct perf_session *session)
+{
+	int err = -1;
+	struct list_head *head;
+	struct machine *machine;
+	u16 misc;
+	struct dso *dso;
+	enum dso_kernel_type dso_type;
+
+	machine = perf_session__findnew_machine(session, bev->pid);
+	if (!machine)
+		goto out;
+
+	misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+	switch (misc) {
+	case PERF_RECORD_MISC_KERNEL:
+		dso_type = DSO_TYPE_KERNEL;
+		head = &machine->kernel_dsos;
+		break;
+	case PERF_RECORD_MISC_GUEST_KERNEL:
+		dso_type = DSO_TYPE_GUEST_KERNEL;
+		head = &machine->kernel_dsos;
+		break;
+	case PERF_RECORD_MISC_USER:
+	case PERF_RECORD_MISC_GUEST_USER:
+		dso_type = DSO_TYPE_USER;
+		head = &machine->user_dsos;
+		break;
+	default:
+		goto out;
+	}
+
+	dso = __dsos__findnew(head, filename);
+	if (dso != NULL) {
+		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+		dso__set_build_id(dso, &bev->build_id);
+
+		if (filename[0] == '[')
+			dso->kernel = dso_type;
+
+		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
+				  sbuild_id);
+		pr_debug("build id event received for %s: %s\n",
+			 dso->long_name, sbuild_id);
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
+						 int input, u64 offset, u64 size)
+{
+	struct perf_session *session = container_of(header, struct perf_session, header);
+	struct {
+		struct perf_event_header   header;
+		u8			   build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+		char			   filename[0];
+	} old_bev;
+	struct build_id_event bev;
+	char filename[PATH_MAX];
+	u64 limit = offset + size;
+
+	while (offset < limit) {
+		ssize_t len;
+
+		if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+			return -1;
+
+		if (header->needs_swap)
+			perf_event_header__bswap(&old_bev.header);
+
+		len = old_bev.header.size - sizeof(old_bev);
+		if (read(input, filename, len) != len)
+			return -1;
+
+		bev.header = old_bev.header;
+
+		/*
+		 * As the pid is the missing value, we need to fill
+		 * it properly. The header.misc value gives us a nice hint.
+		 */
+		bev.pid	= HOST_KERNEL_ID;
+		if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
+		    bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
+			bev.pid	= DEFAULT_GUEST_KERNEL_ID;
+
+		memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
+		__event_process_build_id(&bev, filename, session);
+
+		offset += bev.header.size;
+	}
+
+	return 0;
+}
+
+static int perf_header__read_build_ids(struct perf_header *header,
+				       int input, u64 offset, u64 size)
+{
+	struct perf_session *session = container_of(header, struct perf_session, header);
+	struct build_id_event bev;
+	char filename[PATH_MAX];
+	u64 limit = offset + size, orig_offset = offset;
+	int err = -1;
+
+	while (offset < limit) {
+		ssize_t len;
+
+		if (read(input, &bev, sizeof(bev)) != sizeof(bev))
+			goto out;
+
+		if (header->needs_swap)
+			perf_event_header__bswap(&bev.header);
+
+		len = bev.header.size - sizeof(bev);
+		if (read(input, filename, len) != len)
+			goto out;
+		/*
+		 * The a1645ce1 changeset:
+		 *
+		 * "perf: 'perf kvm' tool for monitoring guest performance from host"
+		 *
+		 * Added a field to struct build_id_event that broke the file
+		 * format.
+		 *
+		 * Since the kernel build-id is the first entry, process the
+		 * table using the old format if the well known
+		 * '[kernel.kallsyms]' string for the kernel build-id has the
+		 * first 4 characters chopped off (where the pid_t sits).
+		 */
+		if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
+			if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
+				return -1;
+			return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
+		}
+
+		__event_process_build_id(&bev, filename, session);
+
+		offset += bev.header.size;
+	}
+	err = 0;
+out:
+	return err;
+}
+
+static int process_trace_info(struct perf_file_section *section __unused,
+			      struct perf_header *ph __unused,
+			      int feat __unused, int fd)
+{
+	trace_report(fd, false);
+	return 0;
+}
+
+static int process_build_id(struct perf_file_section *section,
+			    struct perf_header *ph,
+			    int feat __unused, int fd)
+{
+	if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
+		pr_debug("Failed to read buildids, continuing...\n");
+	return 0;
+}
+
 struct feature_ops {
 	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
 	void (*print)(struct perf_header *h, int fd, FILE *fp);
+	int (*process)(struct perf_file_section *section,
+		       struct perf_header *h, int feat, int fd);
 	const char *name;
 	bool full_only;
 };
 
 #define FEAT_OPA(n, func) \
 	[n] = { .name = #n, .write = write_##func, .print = print_##func }
+#define FEAT_OPP(n, func) \
+	[n] = { .name = #n, .write = write_##func, .print = print_##func, \
+		.process = process_##func }
 #define FEAT_OPF(n, func) \
-	[n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true }
+	[n] = { .name = #n, .write = write_##func, .print = print_##func, \
+		.full_only = true }
 
 /* feature_ops not implemented: */
 #define print_trace_info		NULL
 #define print_build_id			NULL
 
 static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
-	FEAT_OPA(HEADER_TRACE_INFO,	trace_info),
-	FEAT_OPA(HEADER_BUILD_ID,	build_id),
+	FEAT_OPP(HEADER_TRACE_INFO,	trace_info),
+	FEAT_OPP(HEADER_BUILD_ID,	build_id),
 	FEAT_OPA(HEADER_HOSTNAME,	hostname),
 	FEAT_OPA(HEADER_OSRELEASE,	osrelease),
 	FEAT_OPA(HEADER_VERSION,	version),
@@ -1620,24 +1804,59 @@
 	return err;
 }
 
+static int check_magic_endian(u64 *magic, struct perf_file_header *header,
+			      struct perf_header *ph)
+{
+	int ret;
+
+	/* check for legacy format */
+	ret = memcmp(magic, __perf_magic1, sizeof(*magic));
+	if (ret == 0) {
+		pr_debug("legacy perf.data format\n");
+		if (!header)
+			return -1;
+
+		if (header->attr_size != sizeof(struct perf_file_attr)) {
+			u64 attr_size = bswap_64(header->attr_size);
+
+			if (attr_size != sizeof(struct perf_file_attr))
+				return -1;
+
+			ph->needs_swap = true;
+		}
+		return 0;
+	}
+
+	/* check magic number with same endianness */
+	if (*magic == __perf_magic2)
+		return 0;
+
+	/* check magic number but opposite endianness */
+	if (*magic != __perf_magic2_sw)
+		return -1;
+
+	ph->needs_swap = true;
+
+	return 0;
+}
+
 int perf_file_header__read(struct perf_file_header *header,
 			   struct perf_header *ph, int fd)
 {
+	int ret;
+
 	lseek(fd, 0, SEEK_SET);
 
-	if (readn(fd, header, sizeof(*header)) <= 0 ||
-	    memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
+	ret = readn(fd, header, sizeof(*header));
+	if (ret <= 0)
 		return -1;
 
-	if (header->attr_size != sizeof(struct perf_file_attr)) {
-		u64 attr_size = bswap_64(header->attr_size);
+	if (check_magic_endian(&header->magic, header, ph) < 0)
+		return -1;
 
-		if (attr_size != sizeof(struct perf_file_attr))
-			return -1;
-
+	if (ph->needs_swap) {
 		mem_bswap_64(header, offsetof(struct perf_file_header,
-					    adds_features));
-		ph->needs_swap = true;
+			     adds_features));
 	}
 
 	if (header->size != sizeof(*header)) {
@@ -1689,156 +1908,6 @@
 	return 0;
 }
 
-static int __event_process_build_id(struct build_id_event *bev,
-				    char *filename,
-				    struct perf_session *session)
-{
-	int err = -1;
-	struct list_head *head;
-	struct machine *machine;
-	u16 misc;
-	struct dso *dso;
-	enum dso_kernel_type dso_type;
-
-	machine = perf_session__findnew_machine(session, bev->pid);
-	if (!machine)
-		goto out;
-
-	misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-	switch (misc) {
-	case PERF_RECORD_MISC_KERNEL:
-		dso_type = DSO_TYPE_KERNEL;
-		head = &machine->kernel_dsos;
-		break;
-	case PERF_RECORD_MISC_GUEST_KERNEL:
-		dso_type = DSO_TYPE_GUEST_KERNEL;
-		head = &machine->kernel_dsos;
-		break;
-	case PERF_RECORD_MISC_USER:
-	case PERF_RECORD_MISC_GUEST_USER:
-		dso_type = DSO_TYPE_USER;
-		head = &machine->user_dsos;
-		break;
-	default:
-		goto out;
-	}
-
-	dso = __dsos__findnew(head, filename);
-	if (dso != NULL) {
-		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
-		dso__set_build_id(dso, &bev->build_id);
-
-		if (filename[0] == '[')
-			dso->kernel = dso_type;
-
-		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
-				  sbuild_id);
-		pr_debug("build id event received for %s: %s\n",
-			 dso->long_name, sbuild_id);
-	}
-
-	err = 0;
-out:
-	return err;
-}
-
-static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
-						 int input, u64 offset, u64 size)
-{
-	struct perf_session *session = container_of(header, struct perf_session, header);
-	struct {
-		struct perf_event_header   header;
-		u8			   build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
-		char			   filename[0];
-	} old_bev;
-	struct build_id_event bev;
-	char filename[PATH_MAX];
-	u64 limit = offset + size;
-
-	while (offset < limit) {
-		ssize_t len;
-
-		if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
-			return -1;
-
-		if (header->needs_swap)
-			perf_event_header__bswap(&old_bev.header);
-
-		len = old_bev.header.size - sizeof(old_bev);
-		if (read(input, filename, len) != len)
-			return -1;
-
-		bev.header = old_bev.header;
-
-		/*
-		 * As the pid is the missing value, we need to fill
-		 * it properly. The header.misc value give us nice hint.
-		 */
-		bev.pid	= HOST_KERNEL_ID;
-		if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
-		    bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
-			bev.pid	= DEFAULT_GUEST_KERNEL_ID;
-
-		memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
-		__event_process_build_id(&bev, filename, session);
-
-		offset += bev.header.size;
-	}
-
-	return 0;
-}
-
-static int perf_header__read_build_ids(struct perf_header *header,
-				       int input, u64 offset, u64 size)
-{
-	struct perf_session *session = container_of(header, struct perf_session, header);
-	struct build_id_event bev;
-	char filename[PATH_MAX];
-	u64 limit = offset + size, orig_offset = offset;
-	int err = -1;
-
-	while (offset < limit) {
-		ssize_t len;
-
-		if (read(input, &bev, sizeof(bev)) != sizeof(bev))
-			goto out;
-
-		if (header->needs_swap)
-			perf_event_header__bswap(&bev.header);
-
-		len = bev.header.size - sizeof(bev);
-		if (read(input, filename, len) != len)
-			goto out;
-		/*
-		 * The a1645ce1 changeset:
-		 *
-		 * "perf: 'perf kvm' tool for monitoring guest performance from host"
-		 *
-		 * Added a field to struct build_id_event that broke the file
-		 * format.
-		 *
-		 * Since the kernel build-id is the first entry, process the
-		 * table using the old format if the well known
-		 * '[kernel.kallsyms]' string for the kernel build-id has the
-		 * first 4 characters chopped off (where the pid_t sits).
-		 */
-		if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
-			if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
-				return -1;
-			return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
-		}
-
-		__event_process_build_id(&bev, filename, session);
-
-		offset += bev.header.size;
-	}
-	err = 0;
-out:
-	return err;
-}
-
 static int perf_file_section__process(struct perf_file_section *section,
 				      struct perf_header *ph,
 				      int feat, int fd, void *data __used)
@@ -1854,27 +1923,23 @@
 		return 0;
 	}
 
-	switch (feat) {
-	case HEADER_TRACE_INFO:
-		trace_report(fd, false);
-		break;
-	case HEADER_BUILD_ID:
-		if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
-			pr_debug("Failed to read buildids, continuing...\n");
-		break;
-	default:
-		break;
-	}
+	if (!feat_ops[feat].process)
+		return 0;
 
-	return 0;
+	return feat_ops[feat].process(section, ph, feat, fd);
 }
 
 static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
 				       struct perf_header *ph, int fd,
 				       bool repipe)
 {
-	if (readn(fd, header, sizeof(*header)) <= 0 ||
-	    memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
+	int ret;
+
+	ret = readn(fd, header, sizeof(*header));
+	if (ret <= 0)
+		return -1;
+
+	 if (check_magic_endian(&header->magic, NULL, ph) < 0)
 		return -1;
 
 	if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 0d48613..48e5acd 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -32,6 +32,7 @@
 	u32 nr_unknown_events;
 	u32 nr_invalid_chains;
 	u32 nr_unknown_id;
+	u32 nr_unprocessable_samples;
 };
 
 enum hist_column {
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h
index eda4416..bb162e4 100644
--- a/tools/perf/util/include/linux/bitmap.h
+++ b/tools/perf/util/include/linux/bitmap.h
@@ -5,6 +5,8 @@
 #include <linux/bitops.h>
 
 int __bitmap_weight(const unsigned long *bitmap, int bits);
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+		 const unsigned long *bitmap2, int bits);
 
 #define BITMAP_LAST_WORD_MASK(nbits)					\
 (									\
@@ -32,4 +34,13 @@
 	return __bitmap_weight(src, nbits);
 }
 
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+			     const unsigned long *src2, int nbits)
+{
+	if (small_const_nbits(nbits))
+		*dst = *src1 | *src2;
+	else
+		__bitmap_or(dst, src1, src2, nbits);
+}
+
 #endif /* _PERF_BITOPS_H */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
new file mode 100644
index 0000000..2884e67
--- /dev/null
+++ b/tools/perf/util/python-ext-sources
@@ -0,0 +1,19 @@
+#
+# List of files needed by perf python extension
+#
+# Each source file must be placed on its own line so that it can be
+# processed by Makefile and util/setup.py accordingly.
+#
+
+util/python.c
+util/ctype.c
+util/evlist.c
+util/evsel.c
+util/cpumap.c
+util/thread_map.c
+util/util.c
+util/xyarray.c
+util/cgroup.c
+util/debugfs.c
+util/strlist.c
+../../lib/rbtree.c
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 552c1c5..9f833cf 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -796,6 +796,10 @@
 			++session->hists.stats.nr_unknown_id;
 			return -1;
 		}
+		if (machine == NULL) {
+			++session->hists.stats.nr_unprocessable_samples;
+			return -1;
+		}
 		return tool->sample(tool, event, sample, evsel, machine);
 	case PERF_RECORD_MMAP:
 		return tool->mmap(tool, event, sample, machine);
@@ -964,6 +968,12 @@
  			    session->hists.stats.nr_invalid_chains,
  			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
  	}
+
+	if (session->hists.stats.nr_unprocessable_samples != 0) {
+		ui__warning("%u unprocessable samples recorded.\n"
+			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
+			    session->hists.stats.nr_unprocessable_samples);
+	}
 }
 
 #define session_done()	(*(volatile int *)(&session_done))
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 36d4c56..d0f9f29 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -24,11 +24,11 @@
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 
+ext_sources = [f.strip() for f in file('util/python-ext-sources')
+				if len(f.strip()) > 0 and f[0] != '#']
+
 perf = Extension('perf',
-		  sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
-			     'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
-			     'util/util.c', 'util/xyarray.c', 'util/cgroup.c',
-			     'util/debugfs.c'],
+		  sources = ext_sources,
 		  include_dirs = ['util/include'],
 		  extra_compile_args = cflags,
                  )
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index fc6e12f..5dd83c3 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <libgen.h>
@@ -12,6 +11,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include "build-id.h"
+#include "util.h"
 #include "debug.h"
 #include "symbol.h"
 #include "strlist.h"
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c
new file mode 100644
index 0000000..48c6902
--- /dev/null
+++ b/tools/perf/util/sysfs.c
@@ -0,0 +1,60 @@
+
+#include "util.h"
+#include "sysfs.h"
+
+static const char * const sysfs_known_mountpoints[] = {
+	"/sys",
+	0,
+};
+
+static int sysfs_found;
+char sysfs_mountpoint[PATH_MAX + 1];	/* +1: "%<PATH_MAX>s" in fscanf() also stores a NUL */
+
+static int sysfs_valid_mountpoint(const char *sysfs)
+{
+	struct statfs st_fs;
+
+	if (statfs(sysfs, &st_fs) < 0)
+		return -ENOENT;
+	else if (st_fs.f_type != (long) SYSFS_MAGIC)
+		return -ENOENT;
+
+	return 0;
+}
+
+const char *sysfs_find_mountpoint(void)
+{
+	const char * const *ptr;
+	char type[100];
+	FILE *fp;
+
+	if (sysfs_found)
+		return (const char *) sysfs_mountpoint;
+
+	ptr = sysfs_known_mountpoints;
+	while (*ptr) {
+		if (sysfs_valid_mountpoint(*ptr) == 0) {
+			sysfs_found = 1;
+			strcpy(sysfs_mountpoint, *ptr);
+			return sysfs_mountpoint;
+		}
+		ptr++;
+	}
+
+	/* give up and parse /proc/mounts */
+	fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		return NULL;
+
+	while (!sysfs_found &&
+	       fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+		      sysfs_mountpoint, type) == 2) {
+
+		if (strcmp(type, "sysfs") == 0)
+			sysfs_found = 1;
+	}
+
+	fclose(fp);
+
+	return sysfs_found ? sysfs_mountpoint : NULL;
+}
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h
new file mode 100644
index 0000000..a813b72
--- /dev/null
+++ b/tools/perf/util/sysfs.h
@@ -0,0 +1,6 @@
+#ifndef __SYSFS_H__
+#define __SYSFS_H__
+
+const char *sysfs_find_mountpoint(void);
+
+#endif /* __SYSFS_H__ */
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 3d4b6c5..e15983c 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -6,6 +6,8 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include "strlist.h"
+#include <string.h>
 #include "thread_map.h"
 
 /* Skip "." and ".." directories */
@@ -152,6 +154,132 @@
 	return thread_map__new_by_tid(tid);
 }
 
+/*
+ * Build a thread map holding every task of each pid in the comma separated
+ * pid_str; returns NULL on a malformed pid, scandir() failure or ENOMEM.
+ */
+static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
+{
+	struct thread_map *threads = NULL, *nt;
+	char name[256];
+	int i, j = 0, items, total_tasks = 0;
+	struct dirent **namelist = NULL;
+	pid_t pid, prev_pid = INT_MAX;
+	char *end_ptr;
+	struct str_node *pos;
+	struct strlist *slist = strlist__new(false, pid_str);
+
+	if (!slist)
+		return NULL;
+
+	strlist__for_each(pos, slist) {
+		pid = strtol(pos->s, &end_ptr, 10);
+
+		if (pid == INT_MIN || pid == INT_MAX ||
+		    (*end_ptr != '\0' && *end_ptr != ','))
+			goto out_free_threads;
+
+		if (pid == prev_pid)
+			continue;
+
+		sprintf(name, "/proc/%d/task", pid);
+		items = scandir(name, &namelist, filter, NULL);
+		if (items <= 0)
+			goto out_free_threads;
+
+		total_tasks += items;
+		nt = realloc(threads, (sizeof(*threads) +
+				       sizeof(pid_t) * total_tasks));
+		if (nt != NULL) {
+			threads = nt;
+			for (i = 0; i < items; i++)
+				threads->map[j++] = atoi(namelist[i]->d_name);
+			threads->nr = total_tasks;
+		}
+
+		/* Release the scandir() result even when realloc() failed. */
+		for (i = 0; i < items; i++)
+			free(namelist[i]);
+		free(namelist);
+
+		if (nt == NULL)
+			goto out_free_threads;
+	}
+
+out:
+	strlist__delete(slist);
+	return threads;
+
+out_free_threads:
+	free(threads);
+	threads = NULL;
+	goto out;
+}
+
+/* Map the tids of the comma separated tid_str.  perf-stat expects a map even
+ * when no tid is given, so a NULL tid_str yields { -1 }.  NULL on error. */
+static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
+{
+	struct thread_map *threads = NULL, *nt;
+	int ntasks = 0;
+	pid_t tid, prev_tid = INT_MAX;
+	char *end_ptr;
+	struct str_node *pos;
+	struct strlist *slist;
+
+	if (!tid_str) {
+		threads = malloc(sizeof(*threads) + sizeof(pid_t));
+		if (threads != NULL) {
+			threads->map[0] = -1;	/* the only slot allocated above */
+			threads->nr	= 1;
+		}
+		return threads;
+	}
+
+	slist = strlist__new(false, tid_str);
+	if (!slist)
+		return NULL;
+
+	strlist__for_each(pos, slist) {
+		tid = strtol(pos->s, &end_ptr, 10);
+		if (tid == INT_MIN || tid == INT_MAX ||
+		    (*end_ptr != '\0' && *end_ptr != ','))
+			goto out_free_threads;
+
+		if (tid == prev_tid)
+			continue;
+
+		ntasks++;
+		nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
+		if (nt == NULL)
+			goto out_free_threads;
+
+		threads = nt;
+		threads->map[ntasks - 1] = tid;
+		threads->nr		 = ntasks;
+	}
+out:
+	strlist__delete(slist);	/* the parsed list is only needed while looping */
+	return threads;
+
+out_free_threads:
+	free(threads);
+	threads = NULL;
+	goto out;
+}
+
+struct thread_map *thread_map__new_str(const char *pid, const char *tid,
+				       uid_t uid)
+{
+	if (pid)
+		return thread_map__new_by_pid_str(pid);
+
+	if (!tid && uid != UINT_MAX)
+		return thread_map__new_by_uid(uid);
+
+	return thread_map__new_by_tid_str(tid);
+}
+
 void thread_map__delete(struct thread_map *threads)
 {
 	free(threads);
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index c75ddba..7da80f1 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -13,6 +13,10 @@
 struct thread_map *thread_map__new_by_tid(pid_t tid);
 struct thread_map *thread_map__new_by_uid(uid_t uid);
 struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+
+struct thread_map *thread_map__new_str(const char *pid,
+		const char *tid, uid_t uid);
+
 void thread_map__delete(struct thread_map *threads);
 
 size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index e4370ca..09fe579 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -69,11 +69,11 @@
 
 	ret += SNPRINTF(bf + ret, size - ret, "], ");
 
-	if (top->target_pid != -1)
-		ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d",
+	if (top->target_pid)
+		ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
 				top->target_pid);
-	else if (top->target_tid != -1)
-		ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d",
+	else if (top->target_tid)
+		ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
 				top->target_tid);
 	else if (top->uid_str != NULL)
 		ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
@@ -85,7 +85,7 @@
 		ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
 				top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
 	else {
-		if (top->target_tid != -1)
+		if (top->target_tid)
 			ret += SNPRINTF(bf + ret, size - ret, ")");
 		else
 			ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index def3e53..ce61cb2 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -23,7 +23,7 @@
 	u64		   guest_us_samples, guest_kernel_samples;
 	int		   print_entries, count_filter, delay_secs;
 	int		   freq;
-	pid_t		   target_pid, target_tid;
+	const char	   *target_pid, *target_tid;
 	uid_t		   uid;
 	bool		   hide_kernel_symbols, hide_user_symbols, zero;
 	bool		   system_wide;
@@ -34,7 +34,8 @@
 	bool		   vmlinux_warned;
 	bool		   inherit;
 	bool		   group;
-	bool		   sample_id_all_avail;
+	bool		   sample_id_all_missing;
+	bool		   exclude_guest_missing;
 	bool		   dump_symtab;
 	const char	   *cpu_list;
 	struct hist_entry  *sym_filter_entry;
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index d0c0139..52bb07c 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -83,7 +83,7 @@
 	va_end(params);
 }
 
-uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid)
+uid_t parse_target_uid(const char *str, const char *tid, const char *pid)
 {
 	struct passwd pwd, *result;
 	char buf[1024];
@@ -91,8 +91,8 @@
 	if (str == NULL)
 		return UINT_MAX;
 
-	/* CPU and PID are mutually exclusive */
-	if (tid > 0 || pid > 0) {
+	/* UID and PID are mutually exclusive */
+	if (tid || pid) {
 		ui__warning("PID/TID switch overriding UID\n");
 		sleep(1);
 		return UINT_MAX;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 8131410..8109a90 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -6,7 +6,7 @@
  * XXX We need to find a better place for these things...
  */
 bool perf_host  = true;
-bool perf_guest = true;
+bool perf_guest = false;
 
 void event_attr_init(struct perf_event_attr *attr)
 {
@@ -14,6 +14,8 @@
 		attr->exclude_host  = 1;
 	if (!perf_guest)
 		attr->exclude_guest = 1;
+	/* to capture ABI version */
+	attr->size = sizeof(*attr);
 }
 
 int mkdir_p(char *path, mode_t mode)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 232d17e..0f99f39 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -199,6 +199,8 @@
 #undef isalpha
 #undef isprint
 #undef isalnum
+#undef islower
+#undef isupper
 #undef tolower
 #undef toupper
 
@@ -219,6 +221,8 @@
 #define isalpha(x) sane_istest(x,GIT_ALPHA)
 #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
 #define isprint(x) sane_istest(x,GIT_PRINT)
+#define islower(x) (sane_istest(x,GIT_ALPHA) && ((x) & 0x20)) /* 0x20: ASCII case bit */
+#define isupper(x) (sane_istest(x,GIT_ALPHA) && !((x) & 0x20))
 #define tolower(x) sane_case((unsigned char)(x), 0x20)
 #define toupper(x) sane_case((unsigned char)(x), 0)
 
@@ -245,7 +249,7 @@
 
 void event_attr_init(struct perf_event_attr *attr);
 
-uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid);
+uid_t parse_target_uid(const char *str, const char *tid, const char *pid);
 
 #define _STR(x) #x
 #define STR(x) _STR(x)