Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New user visible features:

  - Support multiple probes on different binaries on the same command line (Masami Hiramatsu)

User visible changes:

  - Fix synthesizing fork_event.ppid for non-main thread (David Ahern)

  - Fix cross-endian analysis (David Ahern)

  - Fix segfault in 'perf buildid-list' when showing DSOs with hits (He Kuang)

Infrastructure changes:

  - Fix type for references to data_head/tail (David Ahern)

  - Fix error path to do closedir() when synthesizing threads (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 921bb69..f7b1af6 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -56,6 +56,7 @@
 	bool mod_events;
 	bool uprobes;
 	bool quiet;
+	bool target_used;
 	int nevents;
 	struct perf_probe_event events[MAX_PROBES];
 	struct strlist *dellist;
@@ -78,6 +79,12 @@
 	}
 
 	pev->uprobes = params.uprobes;
+	if (params.target) {
+		pev->target = strdup(params.target);
+		if (!pev->target)
+			return -ENOMEM;
+		params.target_used = true;
+	}
 
 	/* Parse a perf-probe command into event */
 	ret = parse_perf_probe_command(str, pev);
@@ -102,6 +109,7 @@
 		params.target = strdup(ptr);
 		if (!params.target)
 			return -ENOMEM;
+		params.target_used = false;
 
 		found = 1;
 		buf = ptr + (strlen(ptr) - 3);
@@ -178,7 +186,7 @@
 	int ret = -ENOENT;
 	char *tmp;
 
-	if  (str && !params.target) {
+	if  (str) {
 		if (!strcmp(opt->long_name, "exec"))
 			params.uprobes = true;
 #ifdef HAVE_DWARF_SUPPORT
@@ -200,7 +208,9 @@
 			if (!tmp)
 				return -ENOMEM;
 		}
+		free(params.target);
 		params.target = tmp;
+		params.target_used = false;
 		ret = 0;
 	}
 
@@ -485,9 +495,14 @@
 	}
 
 	if (params.nevents) {
+		/* Ensure the last given target is used */
+		if (params.target && !params.target_used) {
+			pr_warning("  Error: -x/-m must follow the probe definitions.\n");
+			usage_with_options(probe_usage, options);
+		}
+
 		ret = add_perf_probe_events(params.events, params.nevents,
 					    params.max_probe_points,
-					    params.target,
 					    params.force_add);
 		if (ret < 0) {
 			pr_err_with_code("  Error: Failed to add events.", ret);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ac61048..c3efdfb 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -70,8 +70,8 @@
 static int record__mmap_read(struct record *rec, int idx)
 {
 	struct perf_mmap *md = &rec->evlist->mmap[idx];
-	unsigned int head = perf_mmap__read_head(md);
-	unsigned int old = md->prev;
+	u64 head = perf_mmap__read_head(md);
+	u64 old = md->prev;
 	unsigned char *data = md->base + page_size;
 	unsigned long size;
 	void *buf;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index f7fb258..61867df 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -59,12 +59,8 @@
 	dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
 		    event->fork.ppid, event->fork.ptid);
 
-	if (thread) {
-		rb_erase(&thread->rb_node, &machine->threads);
-		if (machine->last_match == thread)
-			thread__zput(machine->last_match);
-		thread__put(thread);
-	}
+	if (thread)
+		machine__remove_thread(machine, thread);
 
 	return 0;
 }
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 5516236..ff866c4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -183,8 +183,18 @@
 {
 	memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
 
-	event->fork.ppid = ppid;
-	event->fork.ptid = ppid;
+	/*
+	 * For the main thread, set the parent to ppid from the status file.
+	 * For other threads, set the parent pid to the main thread, i.e.
+	 * assume the main thread spawns all threads in a process.
+	*/
+	if (tgid == pid) {
+		event->fork.ppid = ppid;
+		event->fork.ptid = ppid;
+	} else {
+		event->fork.ppid = tgid;
+		event->fork.ptid = tgid;
+	}
 	event->fork.pid  = tgid;
 	event->fork.tid  = pid;
 	event->fork.header.type = PERF_RECORD_FORK;
@@ -377,6 +387,7 @@
 	DIR *tasks;
 	struct dirent dirent, *next;
 	pid_t tgid, ppid;
+	int rc = 0;
 
 	/* special case: only send one comm event using passed in pid */
 	if (!full) {
@@ -404,38 +415,38 @@
 
 	while (!readdir_r(tasks, &dirent, &next) && next) {
 		char *end;
-		int rc = 0;
 		pid_t _pid;
 
 		_pid = strtol(dirent.d_name, &end, 10);
 		if (*end)
 			continue;
 
+		rc = -1;
 		if (perf_event__prepare_comm(comm_event, _pid, machine,
 					     &tgid, &ppid) != 0)
-			return -1;
+			break;
 
 		if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
 						ppid, process, machine) < 0)
-			return -1;
+			break;
 		/*
 		 * Send the prepared comm event
 		 */
 		if (process(tool, comm_event, &synth_sample, machine) != 0)
-			return -1;
+			break;
 
+		rc = 0;
 		if (_pid == pid) {
 			/* process the parent's maps too */
 			rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
 						process, machine, mmap_data);
+			if (rc)
+				break;
 		}
-
-		if (rc)
-			return rc;
 	}
 
 	closedir(tasks);
-	return 0;
+	return rc;
 }
 
 int perf_event__synthesize_thread_map(struct perf_tool *tool,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 76ef7ee..080be93 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -634,8 +634,8 @@
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
 	struct perf_mmap *md = &evlist->mmap[idx];
-	unsigned int head = perf_mmap__read_head(md);
-	unsigned int old = md->prev;
+	u64 head = perf_mmap__read_head(md);
+	u64 old = md->prev;
 	unsigned char *data = md->base + page_size;
 	union perf_event *event = NULL;
 
@@ -716,7 +716,7 @@
 	struct perf_mmap *md = &evlist->mmap[idx];
 
 	if (!evlist->overwrite) {
-		unsigned int old = md->prev;
+		u64 old = md->prev;
 
 		perf_mmap__write_tail(md, old);
 	}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index fb19c47..b5cce95 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -27,7 +27,7 @@
 	void		 *base;
 	int		 mask;
 	int		 refcnt;
-	unsigned int	 prev;
+	u64		 prev;
 	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
 };
 
@@ -189,16 +189,15 @@
 int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
 
-static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
+static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->base;
-	int head = ACCESS_ONCE(pc->data_head);
+	u64 head = ACCESS_ONCE(pc->data_head);
 	rmb();
 	return head;
 }
 
-static inline void perf_mmap__write_tail(struct perf_mmap *md,
-					 unsigned long tail)
+static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 {
 	struct perf_event_mmap_page *pc = md->base;
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index fff3b2a..918fd8a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2504,8 +2504,11 @@
 		if (read_attr(fd, header, &f_attr) < 0)
 			goto out_errno;
 
-		if (header->needs_swap)
+		if (header->needs_swap) {
+			f_attr.ids.size   = bswap_64(f_attr.ids.size);
+			f_attr.ids.offset = bswap_64(f_attr.ids.offset);
 			perf_event__attr_swap(&f_attr.attr);
+		}
 
 		tmp = lseek(fd, 0, SEEK_CUR);
 		evsel = perf_evsel__new(&f_attr.attr);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9c380a2..527e032 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -14,8 +14,6 @@
 #include "unwind.h"
 #include "linux/hash.h"
 
-static void machine__remove_thread(struct machine *machine, struct thread *th);
-
 static void dsos__init(struct dsos *dsos)
 {
 	INIT_LIST_HEAD(&dsos->head);
@@ -1256,7 +1254,7 @@
 	return 0;
 }
 
-static void machine__remove_thread(struct machine *machine, struct thread *th)
+void machine__remove_thread(struct machine *machine, struct thread *th)
 {
 	if (machine->last_match == th)
 		thread__zput(machine->last_match);
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e2faf3b..6d64ced 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -120,6 +120,7 @@
 void machine__exit(struct machine *machine);
 void machine__delete_threads(struct machine *machine);
 void machine__delete(struct machine *machine);
+void machine__remove_thread(struct machine *machine, struct thread *th);
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 					   struct addr_location *al);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index b788517..30545ce 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1906,6 +1906,7 @@
 
 	free(pev->event);
 	free(pev->group);
+	free(pev->target);
 	clear_perf_probe_point(&pev->point);
 
 	for (i = 0; i < pev->nargs; i++) {
@@ -2654,7 +2655,7 @@
 };
 
 int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-			  int max_tevs, const char *target, bool force_add)
+			  int max_tevs, bool force_add)
 {
 	int i, j, ret;
 	struct __event_package *pkgs;
@@ -2678,7 +2679,7 @@
 		ret  = convert_to_probe_trace_events(pkgs[i].pev,
 						     &pkgs[i].tevs,
 						     max_tevs,
-						     target);
+						     pkgs[i].pev->target);
 		if (ret < 0)
 			goto end;
 		pkgs[i].ntevs = ret;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e01e994..d6b7834 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -73,7 +73,8 @@
 	char			*group;	/* Group name */
 	struct perf_probe_point	point;	/* Probe point */
 	int			nargs;	/* Number of arguments */
-	bool			uprobes;
+	bool			uprobes;	/* Uprobe event flag */
+	char			*target;	/* Target binary */
 	struct perf_probe_arg	*args;	/* Arguments */
 };
 
@@ -124,8 +125,7 @@
 extern const char *kernel_get_module_path(const char *module);
 
 extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-				 int max_probe_points, const char *module,
-				 bool force_add);
+				 int max_probe_points, bool force_add);
 extern int del_perf_probe_events(struct strlist *dellist);
 extern int show_perf_probe_events(void);
 extern int show_line_range(struct line_range *lr, const char *module,