perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info

Introduce a new option to record PERF_RECORD_NAMESPACES events emitted
by the kernel when fork, clone, setns or unshare are invoked. And update
perf-record documentation with the new option to record namespace
events.

Committer notes:

Combined it with a later patch to allow printing it via 'perf report -D'
and be able to test the feature introduced in this patch. Had to move
here also perf_ns__name(), that was introduced in another later patch.

Also used PRIu64 and PRIx64 to fix the build in some enfironments wrt:

  util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=]
     ret  += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx
                                         ^
Testing it:

  # perf record --namespaces -a
  ^C[ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 1.083 MB perf.data (423 samples) ]
  #
  # perf report -D
  <SNIP>
  3 2028902078892 0x115140 [0xa0]: PERF_RECORD_NAMESPACES 14783/14783 - nr_namespaces: 7
                [0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
                 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]

  0x1151e0 [0x30]: event: 9
  .
  . ... raw event: size 48 bytes
  .  0000:  09 00 00 00 02 00 30 00 c4 71 82 68 0c 7f 00 00  ......0..q.h....
  .  0010:  a9 39 00 00 a9 39 00 00 94 28 fe 63 d8 01 00 00  .9...9...(.c....
  .  0020:  03 00 00 00 00 00 00 00 ce c4 02 00 00 00 00 00  ................
  <SNIP>
        NAMESPACES events:          1
  <SNIP>
  #

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sargun Dhillon <sargun@sargun.me>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/148891930386.25309.18412039920746995488.stgit@hbathini.in.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5da376b..2ea5ee1 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -42,6 +42,7 @@
 libperf-y += session.o
 libperf-$(CONFIG_AUDIT) += syscalltbl.o
 libperf-y += ordered-events.o
+libperf-y += namespaces.o
 libperf-y += comm.o
 libperf-y += thread.o
 libperf-y += thread_map.o
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 4e6cbc9..89ece24 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1468,6 +1468,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
 			.lost            = perf_event__process_lost,
 			.tracing_data    = perf_event__process_tracing_data,
 			.build_id        = perf_event__process_build_id,
+			.namespaces      = perf_event__process_namespaces,
 			.ordered_events  = true,
 			.ordering_requires_timestamps = true,
 		},
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 4ea7ce7..fb52819 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -31,6 +31,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_LOST_SAMPLES]		= "LOST_SAMPLES",
 	[PERF_RECORD_SWITCH]			= "SWITCH",
 	[PERF_RECORD_SWITCH_CPU_WIDE]		= "SWITCH_CPU_WIDE",
+	[PERF_RECORD_NAMESPACES]		= "NAMESPACES",
 	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
 	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
@@ -49,6 +50,16 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_TIME_CONV]			= "TIME_CONV",
 };
 
+static const char *perf_ns__names[] = {
+	[NET_NS_INDEX]		= "net",
+	[UTS_NS_INDEX]		= "uts",
+	[IPC_NS_INDEX]		= "ipc",
+	[PID_NS_INDEX]		= "pid",
+	[USER_NS_INDEX]		= "user",
+	[MNT_NS_INDEX]		= "mnt",
+	[CGROUP_NS_INDEX]	= "cgroup",
+};
+
 const char *perf_event__name(unsigned int id)
 {
 	if (id >= ARRAY_SIZE(perf_event__names))
@@ -58,6 +69,13 @@ const char *perf_event__name(unsigned int id)
 	return perf_event__names[id];
 }
 
+static const char *perf_ns__name(unsigned int id)
+{
+	if (id >= ARRAY_SIZE(perf_ns__names))
+		return "UNKNOWN";
+	return perf_ns__names[id];
+}
+
 static int perf_tool__process_synth_event(struct perf_tool *tool,
 					  union perf_event *event,
 					  struct machine *machine,
@@ -1008,6 +1026,33 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
 	return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
 }
 
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
+{
+	size_t ret = 0;
+	struct perf_ns_link_info *ns_link_info;
+	u32 nr_namespaces, idx;
+
+	ns_link_info = event->namespaces.link_info;
+	nr_namespaces = event->namespaces.nr_namespaces;
+
+	ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
+		       event->namespaces.pid,
+		       event->namespaces.tid,
+		       nr_namespaces);
+
+	for (idx = 0; idx < nr_namespaces; idx++) {
+		if (idx && (idx % 4 == 0))
+			ret += fprintf(fp, "\n\t\t ");
+
+		ret  += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx,
+				perf_ns__name(idx), (u64)ns_link_info[idx].dev,
+				(u64)ns_link_info[idx].ino,
+				((idx + 1) != nr_namespaces) ? ", " : "]\n");
+	}
+
+	return ret;
+}
+
 int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
 			     struct perf_sample *sample,
@@ -1016,6 +1061,14 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
 	return machine__process_comm_event(machine, event, sample);
 }
 
+int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	return machine__process_namespaces_event(machine, event, sample);
+}
+
 int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
 			     struct perf_sample *sample,
@@ -1196,6 +1249,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 	case PERF_RECORD_MMAP:
 		ret += perf_event__fprintf_mmap(event, fp);
 		break;
+	case PERF_RECORD_NAMESPACES:
+		ret += perf_event__fprintf_namespaces(event, fp);
+		break;
 	case PERF_RECORD_MMAP2:
 		ret += perf_event__fprintf_mmap2(event, fp);
 		break;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c735c53..b39ff79 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -39,6 +39,13 @@ struct comm_event {
 	char comm[16];
 };
 
+struct namespaces_event {
+	struct perf_event_header header;
+	u32 pid, tid;
+	u64 nr_namespaces;
+	struct perf_ns_link_info link_info[];
+};
+
 struct fork_event {
 	struct perf_event_header header;
 	u32 pid, ppid;
@@ -485,6 +492,7 @@ union perf_event {
 	struct mmap_event		mmap;
 	struct mmap2_event		mmap2;
 	struct comm_event		comm;
+	struct namespaces_event		namespaces;
 	struct fork_event		fork;
 	struct lost_event		lost;
 	struct lost_samples_event	lost_samples;
@@ -587,6 +595,10 @@ int perf_event__process_switch(struct perf_tool *tool,
 			       union perf_event *event,
 			       struct perf_sample *sample,
 			       struct machine *machine);
+int perf_event__process_namespaces(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine);
 int perf_event__process_mmap(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
@@ -653,6 +665,7 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
 u64 kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ac59710..175dc23 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -932,6 +932,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 	attr->mmap2 = track && !perf_missing_features.mmap2;
 	attr->comm  = track;
 
+	if (opts->record_namespaces)
+		attr->namespaces  = track;
+
 	if (opts->record_switch_events)
 		attr->context_switch = track;
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b9974fe..dfc6004 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -13,6 +13,7 @@
 #include <symbol/kallsyms.h>
 #include "unwind.h"
 #include "linux/hash.h"
+#include "asm/bug.h"
 
 static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
 
@@ -501,6 +502,37 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
 	return err;
 }
 
+int machine__process_namespaces_event(struct machine *machine __maybe_unused,
+				      union perf_event *event,
+				      struct perf_sample *sample __maybe_unused)
+{
+	struct thread *thread = machine__findnew_thread(machine,
+							event->namespaces.pid,
+							event->namespaces.tid);
+	int err = 0;
+
+	WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES,
+		  "\nWARNING: kernel seems to support more namespaces than perf"
+		  " tool.\nTry updating the perf tool..\n\n");
+
+	WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES,
+		  "\nWARNING: perf tool seems to support more namespaces than"
+		  " the kernel.\nTry updating the kernel..\n\n");
+
+	if (dump_trace)
+		perf_event__fprintf_namespaces(event, stdout);
+
+	if (thread == NULL ||
+	    thread__set_namespaces(thread, sample->time, &event->namespaces)) {
+		dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
+		err = -1;
+	}
+
+	thread__put(thread);
+
+	return err;
+}
+
 int machine__process_lost_event(struct machine *machine __maybe_unused,
 				union perf_event *event, struct perf_sample *sample __maybe_unused)
 {
@@ -1538,6 +1570,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
 		ret = machine__process_comm_event(machine, event, sample); break;
 	case PERF_RECORD_MMAP:
 		ret = machine__process_mmap_event(machine, event, sample); break;
+	case PERF_RECORD_NAMESPACES:
+		ret = machine__process_namespaces_event(machine, event, sample); break;
 	case PERF_RECORD_MMAP2:
 		ret = machine__process_mmap2_event(machine, event, sample); break;
 	case PERF_RECORD_FORK:
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a283050..3cdb134 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -97,6 +97,9 @@ int machine__process_itrace_start_event(struct machine *machine,
 					union perf_event *event);
 int machine__process_switch_event(struct machine *machine,
 				  union perf_event *event);
+int machine__process_namespaces_event(struct machine *machine,
+				      union perf_event *event,
+				      struct perf_sample *sample);
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample);
 int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
new file mode 100644
index 0000000..2de8da6
--- /dev/null
+++ b/tools/perf/util/namespaces.c
@@ -0,0 +1,36 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#include "namespaces.h"
+#include "util.h"
+#include "event.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+struct namespaces *namespaces__new(struct namespaces_event *event)
+{
+	struct namespaces *namespaces;
+	u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) *
+			      sizeof(struct perf_ns_link_info));
+
+	namespaces = zalloc(sizeof(struct namespaces) + link_info_size);
+	if (!namespaces)
+		return NULL;
+
+	namespaces->end_time = -1;
+
+	if (event)
+		memcpy(namespaces->link_info, event->link_info, link_info_size);
+
+	return namespaces;
+}
+
+void namespaces__free(struct namespaces *namespaces)
+{
+	free(namespaces);
+}
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
new file mode 100644
index 0000000..468f1e9
--- /dev/null
+++ b/tools/perf/util/namespaces.h
@@ -0,0 +1,26 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#ifndef __PERF_NAMESPACES_H
+#define __PERF_NAMESPACES_H
+
+#include "../perf.h"
+#include <linux/list.h>
+
+struct namespaces_event;
+
+struct namespaces {
+	struct list_head list;
+	u64 end_time;
+	struct perf_ns_link_info link_info[];
+};
+
+struct namespaces *namespaces__new(struct namespaces_event *event);
+void namespaces__free(struct namespaces *namespaces);
+
+#endif  /* __PERF_NAMESPACES_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1dd617d..ae42e74 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1239,6 +1239,8 @@ static int machines__deliver_event(struct machines *machines,
 		return tool->mmap2(tool, event, sample, machine);
 	case PERF_RECORD_COMM:
 		return tool->comm(tool, event, sample, machine);
+	case PERF_RECORD_NAMESPACES:
+		return tool->namespaces(tool, event, sample, machine);
 	case PERF_RECORD_FORK:
 		return tool->fork(tool, event, sample, machine);
 	case PERF_RECORD_EXIT:
@@ -1494,6 +1496,11 @@ int perf_session__register_idle_thread(struct perf_session *session)
 		err = -1;
 	}
 
+	if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) {
+		pr_err("problem inserting idle task.\n");
+		err = -1;
+	}
+
 	/* machine__findnew_thread() got the thread, so put it */
 	thread__put(thread);
 	return err;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 74e79d2..dcdb87a 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,6 +7,7 @@
 #include "thread-stack.h"
 #include "util.h"
 #include "debug.h"
+#include "namespaces.h"
 #include "comm.h"
 #include "unwind.h"
 
@@ -40,6 +41,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
 		thread->tid = tid;
 		thread->ppid = -1;
 		thread->cpu = -1;
+		INIT_LIST_HEAD(&thread->namespaces_list);
 		INIT_LIST_HEAD(&thread->comm_list);
 
 		comm_str = malloc(32);
@@ -66,7 +68,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
 
 void thread__delete(struct thread *thread)
 {
-	struct comm *comm, *tmp;
+	struct namespaces *namespaces, *tmp_namespaces;
+	struct comm *comm, *tmp_comm;
 
 	BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
 
@@ -76,7 +79,12 @@ void thread__delete(struct thread *thread)
 		map_groups__put(thread->mg);
 		thread->mg = NULL;
 	}
-	list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) {
+	list_for_each_entry_safe(namespaces, tmp_namespaces,
+				 &thread->namespaces_list, list) {
+		list_del(&namespaces->list);
+		namespaces__free(namespaces);
+	}
+	list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
 		list_del(&comm->list);
 		comm__free(comm);
 	}
@@ -104,6 +112,38 @@ void thread__put(struct thread *thread)
 	}
 }
 
+struct namespaces *thread__namespaces(const struct thread *thread)
+{
+	if (list_empty(&thread->namespaces_list))
+		return NULL;
+
+	return list_first_entry(&thread->namespaces_list, struct namespaces, list);
+}
+
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct namespaces_event *event)
+{
+	struct namespaces *new, *curr = thread__namespaces(thread);
+
+	new = namespaces__new(event);
+	if (!new)
+		return -ENOMEM;
+
+	list_add(&new->list, &thread->namespaces_list);
+
+	if (timestamp && curr) {
+		/*
+		 * setns syscall must have changed few or all the namespaces
+		 * of this thread. Update end time for the namespaces
+		 * previously used.
+		 */
+		curr = list_next_entry(new, list);
+		curr->end_time = timestamp;
+	}
+
+	return 0;
+}
+
 struct comm *thread__comm(const struct thread *thread)
 {
 	if (list_empty(&thread->comm_list))
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index e571885..4eb849e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -28,6 +28,7 @@ struct thread {
 	bool			comm_set;
 	int			comm_len;
 	bool			dead; /* if set thread has exited */
+	struct list_head	namespaces_list;
 	struct list_head	comm_list;
 	u64			db_id;
 
@@ -40,6 +41,7 @@ struct thread {
 };
 
 struct machine;
+struct namespaces;
 struct comm;
 
 struct thread *thread__new(pid_t pid, pid_t tid);
@@ -62,6 +64,10 @@ static inline void thread__exited(struct thread *thread)
 	thread->dead = true;
 }
 
+struct namespaces *thread__namespaces(const struct thread *thread);
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct namespaces_event *event);
+
 int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp,
 		       bool exec);
 static inline int thread__set_comm(struct thread *thread, const char *comm,
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index ac2590a..829471a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -40,6 +40,7 @@ struct perf_tool {
 	event_op	mmap,
 			mmap2,
 			comm,
+			namespaces,
 			fork,
 			exit,
 			lost,
@@ -66,6 +67,7 @@ struct perf_tool {
 	event_op3	auxtrace;
 	bool		ordered_events;
 	bool		ordering_requires_timestamps;
+	bool		namespace_events;
 };
 
 #endif /* __PERF_TOOL_H */