Merge branches 'tracing/branch-tracer', 'tracing/ftrace', 'tracing/function-graph-tracer', 'tracing/markers', 'tracing/powerpc', 'tracing/stack-tracer' and 'tracing/tracepoints' into tracing/core
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index de05042..803b131 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -1251,7 +1251,11 @@
<match>*<match> will not work.
- # echo hrtimer_* > /debug/tracing/set_ftrace_filter
+Note: It is better to use quotes to enclose the wild cards, otherwise
+ the shell may expand the parameters into names of files in the local
+ directory.
+
+ # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
Produces:
@@ -1306,7 +1310,7 @@
# echo sys_nanosleep > /debug/tracing/set_ftrace_filter
# cat /debug/tracing/set_ftrace_filter
sys_nanosleep
- # echo hrtimer_* >> /debug/tracing/set_ftrace_filter
+ # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter
# cat /debug/tracing/set_ftrace_filter
hrtimer_run_queues
hrtimer_run_pending
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index 6d275e4..d2b3d0e 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -51,11 +51,16 @@
activated by calling marker_arm(). Marker deactivation can be done by calling
marker_disarm() as many times as marker_arm() has been called. Removing a probe
is done through marker_probe_unregister(); it will disarm the probe.
-marker_synchronize_unregister() must be called before the end of the module exit
-function to make sure there is no caller left using the probe. This, and the
-fact that preemption is disabled around the probe call, make sure that probe
-removal and module unload are safe. See the "Probe example" section below for a
-sample probe module.
+
+marker_synchronize_unregister() must be called between probe unregistration and
+the first occurrence of
+- the end of module exit function,
+ to make sure there is no caller left using the probe;
+- the free of any resource used by the probes,
+ to make sure the probes wont be accessing invalid data.
+This, and the fact that preemption is disabled around the probe call, make sure
+that probe removal and module unload are safe. See the "Probe example" section
+below for a sample probe module.
The marker mechanism supports inserting multiple instances of the same marker.
Markers can be put in inline functions, inlined static functions, and
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
index 2d42241..6f0a044 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/tracepoints.txt
@@ -45,7 +45,7 @@
#include <linux/tracepoint.h>
DECLARE_TRACE(subsys_eventname,
- TPPTOTO(int firstarg, struct task_struct *p),
+ TPPROTO(int firstarg, struct task_struct *p),
TPARGS(firstarg, p));
In subsys/file.c (where the tracing statement must be added) :
@@ -66,7 +66,7 @@
- subsys is the name of your subsystem.
- eventname is the name of the event to trace.
-- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the
+- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the
function called by this tracepoint.
- TPARGS(firstarg, p) are the parameters names, same as found in the
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 92673b4..d17edb4 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -17,6 +17,7 @@
CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
ifdef CONFIG_DYNAMIC_FTRACE
# dynamic ftrace setup.
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 7ecc0d1..6f7eb7e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1162,39 +1162,17 @@
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
- stwu r1,-48(r1)
- stw r3, 12(r1)
- stw r4, 16(r1)
- stw r5, 20(r1)
- stw r6, 24(r1)
- mflr r3
- stw r7, 28(r1)
- mfcr r5
- stw r8, 32(r1)
- stw r9, 36(r1)
- stw r10,40(r1)
- stw r3, 44(r1)
- stw r5, 8(r1)
- subi r3, r3, MCOUNT_INSN_SIZE
- .globl mcount_call
-mcount_call:
- bl ftrace_stub
- nop
- lwz r6, 8(r1)
- lwz r0, 44(r1)
- lwz r3, 12(r1)
+ /*
+ * It is required that _mcount on PPC32 must preserve the
+ * link register. But we have r0 to play with. We use r0
+ * to push the return address back to the caller of mcount
+ * into the ctr register, restore the link register and
+ * then jump back using the ctr register.
+ */
+ mflr r0
mtctr r0
- lwz r4, 16(r1)
- mtcr r6
- lwz r5, 20(r1)
- lwz r6, 24(r1)
- lwz r0, 52(r1)
- lwz r7, 28(r1)
- lwz r8, 32(r1)
+ lwz r0, 4(r1)
mtlr r0
- lwz r9, 36(r1)
- lwz r10,40(r1)
- addi r1, r1, 48
bctr
_GLOBAL(ftrace_caller)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e6d5284..b00982e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -888,18 +888,6 @@
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
- /* Taken from output of objdump from lib64/glibc */
- mflr r3
- stdu r1, -112(r1)
- std r3, 128(r1)
- subi r3, r3, MCOUNT_INSN_SIZE
- .globl mcount_call
-mcount_call:
- bl ftrace_stub
- nop
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
blr
_GLOBAL(ftrace_caller)
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 3271cd6..5355244 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -114,19 +114,9 @@
*/
static int test_24bit_addr(unsigned long ip, unsigned long addr)
{
- long diff;
- /*
- * Can we get to addr from ip in 24 bits?
- * (26 really, since we mulitply by 4 for 4 byte alignment)
- */
- diff = addr - ip;
-
- /*
- * Return true if diff is less than 1 << 25
- * and greater than -1 << 26.
- */
- return (diff < (1 << 25)) && (diff > (-1 << 26));
+ /* use the create_branch to verify that this offset can be branched */
+ return create_branch((unsigned int *)ip, addr, 0);
}
static int is_bl_op(unsigned int op)
@@ -134,11 +124,6 @@
return (op & 0xfc000003) == 0x48000001;
}
-static int test_offset(unsigned long offset)
-{
- return (offset + 0x2000000 > 0x3ffffff) || ((offset & 3) != 0);
-}
-
static unsigned long find_bl_target(unsigned long ip, unsigned int op)
{
static int offset;
@@ -151,37 +136,30 @@
return ip + (long)offset;
}
-static unsigned int branch_offset(unsigned long offset)
-{
- /* return "bl ip+offset" */
- return 0x48000001 | (offset & 0x03fffffc);
-}
-
#ifdef CONFIG_PPC64
static int
__ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned char replaced[MCOUNT_INSN_SIZE * 2];
- unsigned int *op = (unsigned *)&replaced;
- unsigned char jmp[8];
- unsigned long *ptr = (unsigned long *)&jmp;
+ unsigned int op;
+ unsigned int jmp[5];
+ unsigned long ptr;
unsigned long ip = rec->ip;
unsigned long tramp;
int offset;
/* read where this goes */
- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
return -EFAULT;
/* Make sure that that this is still a 24bit jump */
- if (!is_bl_op(*op)) {
- printk(KERN_ERR "Not expected bl: opcode is %x\n", *op);
+ if (!is_bl_op(op)) {
+ printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
return -EINVAL;
}
/* lets find where the pointer goes */
- tramp = find_bl_target(ip, *op);
+ tramp = find_bl_target(ip, op);
/*
* On PPC64 the trampoline looks like:
@@ -200,19 +178,25 @@
DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
/* Find where the trampoline jumps to */
- if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
printk(KERN_ERR "Failed to read %lx\n", tramp);
return -EFAULT;
}
- DEBUGP(" %08x %08x",
- (unsigned)(*ptr >> 32),
- (unsigned)*ptr);
+ DEBUGP(" %08x %08x", jmp[0], jmp[1]);
- offset = (unsigned)jmp[2] << 24 |
- (unsigned)jmp[3] << 16 |
- (unsigned)jmp[6] << 8 |
- (unsigned)jmp[7];
+ /* verify that this is what we expect it to be */
+ if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
+ ((jmp[1] & 0xffff0000) != 0x398c0000) ||
+ (jmp[2] != 0xf8410028) ||
+ (jmp[3] != 0xe96c0020) ||
+ (jmp[4] != 0xe84c0028)) {
+ printk(KERN_ERR "Not a trampoline\n");
+ return -EINVAL;
+ }
+
+ offset = (unsigned)((unsigned short)jmp[0]) << 16 |
+ (unsigned)((unsigned short)jmp[1]);
DEBUGP(" %x ", offset);
@@ -225,13 +209,13 @@
return -EFAULT;
}
- DEBUGP(" %08x %08x\n",
- (unsigned)(*ptr >> 32),
- (unsigned)*ptr);
+ DEBUGP(" %08x %08x\n", jmp[0], jmp[1]);
+
+ ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
/* This should match what was called */
- if (*ptr != GET_ADDR(addr)) {
- printk(KERN_ERR "addr does not match %lx\n", *ptr);
+ if (ptr != GET_ADDR(addr)) {
+ printk(KERN_ERR "addr does not match %lx\n", ptr);
return -EINVAL;
}
@@ -240,11 +224,11 @@
* 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1)
* This needs to be turned to a nop too.
*/
- if (probe_kernel_read(replaced, (void *)(ip+4), MCOUNT_INSN_SIZE))
+ if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
return -EFAULT;
- if (*op != 0xe8410028) {
- printk(KERN_ERR "Next line is not ld! (%08x)\n", *op);
+ if (op != 0xe8410028) {
+ printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
return -EINVAL;
}
@@ -261,11 +245,14 @@
* ld r2,40(r1)
* 1:
*/
- op[0] = 0x48000008; /* b +8 */
+ op = 0x48000008; /* b +8 */
- if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+ if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
return -EPERM;
+
+ flush_icache_range(ip, ip + 8);
+
return 0;
}
@@ -274,46 +261,52 @@
__ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned char replaced[MCOUNT_INSN_SIZE];
- unsigned int *op = (unsigned *)&replaced;
- unsigned char jmp[8];
- unsigned int *ptr = (unsigned int *)&jmp;
+ unsigned int op;
+ unsigned int jmp[4];
unsigned long ip = rec->ip;
unsigned long tramp;
- int offset;
- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
/* Make sure that that this is still a 24bit jump */
- if (!is_bl_op(*op)) {
- printk(KERN_ERR "Not expected bl: opcode is %x\n", *op);
+ if (!is_bl_op(op)) {
+ printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
return -EINVAL;
}
/* lets find where the pointer goes */
- tramp = find_bl_target(ip, *op);
+ tramp = find_bl_target(ip, op);
/*
* On PPC32 the trampoline looks like:
- * lis r11,sym@ha
- * addi r11,r11,sym@l
- * mtctr r11
- * bctr
+ * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha
+ * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l
+ * 0x7d, 0x69, 0x03, 0xa6 mtctr r11
+ * 0x4e, 0x80, 0x04, 0x20 bctr
*/
DEBUGP("ip:%lx jumps to %lx", ip, tramp);
/* Find where the trampoline jumps to */
- if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
printk(KERN_ERR "Failed to read %lx\n", tramp);
return -EFAULT;
}
- DEBUGP(" %08x %08x ", ptr[0], ptr[1]);
+ DEBUGP(" %08x %08x ", jmp[0], jmp[1]);
- tramp = (ptr[1] & 0xffff) |
- ((ptr[0] & 0xffff) << 16);
+ /* verify that this is what we expect it to be */
+ if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
+ ((jmp[1] & 0xffff0000) != 0x396b0000) ||
+ (jmp[2] != 0x7d6903a6) ||
+ (jmp[3] != 0x4e800420)) {
+ printk(KERN_ERR "Not a trampoline\n");
+ return -EINVAL;
+ }
+
+ tramp = (jmp[1] & 0xffff) |
+ ((jmp[0] & 0xffff) << 16);
if (tramp & 0x8000)
tramp -= 0x10000;
@@ -326,11 +319,13 @@
return -EINVAL;
}
- op[0] = PPC_NOP_INSTR;
+ op = PPC_NOP_INSTR;
- if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+ if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
return -EPERM;
+ flush_icache_range(ip, ip + 8);
+
return 0;
}
#endif /* PPC64 */
@@ -384,13 +379,11 @@
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned char replaced[MCOUNT_INSN_SIZE * 2];
- unsigned int *op = (unsigned *)&replaced;
+ unsigned int op[2];
unsigned long ip = rec->ip;
- unsigned long offset;
/* read where this goes */
- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE * 2))
+ if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2))
return -EFAULT;
/*
@@ -409,43 +402,40 @@
return -EINVAL;
}
- /* now calculate a jump to the ftrace caller trampoline */
- offset = rec->arch.mod->arch.tramp - ip;
-
- if (test_offset(offset)) {
- printk(KERN_ERR "REL24 %li out of range!\n",
- (long int)offset);
+ /* create the branch to the trampoline */
+ op[0] = create_branch((unsigned int *)ip,
+ rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ if (!op[0]) {
+ printk(KERN_ERR "REL24 out of range!\n");
return -EINVAL;
}
- /* Set to "bl addr" */
- op[0] = branch_offset(offset);
/* ld r2,40(r1) */
op[1] = 0xe8410028;
DEBUGP("write to %lx\n", rec->ip);
- if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE * 2))
+ if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
return -EPERM;
+ flush_icache_range(ip, ip + 8);
+
return 0;
}
#else
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned char replaced[MCOUNT_INSN_SIZE];
- unsigned int *op = (unsigned *)&replaced;
+ unsigned int op;
unsigned long ip = rec->ip;
- unsigned long offset;
/* read where this goes */
- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
/* It should be pointing to a nop */
- if (op[0] != PPC_NOP_INSTR) {
- printk(KERN_ERR "Expected NOP but have %x\n", op[0]);
+ if (op != PPC_NOP_INSTR) {
+ printk(KERN_ERR "Expected NOP but have %x\n", op);
return -EINVAL;
}
@@ -455,23 +445,21 @@
return -EINVAL;
}
- /* now calculate a jump to the ftrace caller trampoline */
- offset = rec->arch.mod->arch.tramp - ip;
-
- if (test_offset(offset)) {
- printk(KERN_ERR "REL24 %li out of range!\n",
- (long int)offset);
+ /* create the branch to the trampoline */
+ op = create_branch((unsigned int *)ip,
+ rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ if (!op) {
+ printk(KERN_ERR "REL24 out of range!\n");
return -EINVAL;
}
- /* Set to "bl addr" */
- op[0] = branch_offset(offset);
-
DEBUGP("write to %lx\n", rec->ip);
- if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+ if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
return -EPERM;
+ flush_icache_range(ip, ip + 8);
+
return 0;
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index d69912c..8db3527 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -6,6 +6,9 @@
EXTRA_CFLAGS += -mno-minimal-toc
endif
+CFLAGS_REMOVE_code-patching.o = -pg
+CFLAGS_REMOVE_feature-fixups.o = -pg
+
obj-y := string.o alloc.o \
checksum_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
diff --git a/fs/seq_file.c b/fs/seq_file.c
index f03220d7..16c2115 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -387,7 +387,7 @@
}
return NULL;
}
-EXPORT_SYMBOL_GPL(mangle_path);
+EXPORT_SYMBOL(mangle_path);
/*
* return the absolute path of 'dentry' residing in mount 'mnt'.
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 34c14bc..b85e74c 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -211,8 +211,10 @@
/*
* marker_synchronize_unregister must be called between the last marker probe
- * unregistration and the end of module exit to make sure there is no caller
- * executing a probe when it is freed.
+ * unregistration and the first one of
+ * - the end of module exit function
+ * - the free of any resource used by the probes
+ * to ensure the code and data are valid for any possibly running probes.
*/
#define marker_synchronize_unregister() synchronize_sched()
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cbf8b09..08b536a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -786,7 +786,6 @@
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
struct ftrace_iterator {
- loff_t pos;
struct ftrace_page *pg;
unsigned idx;
unsigned flags;
@@ -811,6 +810,8 @@
iter->pg = iter->pg->next;
iter->idx = 0;
goto retry;
+ } else {
+ iter->idx = -1;
}
} else {
rec = &iter->pg->records[iter->idx++];
@@ -833,8 +834,6 @@
}
spin_unlock(&ftrace_lock);
- iter->pos = *pos;
-
return rec;
}
@@ -842,13 +841,15 @@
{
struct ftrace_iterator *iter = m->private;
void *p = NULL;
- loff_t l = -1;
- if (*pos > iter->pos)
- *pos = iter->pos;
+ if (*pos > 0) {
+ if (iter->idx < 0)
+ return p;
+ (*pos)--;
+ iter->idx--;
+ }
- l = *pos;
- p = t_next(m, p, &l);
+ p = t_next(m, p, pos);
return p;
}
@@ -859,21 +860,15 @@
static int t_show(struct seq_file *m, void *v)
{
- struct ftrace_iterator *iter = m->private;
struct dyn_ftrace *rec = v;
char str[KSYM_SYMBOL_LEN];
- int ret = 0;
if (!rec)
return 0;
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
- ret = seq_printf(m, "%s\n", str);
- if (ret < 0) {
- iter->pos--;
- iter->idx--;
- }
+ seq_printf(m, "%s\n", str);
return 0;
}
@@ -899,7 +894,6 @@
return -ENOMEM;
iter->pg = ftrace_pages_start;
- iter->pos = 0;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -986,7 +980,6 @@
if (file->f_mode & FMODE_READ) {
iter->pg = ftrace_pages_start;
- iter->pos = 0;
iter->flags = enable ? FTRACE_ITER_FILTER :
FTRACE_ITER_NOTRACE;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5811e0a5f..91887a2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -983,6 +983,7 @@
struct trace_array_cpu *data,
unsigned long flags, int pc)
{
+#ifdef CONFIG_STACKTRACE
struct ring_buffer_event *event;
struct userstack_entry *entry;
struct stack_trace trace;
@@ -1008,6 +1009,7 @@
save_stack_trace_user(&trace);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+#endif
}
void __trace_userstack(struct trace_array *tr,
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 877ee88..bc97275 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -6,6 +6,7 @@
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
+#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d31d695..894b50b 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,24 @@
#define TRACE_GRAPH_INDENT 2
+/* Flag options */
#define TRACE_GRAPH_PRINT_OVERRUN 0x1
+#define TRACE_GRAPH_PRINT_CPU 0x2
+#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
+
static struct tracer_opt trace_opts[] = {
- /* Display overruns or not */
- { TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+ /* Display overruns ? */
+ { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+ /* Display CPU ? */
+ { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
+ /* Display Overhead ? */
+ { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
- .val = 0, /* Don't display overruns by default */
+ /* Don't display overruns by default */
+ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
.opts = trace_opts
};
@@ -52,37 +61,208 @@
unregister_ftrace_graph();
}
+static inline int log10_cpu(int nb)
+{
+ if (nb / 100)
+ return 3;
+ if (nb / 10)
+ return 2;
+ return 1;
+}
+
+static enum print_line_t
+print_graph_cpu(struct trace_seq *s, int cpu)
+{
+ int i;
+ int ret;
+ int log10_this = log10_cpu(cpu);
+ int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+
+
+ /*
+ * Start with a space character - to make it stand out
+ * to the right a bit when trace output is pasted into
+ * email:
+ */
+ ret = trace_seq_printf(s, " ");
+
+ /*
+ * Tricky - we space the CPU field according to the max
+ * number of online CPUs. On a 2-cpu system it would take
+ * a maximum of 1 digit - on a 128 cpu system it would
+ * take up to 3 digits:
+ */
+ for (i = 0; i < log10_all - log10_this; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ ret = trace_seq_printf(s, "%d) ", cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+
/* If the pid changed since the last trace, output this event */
static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
{
- char *comm;
+ char *comm, *prev_comm;
+ pid_t prev_pid;
+ int ret;
if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
return 1;
+ prev_pid = last_pid[cpu];
last_pid[cpu] = pid;
- comm = trace_find_cmdline(pid);
- return trace_seq_printf(s, "\nCPU[%03d]"
- " ------------8<---------- thread %s-%d"
- " ------------8<----------\n\n",
- cpu, comm, pid);
+ comm = trace_find_cmdline(pid);
+ prev_comm = trace_find_cmdline(prev_pid);
+
+/*
+ * Context-switch trace line:
+
+ ------------------------------------------
+ | 1) migration/0--1 => sshd-1755
+ ------------------------------------------
+
+ */
+ ret = trace_seq_printf(s,
+ " ------------------------------------------\n");
+ ret += trace_seq_printf(s, " | %d) %s-%d => %s-%d\n",
+ cpu, prev_comm, prev_pid, comm, pid);
+ ret += trace_seq_printf(s,
+ " ------------------------------------------\n\n");
+ return ret;
}
-static enum print_line_t
-print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s,
- struct trace_entry *ent, int cpu)
+static bool
+trace_branch_is_leaf(struct trace_iterator *iter,
+ struct ftrace_graph_ent_entry *curr)
{
- int i;
+ struct ring_buffer_iter *ring_iter;
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ret_entry *next;
+
+ ring_iter = iter->buffer_iter[iter->cpu];
+
+ if (!ring_iter)
+ return false;
+
+ event = ring_buffer_iter_peek(ring_iter, NULL);
+
+ if (!event)
+ return false;
+
+ next = ring_buffer_event_data(event);
+
+ if (next->ent.type != TRACE_GRAPH_RET)
+ return false;
+
+ if (curr->ent.pid != next->ent.pid ||
+ curr->graph_ent.func != next->ret.func)
+ return false;
+
+ return true;
+}
+
+
+static inline int
+print_graph_duration(unsigned long long duration, struct trace_seq *s)
+{
+ unsigned long nsecs_rem = do_div(duration, 1000);
+ return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
+}
+
+/* Signal a overhead of time execution to the output */
+static int
+print_graph_overhead(unsigned long long duration, struct trace_seq *s)
+{
+ /* Duration exceeded 100 msecs */
+ if (duration > 100000ULL)
+ return trace_seq_printf(s, "! ");
+
+ /* Duration exceeded 10 msecs */
+ if (duration > 10000ULL)
+ return trace_seq_printf(s, "+ ");
+
+ return trace_seq_printf(s, " ");
+}
+
+/* Case of a leaf function on its call entry */
+static enum print_line_t
+print_graph_entry_leaf(struct trace_iterator *iter,
+ struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
+{
+ struct ftrace_graph_ret_entry *ret_entry;
+ struct ftrace_graph_ret *graph_ret;
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ent *call;
+ unsigned long long duration;
int ret;
+ int i;
- if (!verif_pid(s, ent->pid, cpu))
- return TRACE_TYPE_PARTIAL_LINE;
+ event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+ ret_entry = ring_buffer_event_data(event);
+ graph_ret = &ret_entry->ret;
+ call = &entry->graph_ent;
+ duration = graph_ret->rettime - graph_ret->calltime;
- ret = trace_seq_printf(s, "CPU[%03d] ", cpu);
+ /* Must not exceed 8 characters: 9999.999 us */
+ if (duration > 10000000ULL)
+ duration = 9999999ULL;
+
+ /* Overhead */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Duration */
+ ret = print_graph_duration(duration, s);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
+ /* Function */
+ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = seq_print_ip_sym(s, call->func, 0);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ ret = trace_seq_printf(s, "();\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
+ struct trace_seq *s)
+{
+ int i;
+ int ret;
+ struct ftrace_graph_ent *call = &entry->graph_ent;
+
+ /* No overhead */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* No time */
+ ret = trace_seq_printf(s, " | ");
+
+ /* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
ret = trace_seq_printf(s, " ");
if (!ret)
@@ -96,37 +276,82 @@
ret = trace_seq_printf(s, "() {\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
+
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
+print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
+ struct trace_iterator *iter, int cpu)
+{
+ int ret;
+ struct trace_entry *ent = iter->ent;
+
+ /* Pid */
+ if (!verif_pid(s, ent->pid, cpu))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (trace_branch_is_leaf(iter, field))
+ return print_graph_entry_leaf(iter, field, s);
+ else
+ return print_graph_entry_nested(field, s);
+
+}
+
+static enum print_line_t
print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
struct trace_entry *ent, int cpu)
{
int i;
int ret;
+ unsigned long long duration = trace->rettime - trace->calltime;
+ /* Must not exceed 8 characters: xxxx.yyy us */
+ if (duration > 10000000ULL)
+ duration = 9999999ULL;
+
+ /* Pid */
if (!verif_pid(s, ent->pid, cpu))
return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, "CPU[%03d] ", cpu);
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Overhead */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Duration */
+ ret = print_graph_duration(duration, s);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
+ /* Closing brace */
for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
ret = trace_seq_printf(s, " ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
- ret = trace_seq_printf(s, "} ");
+ ret = trace_seq_printf(s, "}\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, "%llu\n", trace->rettime - trace->calltime);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
+ /* Overrun */
if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
ret = trace_seq_printf(s, " (Overruns: %lu)\n",
trace->overrun);
@@ -146,7 +371,7 @@
case TRACE_GRAPH_ENT: {
struct ftrace_graph_ent_entry *field;
trace_assign_type(field, entry);
- return print_graph_entry(&field->graph_ent, s, entry,
+ return print_graph_entry(field, s, iter,
iter->cpu);
}
case TRACE_GRAPH_RET: {
@@ -160,7 +385,7 @@
}
static struct tracer graph_trace __read_mostly = {
- .name = "function-graph",
+ .name = "function_graph",
.init = graph_trace_init,
.reset = graph_trace_reset,
.print_line = print_graph_function,