Fixed bug with labels, added support for tuples in hash

commit: cc27edfdf89f646eab14b517a59e0d74af24462a [log] [tgz]
author: Sasha Goldshtein <goldshtn@gmail.com> Sun Feb 14 03:49:01 2016 -0800
committer: Sasha Goldshtein <goldshtn@gmail.com> Sun Feb 14 03:49:01 2016 -0800
tree: 91b9819a2d3212e4d481d4316e97bc81bbc3f2ba
parent: 7983d6b61f83275471ee8fc4f2162facf4b36d91 [diff] [blame]
diff --git a/tools/argdist.py b/tools/argdist.py
index 76e68fd..fd53cf3 100755
--- a/tools/argdist.py
+++ b/tools/argdist.py

@@ -25,8 +25,8 @@
 {
         PREFIX
         PID_FILTER
-        KEY_EXPR
         if (!(FILTER)) return 0;
+        KEY_EXPR
         COLLECT
         return 0;
 }
@@ -92,11 +92,13 @@
                 # when entering the function.
                 self.args_to_probe = set()
                 regex = r"\$entry\((\w+)\)"
-                for arg in re.finditer(regex, self.expr):
-                        self.args_to_probe.add(arg.group(1))
+                for expr in self.exprs:
+                        for arg in re.finditer(regex, expr):
+                                self.args_to_probe.add(arg.group(1))
                 for arg in re.finditer(regex, self.filter):
                         self.args_to_probe.add(arg.group(1))
-                if "$latency" in self.expr or "$latency" in self.filter:
+                if any(map(lambda expr: "$latency" in expr, self.exprs)) or \
+                   "$latency" in self.filter:
                         self.args_to_probe.add("__latency")
                         self.param_types["__latency"] = "u64"    # nanoseconds
                 for pname in self.args_to_probe:
@@ -139,7 +141,9 @@
                         else:
                                 entry_expr = "$entry(%s)" % pname
                                 val_expr = "(*%s)" % vname
-                        self.expr = self.expr.replace(entry_expr, val_expr)
+                        for i in range(0, len(self.exprs)):
+                                self.exprs[i] = self.exprs[i].replace(
+                                                entry_expr, val_expr)
                         self.filter = self.filter.replace(entry_expr,
                                                           val_expr)
 
@@ -171,7 +175,17 @@
                                    "but got '%s'" % parts[0])
                 if re.match(r"\w+\(.*\)", parts[2]) is None:
                         self._bail(("function signature '%s' has an invalid " +
-                                   "format") % parts[2])
+                                    "format") % parts[2])
+
+        def _parse_expr_types(self, expr_types):
+                if len(expr_types) == 0:
+                        self._bail("no expr types specified")
+                self.expr_types = expr_types.split(',')
+
+        def _parse_exprs(self, exprs):
+                if len(exprs) == 0:
+                        self._bail("no exprs specified")
+                self.exprs = exprs.split(',')
 
         def __init__(self, type, specifier, pid):
                 self.raw_spec = specifier
@@ -195,26 +209,31 @@
                 # the retval in a ret probe, or simply the value "1" otherwise.
                 self.is_default_expr = len(parts) < 5
                 if not self.is_default_expr:
-                        self.expr_type = parts[3]
-                        self.expr = parts[4]
+                        self._parse_expr_types(parts[3])
+                        self._parse_exprs(parts[4])
+                        if len(self.exprs) != len(self.expr_types):
+                                self._bail("mismatched # of exprs and types")
+                        if self.type == "hist" and len(self.expr_types) > 1:
+                                self._bail("histograms can only have 1 expr")
                 else:
                         if not self.is_ret_probe and self.type == "hist":
-                                raise ValueError("dist probes must have expr")
-                        self.expr_type = \
-                                "u64" if not self.is_ret_probe else "int"
-                        self.expr = "1" if not self.is_ret_probe else "$retval"
+                                self._bail("histograms must have expr")
+                        self.expr_types = \
+                                ["u64" if not self.is_ret_probe else "int"]
+                        self.exprs = \
+                                ["1" if not self.is_ret_probe else "$retval"]
                 self.filter = "" if len(parts) != 6 else parts[5]
                 self._substitute_exprs()
 
                 # Do we need to attach an entry probe so that we can collect an 
                 # argument that is required for an exit (return) probe?
+                def check(expr):
+                        keywords = ["$entry", "$latency"]
+                        return any(map(lambda kw: kw in expr, keywords))
                 self.entry_probe_required = self.is_ret_probe and \
-                       ("$entry" in self.expr or "$entry" in self.filter or
-                        "$latency" in self.expr or "$latency" in self.filter)
+                        (any(map(check, self.exprs)) or check(self.filter))
 
                 self.pid = pid
-                # Generating unique names for probes means we can attach
-                # many times to the same function.
                 self.probe_func_name = "%s_probe%d" % \
                         (self.function, Specifier.next_probe_index)
                 self.probe_hash_name = "%s_hash%d" % \
@@ -222,17 +241,71 @@
                 Specifier.next_probe_index += 1
 
         def _substitute_exprs(self):
-                self.expr = self.expr.replace("$retval",
-                                              "(%s)ctx->ax" % self.expr_type)
-                self.filter = self.filter.replace("$retval",
-                                              "(%s)ctx->ax" % self.expr_type)
-                self.expr = self._substitute_aliases(self.expr)
-                self.filter = self._substitute_aliases(self.filter)
+                def repl(expr):
+                        expr = self._substitute_aliases(expr)
+                        return expr.replace("$retval", "ctx->ax")
+                for i in range(0, len(self.exprs)):
+                        self.exprs[i] = repl(self.exprs[i])
+                self.filter = repl(self.filter)
 
-        def _is_string_probe(self):
-                return self.expr_type == "char*" or self.expr_type == "char *"
+        def _is_string(self, expr_type):
+                return expr_type == "char*" or expr_type == "char *"
 
-        def generate_text(self, string_size):
+        def _generate_hash_field(self, i):
+                if self._is_string(self.expr_types[i]):
+                        return "struct __string_t v%d;\n" % i
+                else:
+                        return "%s v%d;\n" % (self.expr_types[i], i)
+
+        def _generate_field_assignment(self, i):
+                if self._is_string(self.expr_types[i]):
+                        return "bpf_probe_read(" + \
+                               "&__key.v%d.s, sizeof(__key.v%d.s), %s);\n" % \
+                                (i, i, self.exprs[i])
+                else:
+                        return "__key.v%d = %s;\n" % (i, self.exprs[i])
+
+        def _generate_hash_decl(self):
+                if self.type == "hist":
+                        return "BPF_HISTOGRAM(%s, %s);" % \
+                               (self.probe_hash_name, self.expr_types[0])
+                else:
+                        text = "struct %s_key_t {\n" % self.probe_hash_name
+                        for i in range(0, len(self.expr_types)):
+                                text += self._generate_hash_field(i)
+                        text += "};\n"
+                        text += "BPF_HASH(%s, struct %s_key_t, u64);\n" % \
+                                (self.probe_hash_name, self.probe_hash_name)
+                        return text
+
+        def _generate_key_assignment(self):
+                if self.type == "hist":
+                        return "%s __key = %s;\n" % \
+                                (self.expr_types[0], self.exprs[0])
+                else:
+                        text = "struct %s_key_t __key = {};\n" % \
+                                self.probe_hash_name
+                        for i in range(0, len(self.exprs)):
+                                text += self._generate_field_assignment(i) 
+                        return text
+
+        def _generate_hash_update(self):
+                if self.type == "hist":
+                        return "%s.increment(bpf_log2l(__key));" % \
+                                self.probe_hash_name 
+                else:
+                        return "%s.increment(__key);" % self.probe_hash_name
+
+        def _generate_pid_filter(self):
+                # Kernel probes need to explicitly filter pid, because the
+                # attach interface doesn't support pid filtering
+                if self.pid is not None and not self.is_user:
+                        return "u32 pid = bpf_get_current_pid_tgid();\n" + \
+                               "if (pid != %d) { return 0; }" % self.pid
+                else:
+                        return ""
+
+        def generate_text(self):
                 # We don't like tools writing tools (Brendan Gregg), but this
                 # is an exception because we're letting the user fully
                 # customize the values we probe. As a rule of thumb though,
@@ -246,6 +319,8 @@
                 if self.entry_probe_required:
                         program = self._generate_entry_probe()
                         prefix = self._generate_retprobe_prefix()                                         
+                        # Replace $entry(paramname) with a reference to the
+                        # value we collected when entering the function:
                         self._replace_entry_exprs()
 
                 program += self.probe_text.replace("PROBENAME",
@@ -254,39 +329,12 @@
                                   or self.is_ret_probe \
                                else ", " + self.signature
                 program = program.replace("SIGNATURE", signature)
-                if self.pid is not None and not self.is_user:
-                        # Kernel probes need to explicitly filter pid
-                        program = program.replace("PID_FILTER",
-                                "u32 pid = bpf_get_current_pid_tgid();\n" + \
-                                "if (pid != %d) { return 0; }" % self.pid)
-                else:
-                        program = program.replace("PID_FILTER", "")
-                if self._is_string_probe():
-                        decl = """
-struct %s_key_t { char key[%d]; };
-BPF_HASH(%s, struct %s_key_t, u64);
-""" \
-                        % (self.function, string_size,
-                           self.probe_hash_name, self.function)
-                        collect = "%s.increment(__key);" % self.probe_hash_name
-                        key_expr = """
-struct %s_key_t __key = {0};
-bpf_probe_read(&__key.key, sizeof(__key.key), %s);
-""" \
-                        % (self.function, self.expr)
-                elif self.type == "freq":
-                        decl = "BPF_HASH(%s, %s, u64);" % \
-                                (self.probe_hash_name, self.expr_type)
-                        collect = "%s.increment(__key);" % self.probe_hash_name
-                        key_expr = "%s __key = %s;" % \
-                                   (self.expr_type, self.expr)
-                elif self.type == "hist":
-                        decl = "BPF_HISTOGRAM(%s, %s);" % \
-                                (self.probe_hash_name, self.expr_type)
-                        collect = "%s.increment(bpf_log2l(__key));" % \
-                                  self.probe_hash_name 
-                        key_expr = "%s __key = %s;" % \
-                                   (self.expr_type, self.expr)
+                program = program.replace("PID_FILTER",
+                                          self._generate_pid_filter())
+
+                decl = self._generate_hash_decl()
+                key_expr = self._generate_key_assignment()
+                collect = self._generate_hash_update()
                 program = program.replace("DATA_DECL", decl)
                 program = program.replace("KEY_EXPR", key_expr) 
                 program = program.replace("FILTER",
@@ -318,6 +366,40 @@
                 if self.entry_probe_required:
                         self._attach_entry_probe()
 
+        def _v2s(self, v):
+                # Most fields can be converted with plain str(), but strings
+                # are wrapped in a __string_t which has an .s field
+                if "__string_t" in type(v).__name__:
+                        return str(v.s)
+                return str(v)
+
+        def _display_expr(self, i):
+                # Replace ugly latency calculation with $latency
+                expr = self.exprs[i].replace(
+                        "(bpf_ktime_get_ns() - *____latency_val)", "$latency")
+                # Replace alias values back with the alias name
+                for alias, subst in Specifier.aliases.items():
+                        expr = expr.replace(subst, alias) 
+                # Replace retval expression with $retval
+                expr = expr.replace("ctx->ax", "$retval")
+                # Replace ugly (*__param_val) expressions with param name
+                return re.sub(r"\(\*__(\w+)_val\)", r"\1", expr)
+
+        def _display_key(self, key):
+                if self.is_default_expr:
+                        if not self.is_ret_probe:
+                                return "total calls"
+                        else:
+                                return "retval = %s" % str(key.v0)
+                else:
+                        # The key object has v0, ..., vk fields containing
+                        # the values of the expressions from self.exprs
+                        def str_i(i):
+                                key_i = self._v2s(getattr(key, "v%d" % i))
+                                return "%s = %s" % \
+                                        (self._display_expr(i), key_i)
+                        return ", ".join(map(str_i, range(0, len(self.exprs))))
+
         def display(self, top):
                 data = self.bpf.get_table(self.probe_hash_name)
                 if self.type == "freq":
@@ -327,8 +409,6 @@
                         if top is not None:
                                 data = data[-top:]
                         for key, value in data: 
-                                key_val = key.key if self._is_string_probe() \
-                                                  else str(key.value)
                                 # Print some nice values if the user didn't
                                 # specify an expression to probe
                                 if self.is_default_expr:
@@ -336,21 +416,19 @@
                                                 key_str = "total calls"
                                         else:
                                                 key_str = "retval = %s" % \
-                                                          key_val
+                                                          self._v2s(key.v0)
                                 else:
-                                        key_str = "%s = %s" % \
-                                                  (self.expr, key_val)
+                                        key_str = self._display_key(key) 
                                 print("\t%-10s %s" % \
                                       (str(value.value), key_str))
                 elif self.type == "hist":
-                        label = self.label or \
-                                (self.expr if not self.is_default_expr \
-                                           else "retval")
+                        label = self.label or (self._display_expr(0)
+                                if not self.is_default_expr  else "retval")
                         data.print_log2_hist(val_type=label)
 
 examples = """
 Probe specifier syntax:
-        {p,r}:[library]:function(signature)[:type:expr[:filter]][#label]
+        {p,r}:[library]:function(signature)[:type[,type...]:expr[,expr...][:filter]][#label]
 Where:
         p,r        -- probe at function entry or at function exit
                       in exit probes: can use $retval, $entry(param), $latency
@@ -358,8 +436,8 @@
                       (leave empty for kernel functions)
         function   -- the function name to trace
         signature  -- the function's parameters, as in the C header
-        type       -- the type of the expression to collect
-        expr       -- the expression to collect
+        type       -- the type of the expression to collect (supports multiple)
+        expr       -- the expression to collect (supports multiple)
         filter     -- the filter that is applied to collected values
         label      -- the label for this probe in the resulting output
 
@@ -372,7 +450,7 @@
         Print a frequency count of how many times process 1005 called malloc
         with an allocation size of 16 bytes
 
-argdist.py -C 'r:c:gets():char*:$retval#snooped strings'
+argdist.py -C 'r:c:gets():char*:(char*)$retval#snooped strings'
         Snoop on all strings returned by gets()
 
 argdist.py -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte'
@@ -388,7 +466,7 @@
         the top 5 busiest fds
 
 argdist.py -p 1005 -H 'r:c:read()'
-        Print a histogram of error codes returned by read() in process 1005
+        Print a histogram of results (sizes) returned by read() in process 1005
 
 argdist.py -C 'r::__vfs_read():u32:$PID:$latency > 100000'
         Print frequency of reads by process where the latency was >0.1ms
@@ -451,11 +529,15 @@
         print("at least one specifier is required")
         exit(1)
 
-bpf_source = "#include <uapi/linux/ptrace.h>\n"
+bpf_source = """
+struct __string_t { char s[%d]; };
+
+#include <uapi/linux/ptrace.h>
+""" % args.string_size
 for include in (args.include or []):
         bpf_source += "#include <%s>\n" % include
 for specifier in specifiers:
-        bpf_source += specifier.generate_text(args.string_size)
+        bpf_source += specifier.generate_text()
 
 if args.verbose:
         print(bpf_source)
commit	cc27edfdf89f646eab14b517a59e0d74af24462a	[log] [tgz]
author	Sasha Goldshtein <goldshtn@gmail.com>	Sun Feb 14 03:49:01 2016 -0800
committer	Sasha Goldshtein <goldshtn@gmail.com>	Sun Feb 14 03:49:01 2016 -0800
tree	91b9819a2d3212e4d481d4316e97bc81bbc3f2ba
parent	7983d6b61f83275471ee8fc4f2162facf4b36d91 [diff] [blame]