tools: filter/display using PID instead of TID

As mentioned in #3407, several BCC tools misuse bpf_get_current_pid_tgid().
The helper returns the process ID in the upper 32 bits and the thread ID in
the lower 32 bits (both from userspace's perspective).
In this commit, we return the process ID to userspace for display, and use
the thread ID as the BPF map key, so that we avoid event loss or data
corruption.

The following tools are fixed in this commit:
* bashreadline
* cachetop
* dcsnoop
* killsnoop
* llcstat
* mdflush
* mysqld_qslower
* wakeuptime

See also #3411, #3427, #3433.

Signed-off-by: Hengqi Chen <chenhengqi@outlook.com>
diff --git a/tools/bashreadline.py b/tools/bashreadline.py
index ad9cfdc..908a145 100755
--- a/tools/bashreadline.py
+++ b/tools/bashreadline.py
@@ -50,7 +50,7 @@
     u32 pid;
     if (!PT_REGS_RC(ctx))
         return 0;
-    pid = bpf_get_current_pid_tgid();
+    pid = bpf_get_current_pid_tgid() >> 32;
     data.pid = pid;
     bpf_probe_read_user(&data.str, sizeof(data.str), (void *)PT_REGS_RC(ctx));
 
diff --git a/tools/cachetop.py b/tools/cachetop.py
index 803e2a0..fe6a9a9 100755
--- a/tools/cachetop.py
+++ b/tools/cachetop.py
@@ -157,8 +157,8 @@
         u32 uid = bpf_get_current_uid_gid();
 
         key.ip = PT_REGS_IP(ctx);
-        key.pid = pid & 0xFFFFFFFF;
-        key.uid = uid & 0xFFFFFFFF;
+        key.pid = pid >> 32;
+        key.uid = uid;
         bpf_get_current_comm(&(key.comm), 16);
 
         counts.increment(key);
diff --git a/tools/dcsnoop.py b/tools/dcsnoop.py
index 819d4e6..274eaa5 100755
--- a/tools/dcsnoop.py
+++ b/tools/dcsnoop.py
@@ -90,7 +90,7 @@
 
 int trace_fast(struct pt_regs *ctx, struct nameidata *nd, struct path *path)
 {
-    u32 pid = bpf_get_current_pid_tgid();
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
     submit_event(ctx, (void *)nd->last.name, LOOKUP_REFERENCE, pid);
     return 1;
 }
@@ -98,26 +98,34 @@
 int kprobe__d_lookup(struct pt_regs *ctx, const struct dentry *parent,
     const struct qstr *name)
 {
-    u32 pid = bpf_get_current_pid_tgid();
+    u32 tid = bpf_get_current_pid_tgid();
     struct entry_t entry = {};
     const char *fname = name->name;
     if (fname) {
         bpf_probe_read_kernel(&entry.name, sizeof(entry.name), (void *)fname);
     }
-    entrybypid.update(&pid, &entry);
+    entrybypid.update(&tid, &entry);
     return 0;
 }
 
 int kretprobe__d_lookup(struct pt_regs *ctx)
 {
-    u32 pid = bpf_get_current_pid_tgid();
+    u64 pid_tgid = bpf_get_current_pid_tgid();
+    u32 pid = pid_tgid >> 32;
+    u32 tid = (u32)pid_tgid;
     struct entry_t *ep;
-    ep = entrybypid.lookup(&pid);
-    if (ep == 0 || PT_REGS_RC(ctx) != 0) {
-        return 0;   // missed entry or lookup didn't fail
+
+    ep = entrybypid.lookup(&tid);
+    if (ep == 0) {
+        return 0;   // missed entry
     }
+    if (PT_REGS_RC(ctx) != 0) {
+        entrybypid.delete(&tid);
+        return 0;   // lookup didn't fail
+    }
+
     submit_event(ctx, (void *)ep->name, LOOKUP_MISS, pid);
-    entrybypid.delete(&pid);
+    entrybypid.delete(&tid);
     return 0;
 }
 """
diff --git a/tools/killsnoop.py b/tools/killsnoop.py
index 2dc5b8a..663c810 100755
--- a/tools/killsnoop.py
+++ b/tools/killsnoop.py
@@ -65,7 +65,10 @@
 
 int syscall__kill(struct pt_regs *ctx, int tpid, int sig)
 {
-    u32 pid = bpf_get_current_pid_tgid();
+    u64 pid_tgid = bpf_get_current_pid_tgid();
+    u32 pid = pid_tgid >> 32;
+    u32 tid = (u32)pid_tgid;
+
     PID_FILTER
     SIGNAL_FILTER
 
@@ -73,7 +76,7 @@
     if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
         val.tpid = tpid;
         val.sig = sig;
-        infotmp.update(&pid, &val);
+        infotmp.update(&tid, &val);
     }
 
     return 0;
@@ -83,9 +86,11 @@
 {
     struct data_t data = {};
     struct val_t *valp;
-    u32 pid = bpf_get_current_pid_tgid();
+    u64 pid_tgid = bpf_get_current_pid_tgid();
+    u32 pid = pid_tgid >> 32;
+    u32 tid = (u32)pid_tgid;
 
-    valp = infotmp.lookup(&pid);
+    valp = infotmp.lookup(&tid);
     if (valp == 0) {
         // missed entry
         return 0;
@@ -98,7 +103,7 @@
     data.sig = valp->sig;
 
     events.perf_submit(ctx, &data, sizeof(data));
-    infotmp.delete(&pid);
+    infotmp.delete(&tid);
 
     return 0;
 }
diff --git a/tools/llcstat.py b/tools/llcstat.py
index 7b7bc47..4f1ba2f 100755
--- a/tools/llcstat.py
+++ b/tools/llcstat.py
@@ -50,7 +50,7 @@
 
 static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
     key->cpu = bpf_get_smp_processor_id();
-    key->pid = bpf_get_current_pid_tgid();
+    key->pid = bpf_get_current_pid_tgid() >> 32;
     bpf_get_current_comm(&(key->name), sizeof(key->name));
 }
 
diff --git a/tools/mdflush.py b/tools/mdflush.py
index 2abe15c..8a23520 100755
--- a/tools/mdflush.py
+++ b/tools/mdflush.py
@@ -32,7 +32,7 @@
 int kprobe__md_flush_request(struct pt_regs *ctx, void *mddev, struct bio *bio)
 {
     struct data_t data = {};
-    u32 pid = bpf_get_current_pid_tgid();
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
     data.pid = pid;
     bpf_get_current_comm(&data.comm, sizeof(data.comm));
 /*
diff --git a/tools/mysqld_qslower.py b/tools/mysqld_qslower.py
index 33ea7dd..088cd63 100755
--- a/tools/mysqld_qslower.py
+++ b/tools/mysqld_qslower.py
@@ -58,19 +58,21 @@
 BPF_PERF_OUTPUT(events);
 
 int do_start(struct pt_regs *ctx) {
-    u32 pid = bpf_get_current_pid_tgid();
+    u32 tid = bpf_get_current_pid_tgid();
     struct start_t start = {};
     start.ts = bpf_ktime_get_ns();
     bpf_usdt_readarg(1, ctx, &start.query);
-    start_tmp.update(&pid, &start);
+    start_tmp.update(&tid, &start);
     return 0;
 };
 
 int do_done(struct pt_regs *ctx) {
-    u32 pid = bpf_get_current_pid_tgid();
+    u64 pid_tgid = bpf_get_current_pid_tgid();
+    u32 pid = pid_tgid >> 32;
+    u32 tid = (u32)pid_tgid;
     struct start_t *sp;
 
-    sp = start_tmp.lookup(&pid);
+    sp = start_tmp.lookup(&tid);
     if (sp == 0) {
         // missed tracing start
         return 0;
@@ -85,7 +87,7 @@
         events.perf_submit(ctx, &data, sizeof(data));
     }
 
-    start_tmp.delete(&pid);
+    start_tmp.delete(&tid);
 
     return 0;
 };
diff --git a/tools/wakeuptime.py b/tools/wakeuptime.py
index 0723f8a..531030b 100755
--- a/tools/wakeuptime.py
+++ b/tools/wakeuptime.py
@@ -103,7 +103,9 @@
 BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
 
 static int offcpu_sched_switch() {
-    u32 pid = bpf_get_current_pid_tgid();
+    u64 pid_tgid = bpf_get_current_pid_tgid();
+    u32 pid = pid_tgid >> 32;
+    u32 tid = (u32)pid_tgid;
     struct task_struct *p = (struct task_struct *) bpf_get_current_task();
     u64 ts;
 
@@ -111,18 +113,19 @@
         return 0;
 
     ts = bpf_ktime_get_ns();
-    start.update(&pid, &ts);
+    start.update(&tid, &ts);
     return 0;
 }
 
 static int wakeup(ARG0, struct task_struct *p) {
-    u32 pid = p->pid;
+    u32 pid = p->tgid;
+    u32 tid = p->pid;
     u64 delta, *tsp, ts;
 
-    tsp = start.lookup(&pid);
+    tsp = start.lookup(&tid);
     if (tsp == 0)
         return 0;        // missed start
-    start.delete(&pid);
+    start.delete(&tid);
 
     if (FILTER)
         return 0;