tools/runqslower.py: Fix raw tracepoint code

Closes: #2588

It was observed that many unrelated process names
were reported for the same pid.

The cause of the bug was the use of bpf_get_current_comm(),
which operates on the current process; that process might already
be different from the 'next' task we captured.
The fix is to read next->comm with bpf_probe_read_str() instead.

Also remove the dead code related to tgid extraction, which is not
used at all.
diff --git a/tools/runqslower.py b/tools/runqslower.py
index 5f5c3b9..b678533 100755
--- a/tools/runqslower.py
+++ b/tools/runqslower.py
@@ -28,6 +28,10 @@
 # Licensed under the Apache License, Version 2.0 (the "License")
 #
 # 02-May-2018   Ivan Babrou   Created this.
+# 18-Nov-2019   Gergely Bod   BUG fix: Use bpf_probe_read_str() to extract the
+#                               process name from 'task_struct* next' in raw tp code.
+#                               bpf_get_current_comm() operates on the current task
+#                               which might already be different than 'next'.
 
 from __future__ import print_function
 from bcc import BPF
@@ -163,13 +167,12 @@
     // TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next)
     struct task_struct *prev = (struct task_struct *)ctx->args[1];
     struct task_struct *next= (struct task_struct *)ctx->args[2];
-    u32 pid, tgid;
+    u32 pid;
     long state;
 
     // ivcsw: treat like an enqueue event and store timestamp
     bpf_probe_read(&state, sizeof(long), (const void *)&prev->state);
     if (state == TASK_RUNNING) {
-        bpf_probe_read(&tgid, sizeof(prev->tgid), &prev->tgid);
         bpf_probe_read(&pid, sizeof(prev->pid), &prev->pid);
         if (!(FILTER_PID || pid == 0)) {
             u64 ts = bpf_ktime_get_ns();
@@ -177,7 +180,6 @@
         }
     }
 
-    bpf_probe_read(&tgid, sizeof(next->tgid), &next->tgid);
     bpf_probe_read(&pid, sizeof(next->pid), &next->pid);
 
     u64 *tsp, delta_us;
@@ -195,7 +197,7 @@
     struct data_t data = {};
     data.pid = pid;
     data.delta_us = delta_us;
-    bpf_get_current_comm(&data.task, sizeof(data.task));
+    bpf_probe_read_str(&data.task, sizeof(data.task), next->comm);
 
     // output
     events.perf_submit(ctx, &data, sizeof(data));