softirqs: Combined CPU as part of the key is necessary to avoid amiss value.

In the environment of massive software interrupts.
<idle>-0     [003] ..s1   106.421020: softirq_entry: vec=6 [action=TASKLET]
<idle>-0     [000] ..s1   106.421063: softirq_entry: vec=3 [action=NET_RX]
<idle>-0     [003] ..s1   106.421083: softirq_exit: vec=6 [action=TASKLET]

Follow the above ftrace logs, we know the correct vec-6 start timestamp is replaced with incorrect vec-3.
Because PID is idle-0. It will produce the wrong result after calculating delta.
diff --git a/tools/softirqs.py b/tools/softirqs.py
index 1e2daf5..32bbef5 100755
--- a/tools/softirqs.py
+++ b/tools/softirqs.py
@@ -54,6 +54,11 @@
 bpf_text = """
 #include <uapi/linux/ptrace.h>
 
+typedef struct entry_key {
+    u32 pid;
+    u32 cpu;
+} entry_key_t;
+
 typedef struct irq_key {
     u32 vec;
     u64 slot;
@@ -64,17 +69,22 @@
     u32 vec;
 } account_val_t;
 
-BPF_HASH(start, u32, account_val_t);
+BPF_HASH(start, entry_key_t, account_val_t);
 BPF_HASH(iptr, u32);
 BPF_HISTOGRAM(dist, irq_key_t);
 
 TRACEPOINT_PROBE(irq, softirq_entry)
 {
-    u32 pid = bpf_get_current_pid_tgid();
     account_val_t val = {};
+    entry_key_t key = {};
+
+    key.pid = bpf_get_current_pid_tgid();
+    key.cpu = bpf_get_smp_processor_id();
     val.ts = bpf_ktime_get_ns();
     val.vec = args->vec;
-    start.update(&pid, &val);
+
+    start.update(&key, &val);
+
     return 0;
 }
 
@@ -82,12 +92,15 @@
 {
     u64 delta;
     u32 vec;
-    u32 pid = bpf_get_current_pid_tgid();
     account_val_t *valp;
     irq_key_t key = {0};
+    entry_key_t entry_key = {};
+
+    entry_key.pid = bpf_get_current_pid_tgid();
+    entry_key.cpu = bpf_get_smp_processor_id();
 
     // fetch timestamp and calculate delta
-    valp = start.lookup(&pid);
+    valp = start.lookup(&entry_key);
     if (valp == 0) {
         return 0;   // missed start
     }
@@ -97,7 +110,7 @@
     // store as sum or histogram
     STORE
 
-    start.delete(&pid);
+    start.delete(&entry_key);
     return 0;
 }
 """