add sched trace to check_io_trace_all

- For all processes, calculate execution time and wait time due to I/O.

bug: 64810855
Test: python packages/services/Car/tools/io_analysis/check_io_trace_all.py kernel_trace_file

Change-Id: Iea53b8de0f7e63806082fbab53e0fb7c4c43ffa4
diff --git a/tools/io_analysis/check_io_trace_all.py b/tools/io_analysis/check_io_trace_all.py
index 586a677..2d118ff 100644
--- a/tools/io_analysis/check_io_trace_all.py
+++ b/tools/io_analysis/check_io_trace_all.py
@@ -22,7 +22,12 @@
 import string
 import sys
 
+# ex) <...>-52    [001] ...1     1.362574: block_bio_queue: 8,16 R 0 + 8 [kworker/u8:1]
 RE_BLOCK = r'.+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_bio_queue:\s+([0-9]+)\,([0-9]+)\s+([RW]\S*)\s+([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)'
+# ex)  <...>-453   [001] d..4     3.181854: sched_blocked_reason: pid=471 iowait=1 caller=__wait_on_buffer+0x24/0x2c
+RE_SCHED_BLOCKED_READSON = r'.+-([0-9]+)\s+\[([0-9]+)\]\s.*\s+([0-9]+\.[0-9]+):\s+sched_blocked_reason:\spid=([0-9]+)\siowait=([01])\scaller=(\S+)'
+# ex) <idle>-0     [000] d..3     3.181864: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ueventd next_pid=471 next_prio=120
+RE_SCHED_SWITCH = r'.+-([0-9]+)\s+\[([0-9]+)\]\s.*\s+([0-9]+\.[0-9]+):\s+sched_switch:\sprev_comm=(.+)\sprev_pid=([0-9]+)\sprev_prio=([0-9]+)\sprev_state=(\S+).*next_comm=(.+)\snext_pid=([0-9]+)\snext_prio=([0-9]+)'
 
 # dev_num = major * MULTIPLIER + minor
 DEV_MAJOR_MULTIPLIER = 1000
@@ -56,6 +61,7 @@
     self.total_dm_reads = 0
     self.total_dm_writes = 0
 
+
   def add_read_event(self, major, minor, event):
     devNum = major * DEV_MAJOR_MULTIPLIER + minor;
     events = self.reads.get(devNum)
@@ -104,7 +110,7 @@
       dev = sorted_w.popitem(last=False)
       print " ", dev[0],dev[1]
 
-class Trace:
+class IoTrace:
 
   def __init__(self):
     self.ios = {} #K: process name, v:ProcessData
@@ -114,10 +120,22 @@
     self.total_writes_per_device = {}
     self.total_dm_reads = {} #K: devnum, V: blocks
     self.total_dm_writes = {}
+    self.re_block = re.compile(RE_BLOCK)
 
-  def parse_bio_queue(self, l, match):
+  def parse(self, l):
+    match = self.re_block.match(l)
+    if not match:
+      return False
+    try:
+      self.do_parse_bio_queue(l, match)
+    except ValueError:
+      print "cannot parse:", l
+      raise
+    return True
+
+  def do_parse_bio_queue(self, l, match):
     pid = match.group(1)
-    start_time = int(float(match.group(2))*1000000) #us
+    start_time = float(match.group(2))*1000 #ms
     major = int(match.group(3))
     minor =  int(match.group(4))
     devNum = major * DEV_MAJOR_MULTIPLIER + minor;
@@ -158,13 +176,6 @@
         self.total_writes_per_device[devNum] = 0
       self.total_writes_per_device[devNum] += size
 
-  def parse_block_trace(self, l, match):
-    try:
-      self.parse_bio_queue(l, match)
-    except ValueError:
-      print "cannot parse:", l
-      raise
-
   def dump(self):
     print "total read blocks,", self.total_reads
     print "total write blocks,", self.total_writes
@@ -206,20 +217,156 @@
       dev = sorted_by_total_w.popitem(last=False)
       print dev[0],dev[1]
 
+class SchedProcess:
+  def __init__(self, pid):
+    self.pid = pid
+    self.name = "unknown"
+    self.total_execution_time = 0.0
+    self.total_io_wait_time = 0.0
+    self.total_other_wait_time = 0.0
+    self.waiting_calls = {} # k: waiting_call, v : waiting counter
+    self.io_waiting_call_times = {} # k: waiting_call, v: total wait time
+    self.in_iowait = False
+    self.last_waiting_call = None
+    self.last_switch_out_time = 0.0
+    self.last_switch_in_time = 0.0
+    self.last_core = -1
+    self.execution_time_per_core = {} # k: core, v : time
+
+  def handle_reason(self, current_time, iowait, waiting_call):
+    #if self.pid == 1232:
+    #  print current_time, iowait, waiting_call
+    if iowait == 1:
+      self.in_iowait = True
+    self.last_waiting_call = waiting_call
+    call_counter = self.waiting_calls.get(waiting_call, 0)
+    call_counter += 1
+    self.waiting_calls[waiting_call] = call_counter
+
+  def handle_switch_out(self, current_time, out_state, priority, name, core):
+    #if self.pid == 1232:
+    #  print "out", current_time, out_state
+    if self.name != name:
+      self.name = name
+    self.last_switch_out_time = current_time
+    if self.last_switch_in_time == 0.0: # switch in not recorded. ignore this one
+      return
+    execution_time = current_time - self.last_switch_in_time
+    self.total_execution_time += execution_time
+    core_execution_time = self.execution_time_per_core.get(core, 0.0)
+    core_execution_time += execution_time
+    self.execution_time_per_core[core] = core_execution_time
+
+  def handle_switch_in(self, current_time, priority, name, core):
+    #if self.pid == 1232:
+    #  print "in", current_time, self.in_iowait
+    if self.name != name:
+      self.name = name
+    self.last_switch_in_time = current_time
+    if self.last_switch_out_time == 0.0: # in without out, probably 1st
+      self.in_iowait = False
+      return
+    wait_time = current_time - self.last_switch_out_time
+    if self.in_iowait:
+      self.total_io_wait_time += wait_time
+      total_waiting_call_time = self.io_waiting_call_times.get(self.last_waiting_call, 0.0)
+      total_waiting_call_time += wait_time
+      self.io_waiting_call_times[self.last_waiting_call] = total_waiting_call_time
+    else:
+      self.total_other_wait_time += wait_time
+    self.in_iowait = False
+
+
+  def dump(self):
+    print "PID:", self.pid, " name:", self.name
+    print " total execution time:", self.total_execution_time,\
+      " io wait:", self.total_io_wait_time, " other wait:", self.total_other_wait_time
+    print " Core execution:", self.execution_time_per_core
+    print " Wait calls:", self.waiting_calls
+    print " IO Wait time per wait calls:", self.io_waiting_call_times
+
+class SchedTrace:
+  def __init__(self):
+    self.re_switch = re.compile(RE_SCHED_SWITCH)
+    self.re_reason = re.compile(RE_SCHED_BLOCKED_READSON)
+    self.processes = {} # key: pid, v : SchedProcess
+
+  def parse(self, l):
+    checked_reason = False
+    match = self.re_switch.match(l)
+    if not match:
+      match = self.re_reason.match(l)
+      checked_reason = True
+    if not match:
+      return False
+    try:
+      if checked_reason:
+        self.do_handle_reason(l, match)
+      else:
+        self.do_handle_switch(l, match)
+    except ValueError:
+      print "cannot parse:", l
+      raise
+    return True
+
+  def do_handle_switch(self, l, match):
+    current_pid = int(match.group(1))
+    cpu_core = int(match.group(2))
+    current_time = float(match.group(3))*1000 #ms
+    out_name = match.group(4)
+    out_pid = int(match.group(5))
+    out_prio = int(match.group(6))
+    out_state = match.group(7)
+    in_name = match.group(8)
+    in_pid = int(match.group(9))
+    in_prio = int(match.group(10))
+    out_process = self.processes.get(out_pid)
+    if not out_process:
+      out_process = SchedProcess(out_pid)
+      self.processes[out_pid] = out_process
+    in_process = self.processes.get(in_pid)
+    if not in_process:
+      in_process = SchedProcess(in_pid)
+      self.processes[in_pid] = in_process
+    out_process.handle_switch_out(current_time, out_state, out_prio, out_name, cpu_core)
+    in_process.handle_switch_in(current_time, in_prio, in_name, cpu_core)
+
+  def do_handle_reason(self, l, match):
+    current_pid = int(match.group(1))
+    cpu_core = int(match.group(2))
+    current_time = float(match.group(3))*1000 #ms
+    pid = int(match.group(4))
+    iowait = int(match.group(5))
+    waiting_call = match.group(6)
+    process = self.processes.get(pid)
+    if not process:
+      process = SchedProcess(pid)
+      self.processes[pid] = process
+    process.handle_reason(current_time, iowait, waiting_call)
+
+  def dump(self):
+    sorted_by_total_execution = collections.OrderedDict(sorted(self.processes.items(), \
+      key = lambda item: item[1].total_io_wait_time, reverse = True))
+    for k, v in sorted_by_total_execution.iteritems():
+      if v.total_execution_time > 10.0 or v.total_io_wait_time != 0.0:
+        v.dump()
+
 def main(argv):
   if (len(argv) < 2):
     print "check_io_trace_all.py filename"
     return
   filename = argv[1]
 
-  trace = Trace()
-  prog = re.compile(RE_BLOCK)
+  io_trace = IoTrace()
+  sched_trace = SchedTrace()
   with open(filename) as f:
     for l in f:
-      result = prog.match(l)
-      if result:
-        trace.parse_block_trace(l, result)
-  trace.dump()
+      if io_trace.parse(l):
+        continue
+      sched_trace.parse(l)
+  io_trace.dump()
+  print "\n\n\n"
+  sched_trace.dump()
 
 if __name__ == '__main__':
   main(sys.argv)