New rtc dump analyzing tool in Python

R=henrik.lundin@webrtc.org, ivoc@webrtc.org, kwiberg@webrtc.org, peah@webrtc.org, phoglund@webrtc.org

Review-Url: https://codereview.webrtc.org/1999113002
Cr-Commit-Position: refs/heads/master@{#13218}
diff --git a/PRESUBMIT.py b/PRESUBMIT.py
index 5d5a5c3..1ece201 100755
--- a/PRESUBMIT.py
+++ b/PRESUBMIT.py
@@ -397,6 +397,7 @@
 
   test_directories = [
     join('tools', 'autoroller', 'unittests'),
+    join('webrtc', 'tools', 'py_event_log_analyzer'),
   ]
 
   tests = []
diff --git a/webrtc/tools/BUILD.gn b/webrtc/tools/BUILD.gn
index a9fe473..2d09563 100644
--- a/webrtc/tools/BUILD.gn
+++ b/webrtc/tools/BUILD.gn
@@ -279,4 +279,21 @@
       deps += [ "//testing/android/native_test:native_test_support" ]
     }
   }
+
+  if (rtc_enable_protobuf) {
+    copy("rtp_analyzer") {
+      sources = [
+        "py_event_log_analyzer/misc.py",
+        "py_event_log_analyzer/pb_parse.py",
+        "py_event_log_analyzer/rtp_analyzer.py",
+        "py_event_log_analyzer/rtp_analyzer.sh",
+      ]
+      outputs = [
+        "$root_build_dir/{{source_file_part}}",
+      ]
+      deps = [
+        "..:rtc_event_log_proto",
+      ]
+    }
+  }
 }
diff --git a/webrtc/tools/py_event_log_analyzer/README b/webrtc/tools/py_event_log_analyzer/README
new file mode 100644
index 0000000..ed656dd
--- /dev/null
+++ b/webrtc/tools/py_event_log_analyzer/README
@@ -0,0 +1,26 @@
+This file describes how to set up and use the RTP log analyzer.
+
+First build the tool with
+
+    ninja -C out/my_build webrtc:rtp_analyzer
+
+The tool is built by default, so
+
+    ninja -C out/my_build
+
+is enough.
+
+After building, run the tool as follows:
+
+    ./out/my_build/rtp_analyzer.sh <rtc event log>
+
+where <rtc event log> is a recorded RTC event log, which is stored in
+protobuf format. Such logs are generated in multiple ways, e.g. by
+Chrome through the chrome://webrtc-internals page.
+
+The script has been tested to work in python versions 3.4.1 and 2.7.6,
+but should work in all python versions.
+
+Working versions of NumPy (http://www.numpy.org/) and matplotlib
+(http://matplotlib.org/) are needed to run this tool. See this link
+with installation instructions (http://www.scipy.org/install.html).
diff --git a/webrtc/tools/py_event_log_analyzer/misc.py b/webrtc/tools/py_event_log_analyzer/misc.py
new file mode 100644
index 0000000..50ac28e
--- /dev/null
+++ b/webrtc/tools/py_event_log_analyzer/misc.py
@@ -0,0 +1,79 @@
+#  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+#
+#  Use of this source code is governed by a BSD-style license
+#  that can be found in the LICENSE file in the root of the source
+#  tree. An additional intellectual property rights grant can be found
+#  in the file PATENTS.  All contributing project authors may
+#  be found in the AUTHORS file in the root of the source tree.
+
+"""Utility functions for calculating statistics.
+"""
+
+from __future__ import division
+import collections
+import sys
+
+
+def count_reordered(sequence_numbers):
+  """Returns number of reordered indices.
+
+  A reordered index is an index `i` for which sequence_numbers[i] >=
+  sequence_numbers[i + 1]
+  """
+  return sum(1 for (s1, s2) in zip(sequence_numbers,
+                                   sequence_numbers[1:]) if
+             s1 >= s2)
+
+
+def ssrc_normalized_size_table(data_points):
+  """Counts proportion of data for every SSRC.
+
+  Args:
+     data_points: list of pb_parse.DataPoint
+
+  Returns:
+     A dictionary mapping from every SSRC in the data points. The
+     value of an SSRC `s` is the proportion of sizes of packets with
+     SSRC `s` to the total size of all packets.
+
+  """
+  mapping = collections.defaultdict(int)
+  for point in data_points:
+    mapping[point.ssrc] += point.size
+  return normalize_counter(mapping)
+
+
+def normalize_counter(counter):
+  """Returns a normalized version of the dictionary `counter`.
+
+  Does not modify `counter`.
+
+  Returns:
+    A new dictionary, in which every value in `counter`
+    has been divided by the total to sum up to 1.
+  """
+  total = sum(counter.values())
+  return {key: counter[key] / total for key in counter}
+
+
+def unwrap(data, mod):
+  """Returns `data` unwrapped modulo `mod`. Does not modify data.
+
+  Adds integer multiples of mod to all elements of data except the
+  first, such that all pairs of consecutive elements (a, b) satisfy
+  -mod / 2 <= b - a < mod / 2.
+
+  E.g. unwrap([0, 1, 2, 0, 1, 2, 7, 8], 3) -> [0, 1, 2, 3,
+  4, 5, 4, 5]
+  """
+  lst = data[:]
+  for i in range(1, len(data)):
+    lst[i] = lst[i - 1] + (lst[i] - lst[i - 1] +
+                           mod // 2) % mod - (mod // 2)
+  return lst
+
+# Python 2/3-compatible input function
+if sys.version_info[0] <= 2:
+  get_input = raw_input
+else:
+  get_input = input
diff --git a/webrtc/tools/py_event_log_analyzer/misc_test.py b/webrtc/tools/py_event_log_analyzer/misc_test.py
new file mode 100755
index 0000000..f128da5
--- /dev/null
+++ b/webrtc/tools/py_event_log_analyzer/misc_test.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+#  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+#
+#  Use of this source code is governed by a BSD-style license
+#  that can be found in the LICENSE file in the root of the source
+#  tree. An additional intellectual property rights grant can be found
+#  in the file PATENTS.  All contributing project authors may
+#  be found in the AUTHORS file in the root of the source tree.
+
+"""Run the tests with
+
+      python misc_test.py
+or
+      python3 misc_test.py
+"""
+
+from __future__ import division
+import random
+import unittest
+
+import misc
+
+
+class TestMisc(unittest.TestCase):
+
+  def testUnwrapMod3(self):
+    data = [0, 1, 2, 0, -1, -2, -3, -4]
+    unwrapped_3 = misc.unwrap(data, 3)
+    self.assertEqual([0, 1, 2, 3, 2, 1, 0, -1], unwrapped_3)
+
+  def testUnwrapMod4(self):
+    data = [0, 1, 2, 0, -1, -2, -3, -4]
+    unwrapped_4 = misc.unwrap(data, 4)
+    self.assertEqual([0, 1, 2, 0, -1, -2, -3, -4], unwrapped_4)
+
+  def testDataShouldNotChangeAfterUnwrap(self):
+    data = [0, 1, 2, 0, -1, -2, -3, -4]
+    _ = misc.unwrap(data, 4)
+
+    self.assertEqual([0, 1, 2, 0, -1, -2, -3, -4], data)
+
+  def testRandomlyMultiplesOfModAdded(self):
+    # `unwrap` definition says only multiples of mod are added.
+    random_data = [random.randint(0, 9) for _ in range(100)]
+
+    for mod in range(1, 100):
+      random_data_unwrapped_mod = misc.unwrap(random_data, mod)
+
+      for (old_a, a) in zip(random_data, random_data_unwrapped_mod):
+        self.assertEqual((old_a - a) % mod, 0)
+
+  def testRandomlyAgainstInequalityDefinition(self):
+    # Data has to satisfy -mod/2 <= difference < mod/2 for every
+    # difference between consecutive values after unwrap.
+    random_data = [random.randint(0, 9) for _ in range(100)]
+
+    for mod in range(1, 100):
+      random_data_unwrapped_mod = misc.unwrap(random_data, mod)
+
+      for (a, b) in zip(random_data_unwrapped_mod,
+                        random_data_unwrapped_mod[1:]):
+        self.assertTrue(-mod / 2 <= b - a < mod / 2)
+
+  def testRandomlyDataShouldNotChangeAfterUnwrap(self):
+    random_data = [random.randint(0, 9) for _ in range(100)]
+    random_data_copy = random_data[:]
+    for mod in range(1, 100):
+      _ = misc.unwrap(random_data, mod)
+
+      self.assertEqual(random_data, random_data_copy)
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/webrtc/tools/py_event_log_analyzer/pb_parse.py b/webrtc/tools/py_event_log_analyzer/pb_parse.py
new file mode 100644
index 0000000..b1232f0
--- /dev/null
+++ b/webrtc/tools/py_event_log_analyzer/pb_parse.py
@@ -0,0 +1,50 @@
+#  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+#
+#  Use of this source code is governed by a BSD-style license
+#  that can be found in the LICENSE file in the root of the source
+#  tree. An additional intellectual property rights grant can be found
+#  in the file PATENTS.  All contributing project authors may
+#  be found in the AUTHORS file in the root of the source tree.
+
+"""Parses protobuf RTC dumps."""
+
+from __future__ import division
+import struct
+import pyproto.webrtc.call.rtc_event_log_pb2 as rtc_pb
+
+
+class DataPoint(object):
+  """Simple container class for RTP events."""
+
+  def __init__(self, rtp_header_str, packet_size,
+               arrival_timestamp_us):
+    """Builds a data point by parsing an RTP header, size and arrival time.
+
+    RTP header structure is defined in RFC 3550 section 5.1.
+    """
+    self.size = packet_size
+    self.arrival_timestamp_ms = arrival_timestamp_us / 1000
+    header = struct.unpack_from("!HHII", rtp_header_str, 0)
+    (first2header_bytes, self.sequence_number, self.timestamp,
+     self.ssrc) = header
+    self.payload_type = first2header_bytes & 0b01111111
+
+
+def parse_protobuf(file_path):
+  """Parses RTC event log from protobuf file.
+
+  Args:
+       file_path: path to protobuf file of RTC event stream
+
+  Returns:
+    all RTP packet events from the event stream as a list of DataPoints
+  """
+  event_stream = rtc_pb.EventStream()
+  with open(file_path, "rb") as f:
+    event_stream.ParseFromString(f.read())
+
+  return [DataPoint(event.rtp_packet.header,
+                    event.rtp_packet.packet_length,
+                    event.timestamp_us)
+          for event in event_stream.stream
+          if event.HasField("rtp_packet")]
diff --git a/webrtc/tools/py_event_log_analyzer/rtp_analyzer.py b/webrtc/tools/py_event_log_analyzer/rtp_analyzer.py
new file mode 100644
index 0000000..f859837
--- /dev/null
+++ b/webrtc/tools/py_event_log_analyzer/rtp_analyzer.py
@@ -0,0 +1,260 @@
+#  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+#
+#  Use of this source code is governed by a BSD-style license
+#  that can be found in the LICENSE file in the root of the source
+#  tree. An additional intellectual property rights grant can be found
+#  in the file PATENTS.  All contributing project authors may
+#  be found in the AUTHORS file in the root of the source tree.
+
+"""Displays statistics and plots graphs from RTC protobuf dump."""
+
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import sys
+
+import matplotlib.pyplot as plt
+import numpy
+
+import misc
+import pb_parse
+
+
+class RTPStatistics(object):
+  """Has methods for calculating and plotting RTP stream statistics."""
+
+  BANDWIDTH_SMOOTHING_WINDOW_SIZE = 10
+
+  def __init__(self, data_points):
+    """Initializes object with data_points and computes simple statistics.
+
+    Computes percentages of number of packets and packet sizes by
+    SSRC.
+
+    Args:
+        data_points: list of pb_parse.DataPoints on which statistics are
+            calculated.
+
+    """
+
+    self.data_points = data_points
+    self.ssrc_frequencies = misc.normalize_counter(
+        collections.Counter([pt.ssrc for pt in self.data_points]))
+    self.ssrc_size_table = misc.ssrc_normalized_size_table(self.data_points)
+    self.bandwidth_kbps = None
+    self.smooth_bw_kbps = None
+
+  def print_ssrc_info(self, ssrc_id, ssrc):
+    """Prints packet and size statistics for a given SSRC.
+
+    Args:
+        ssrc_id: textual identifier of SSRC printed beside statistics for it.
+        ssrc: SSRC by which to filter data and display statistics
+    """
+    filtered_ssrc = [point for point in self.data_points if point.ssrc
+                     == ssrc]
+    payloads = misc.normalize_counter(
+        collections.Counter([point.payload_type for point in
+                             filtered_ssrc]))
+
+    payload_info = "payload type(s): {}".format(
+        ", ".join(str(payload) for payload in  payloads))
+    print("{} 0x{:x} {}, {:.2f}% packets, {:.2f}% data".format(
+        ssrc_id, ssrc, payload_info, self.ssrc_frequencies[ssrc] * 100,
+        self.ssrc_size_table[ssrc] * 100))
+    print("  packet sizes:")
+    (bin_counts, bin_bounds) = numpy.histogram([point.size for point in
+                                                filtered_ssrc], bins=5,
+                                               density=False)
+    bin_proportions = bin_counts / sum(bin_counts)
+    print("\n".join([
+        " {:.1f} - {:.1f}: {:.2f}%".format(bin_bounds[i], bin_bounds[i + 1],
+                                           bin_proportions[i] * 100)
+        for i in range(len(bin_proportions))
+    ]))
+
+  def choose_ssrc(self):
+    """Queries user for SSRC."""
+
+    if len(self.ssrc_frequencies) == 1:
+      chosen_ssrc = self.ssrc_frequencies[0][-1]
+      self.print_ssrc_info("", chosen_ssrc)
+      return chosen_ssrc
+
+    for (i, ssrc) in enumerate(self.ssrc_frequencies):
+      self.print_ssrc_info(i, ssrc)
+
+    while True:
+      chosen_index = int(misc.get_input("choose one> "))
+      if 0 <= chosen_index < len(self.ssrc_frequencies):
+        return list(self.ssrc_frequencies)[chosen_index]
+      else:
+        print("Invalid index!")
+
+  def filter_ssrc(self, chosen_ssrc):
+    """Filters and wraps data points.
+
+    Removes data points with `ssrc != chosen_ssrc`. Unwraps sequence
+    numbers and timestamps for the chosen selection.
+    """
+    self.data_points = [point for point in self.data_points if
+                        point.ssrc == chosen_ssrc]
+    unwrapped_sequence_numbers = misc.unwrap(
+        [point.sequence_number for point in self.data_points], 2**16 - 1)
+    for (data_point, sequence_number) in zip(self.data_points,
+                                             unwrapped_sequence_numbers):
+      data_point.sequence_number = sequence_number
+
+    unwrapped_timestamps = misc.unwrap([point.timestamp for point in
+                                        self.data_points], 2**32 - 1)
+
+    for (data_point, timestamp) in zip(self.data_points,
+                                       unwrapped_timestamps):
+      data_point.timestamp = timestamp
+
+  def print_sequence_number_statistics(self):
+    seq_no_set = set(point.sequence_number for point in
+                     self.data_points)
+    print("Missing sequence numbers: {} out of {}".format(
+        max(seq_no_set) - min(seq_no_set) + 1 - len(seq_no_set),
+        len(seq_no_set)
+    ))
+    print("Duplicated packets: {}".format(len(self.data_points) -
+                                          len(seq_no_set)))
+    print("Reordered packets: {}".format(
+        misc.count_reordered([point.sequence_number for point in
+                              self.data_points])))
+
+  def estimate_frequency(self):
+    """Estimates frequency and updates data.
+
+    Guesses the most probable frequency by looking at changes in
+    timestamps (RFC 3550 section 5.1), calculates clock drifts and
+    sending time of packets. Updates `self.data_points` with changes
+    in delay and send time.
+    """
+    delta_timestamp = (self.data_points[-1].timestamp -
+                       self.data_points[0].timestamp)
+    delta_arr_timestamp = float((self.data_points[-1].arrival_timestamp_ms -
+                                 self.data_points[0].arrival_timestamp_ms))
+    freq_est = delta_timestamp / delta_arr_timestamp
+
+    freq_vec = [8, 16, 32, 48, 90]
+    freq = None
+    for f in freq_vec:
+      if abs((freq_est - f) / f) < 0.05:
+        freq = f
+
+    print("Estimated frequency: {}kHz".format(freq_est))
+    if freq is None:
+      freq = int(misc.get_input(
+          "Frequency could not be guessed. Input frequency (in kHz)> "))
+    else:
+      print("Guessed frequency: {}kHz".format(freq))
+
+    for point in self.data_points:
+      point.real_send_time_ms = (point.timestamp -
+                                 self.data_points[0].timestamp) / freq
+      point.delay = point.arrival_timestamp_ms -point.real_send_time_ms
+
+  def print_duration_statistics(self):
+    """Prints delay, clock drift and bitrate statistics."""
+
+    min_delay = min(point.delay for point in self.data_points)
+
+    for point in self.data_points:
+      point.absdelay = point.delay - min_delay
+
+    stream_duration_sender = self.data_points[-1].real_send_time_ms / 1000
+    print("Stream duration at sender: {:.1f} seconds".format(
+        stream_duration_sender
+    ))
+
+    arrival_timestamps_ms = [point.arrival_timestamp_ms for point in
+                             self.data_points]
+    stream_duration_receiver = (max(arrival_timestamps_ms) -
+                                min(arrival_timestamps_ms)) / 1000
+    print("Stream duration at receiver: {:.1f} seconds".format(
+        stream_duration_receiver
+    ))
+
+    print("Clock drift: {:.2f}%".format(
+        100 * (stream_duration_receiver / stream_duration_sender - 1)
+    ))
+
+    total_size = sum(point.size for point in self.data_points) * 8 / 1000
+    print("Send average bitrate: {:.2f} kbps".format(
+        total_size / stream_duration_sender))
+
+    print("Receive average bitrate: {:.2f} kbps".format(
+        total_size / stream_duration_receiver))
+
+  def remove_reordered(self):
+    last = self.data_points[0]
+    data_points_ordered = [last]
+    for point in self.data_points[1:]:
+      if point.sequence_number > last.sequence_number and (
+          point.real_send_time_ms > last.real_send_time_ms):
+        data_points_ordered.append(point)
+        last = point
+    self.data_points = data_points_ordered
+
+  def compute_bandwidth(self):
+    """Computes bandwidth averaged over several consecutive packets.
+
+    The number of consecutive packets used in the average is
+    BANDWIDTH_SMOOTHING_WINDOW_SIZE. Averaging is done with
+    numpy.correlate.
+    """
+    self.bandwidth_kbps = []
+    for i in range(len(self.data_points) - 1):
+      self.bandwidth_kbps.append(self.data_points[i].size * 8 /
+                                 (self.data_points[i +
+                                                   1].real_send_time_ms -
+                                  self.data_points[i].real_send_time_ms)
+                                )
+    correlate_filter = (numpy.ones(
+        RTPStatistics.BANDWIDTH_SMOOTHING_WINDOW_SIZE) /
+                        RTPStatistics.BANDWIDTH_SMOOTHING_WINDOW_SIZE)
+    self.smooth_bw_kbps = numpy.correlate(self.bandwidth_kbps, correlate_filter)
+
+  def plot_statistics(self):
+    """Plots changes in delay and average bandwidth."""
+    plt.figure(1)
+    plt.plot([f.real_send_time_ms / 1000 for f in self.data_points],
+             [f.absdelay for f in self.data_points])
+    plt.xlabel("Send time [s]")
+    plt.ylabel("Relative transport delay [ms]")
+
+    plt.figure(2)
+    plt.plot([f.real_send_time_ms / 1000 for f in
+              self.data_points][:len(self.smooth_bw_kbps)],
+             self.smooth_bw_kbps[:len(self.data_points)])
+    plt.xlabel("Send time [s]")
+    plt.ylabel("Bandwidth [kbps]")
+
+    plt.show()
+
+
+def main():
+  if len(sys.argv) < 2:
+    print("Usage: python rtp_analyzer.py <filename of rtc event log>")
+    sys.exit(0)
+
+  data_points = pb_parse.parse_protobuf(sys.argv[1])
+  rtp_stats = RTPStatistics(data_points)
+  chosen_ssrc = rtp_stats.choose_ssrc()
+  print("Chosen SSRC: 0X{:X}".format(chosen_ssrc))
+
+  rtp_stats.filter_ssrc(chosen_ssrc)
+  print("Statistics:")
+  rtp_stats.print_sequence_number_statistics()
+  rtp_stats.estimate_frequency()
+  rtp_stats.print_duration_statistics()
+  rtp_stats.remove_reordered()
+  rtp_stats.compute_bandwidth()
+  rtp_stats.plot_statistics()
+
+if __name__ == "__main__":
+  main()
diff --git a/webrtc/tools/py_event_log_analyzer/rtp_analyzer.sh b/webrtc/tools/py_event_log_analyzer/rtp_analyzer.sh
new file mode 100755
index 0000000..7727d59
--- /dev/null
+++ b/webrtc/tools/py_event_log_analyzer/rtp_analyzer.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+#  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+#
+#  Use of this source code is governed by a BSD-style license
+#  that can be found in the LICENSE file in the root of the source
+#  tree. An additional intellectual property rights grant can be found
+#  in the file PATENTS.  All contributing project authors may
+#  be found in the AUTHORS file in the root of the source tree.
+set -e
+cd $(dirname $0)
+PYTHONPATH="../../third_party/protobuf/python/"
+if [ -z ${PYTHON_EXECUTABLE+x} ]
+then
+    PYTHON_EXECUTABLE=python3
+fi
+exec $PYTHON_EXECUTABLE "rtp_analyzer.py" $@
diff --git a/webrtc/tools/tools.gyp b/webrtc/tools/tools.gyp
index 977ea00..998319e 100644
--- a/webrtc/tools/tools.gyp
+++ b/webrtc/tools/tools.gyp
@@ -177,6 +177,26 @@
             }],
           ],
         }, # tools_unittests
+        {
+          'target_name': 'rtp_analyzer',
+          'type': 'none',
+          'variables': {
+            'copy_output_dir%': '<(PRODUCT_DIR)',
+          },
+          'copies': [
+            {
+              'destination': '<(copy_output_dir)/',
+              'files': [
+                'py_event_log_analyzer/misc.py',
+                'py_event_log_analyzer/pb_parse.py',
+                'py_event_log_analyzer/rtp_analyzer.py',
+                'py_event_log_analyzer/rtp_analyzer.sh',
+              ]
+            },
+          ],
+          'dependencies': [ '<(webrtc_root)/webrtc.gyp:rtc_event_log_proto' ],
+          'process_outputs_as_sources': 1,
+        }, # rtp_analyzer
       ], # targets
       'conditions': [
         ['OS=="android"', {