Merge branch 'joelagnel-for-line'
diff --git a/tests/test_base.py b/tests/test_base.py
index c186ecc..96b8d96 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -85,7 +85,7 @@
         in_data = """     kworker/4:1-397   [004]   720.741315: thermal_power_cpu_get: cpus=000000f0 freq=1900000 raw_cpu_power=1259 load={} power=61
      kworker/4:1-397   [004]   720.741349: thermal_power_cpu_get: cpus=0000000f freq=1400000 raw_cpu_power=189 load={} power=14"""
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "cpus", "freq",
+        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "cpus", "freq",
                                 "raw_cpu_power", "power"])
 
         with open("trace.txt", "w") as fout:
@@ -121,7 +121,7 @@
                         timestamp
                         )
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "tag"])
+        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "tag"])
 
         with open("trace.txt", "w") as fout:
             fout.write(in_data)
@@ -145,7 +145,7 @@
 
         in_data = """     rcu_preempt-7     [000]    73.604532: my_sched_stat_runtime:   comm=Space separated taskname pid=7 runtime=262875 [ns] vruntime=17096359856 [ns]"""
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "comm", "pid", "runtime", "vruntime"])
+        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"])
 
         with open("trace.txt", "w") as fout:
             fout.write(in_data)
@@ -203,13 +203,26 @@
         self.assertEquals(round(thrm.data_frame.index[0], 7), 0)
         self.assertEquals(round(last_time - expected_last_time, 7), 0)
 
+    def test_line_num(self):
+        """TestBase: Parsed events record their trace line number in __line"""
+        trace = trappy.FTrace()
+        self.assertEquals(trace.lines, 804)  # total lines counted by the parser
+
+        df = trace.thermal.data_frame
+        self.assertEquals(df.iloc[0]['__line'], 0)  # numbering is zero-based
+        self.assertEquals(df.iloc[-1]['__line'], 792)
+
+        df = trace.thermal_governor.data_frame
+        self.assertEquals(df.iloc[0]['__line'], 11)
+        self.assertEquals(df.iloc[-1]['__line'], 803)  # last counted line: 804 - 1
+
     def test_equals_in_field_value(self):
         """TestBase: Can parse events with fields with values containing '='"""
         trace = trappy.FTrace("trace_equals.txt", events=['equals_event'])
 
         df = trace.equals_event.data_frame
         self.assertSetEqual(set(df.columns),
-                            set(["__comm", "__pid", "__cpu", "my_field"]))
+                            set(["__comm", "__pid", "__cpu", "__line", "my_field"]))
         self.assertListEqual(df["my_field"].tolist(),
                              ["foo", "foo=bar", "foo=bar=baz", 1,
                               "1=2", "1=foo", "1foo=2"])
diff --git a/tests/test_ftrace.py b/tests/test_ftrace.py
index 389d31f..1377c68 100644
--- a/tests/test_ftrace.py
+++ b/tests/test_ftrace.py
@@ -424,6 +424,10 @@
         """Test with a matching unique but no special fields"""
         version_parser = trappy.register_dynamic_ftrace("Version", "version")
 
+        # Append invalid line to file
+        with open("trace.txt", "a") as fil:
+            fil.write("version = 6")
+
         with self.assertRaises(ValueError):
             trappy.FTrace(scope="custom")
 
diff --git a/trappy/base.py b/trappy/base.py
index 93ce60c..4502c77 100644
--- a/trappy/base.py
+++ b/trappy/base.py
@@ -106,6 +106,7 @@
         self.fallback = fallback
         self.tracer = None
         self.data_frame = pd.DataFrame()
+        self.line_array = []
         self.data_array = []
         self.time_array = []
         self.comm_array = []
@@ -150,7 +151,7 @@
 
         return ret
 
-    def append_data(self, time, comm, pid, cpu, data):
+    def append_data(self, time, comm, pid, cpu, line, data):
         """Append data parsed from a line to the corresponding arrays
 
         The :mod:`DataFrame` will be created from this when the whole trace
@@ -175,6 +176,7 @@
         self.comm_array.append(comm)
         self.pid_array.append(pid)
         self.cpu_array.append(cpu)
+        self.line_array.append(line)
         self.data_array.append(data)
 
     def generate_data_dict(self, data_str):
@@ -205,9 +207,10 @@
         check_memory_usage = True
         check_memory_count = 1
 
-        for (comm, pid, cpu, data_str) in zip(self.comm_array, self.pid_array,
-                                              self.cpu_array, self.data_array):
-            data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu}
+        for (comm, pid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
+                                              self.cpu_array, self.line_array,
+                                              self.data_array):
+            data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu, "__line": line}
             data_dict.update(self.generate_data_dict(data_str))
 
             # When running out of memory, Pandas has been observed to segfault
@@ -242,6 +245,7 @@
         self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx)
 
         self.time_array = []
+        self.line_array = []
         self.comm_array = []
         self.pid_array = []
         self.cpu_array = []
diff --git a/trappy/ftrace.py b/trappy/ftrace.py
index 5bd6872..23189d1 100644
--- a/trappy/ftrace.py
+++ b/trappy/ftrace.py
@@ -46,6 +46,12 @@
         trappy.plot_utils.plot_hist(allfreqs[actor], ax, this_title, "KHz", 20,
                              "Frequency", xlim, "default")
 
+SPECIAL_FIELDS_RE = re.compile(  # one trace event line: comm-pid [cpu] timestamp: event: data
+    r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"  # task name, pid, then a "(...)" group ...
+    r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"  # ... made optional here; [cpu]; optional 4-char field (presumably flags)
+    r"(?P<timestamp>[0-9]+\.[0-9]+): (\w+:\s+)+(?P<data>.+)"  # timestamp, event name(s), payload
+)
+
 class GenericFTrace(BareTrace):
     """Generic class to parse output of FTrace.  This class is meant to be
 subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
@@ -168,16 +174,14 @@
                     return True
             return False
 
-        fields_regexp = r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"\
-                                r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
-                                r"(?P<timestamp>[0-9]+\.[0-9]+): (\w+:\s+)+(?P<data>.+)"
-        fields_regexp = re.compile(fields_regexp)
-
         actual_trace = itertools.dropwhile(self.trace_hasnt_started(), fin)
         actual_trace = itertools.takewhile(self.trace_hasnt_finished(),
                                            actual_trace)
 
-        for line in itertools.ifilter(contains_unique_word, actual_trace):
+        for line in actual_trace:
+            if not contains_unique_word(line):
+                self.lines += 1
+                continue
             for unique_word, cls in cls_for_unique_word.iteritems():
                 if unique_word in line:
                     trace_class = cls
@@ -189,9 +193,9 @@
 
             line = line[:-1]
 
-            fields_match = fields_regexp.match(line)
+            fields_match = SPECIAL_FIELDS_RE.match(line)
             if not fields_match:
-                raise FTraceParseError("Couldn't match special fields in '{}'".format(line))
+                raise FTraceParseError("Couldn't match fields in '{}'".format(line))
             comm = fields_match.group('comm')
             pid = int(fields_match.group('pid'))
             cpu = int(fields_match.group('cpu'))
@@ -203,6 +207,7 @@
 
             if (timestamp < window[0] + self.basetime) or \
                (timestamp < abs_window[0]):
+                self.lines += 1
                 continue
 
             if (window[1] and timestamp > window[1] + self.basetime) or \
@@ -212,7 +217,8 @@
             # Remove empty arrays from the trace
             data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
 
-            trace_class.append_data(timestamp, comm, pid, cpu, data_str)
+            trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str)
+            self.lines += 1
 
     def trace_hasnt_started(self):
         """Return a function that accepts a line and returns true if this line
@@ -226,7 +232,7 @@
         started).
 
         """
-        return lambda x: False
+        return lambda line: not SPECIAL_FIELDS_RE.match(line)
 
     def trace_hasnt_finished(self):
         """Return a function that accepts a line and returns true if this line
@@ -264,6 +270,7 @@
 
         try:
             with open(trace_file) as fin:
+                self.lines = 0
                 self.__populate_data(
                     fin, cls_for_unique_word, window, abs_window)
         except FTraceParseError as e:
@@ -583,6 +590,6 @@
                     setattr(self, "_" + match.group(1), match.group(2))
                     metadata_keys.remove(match.group(1))
 
-                if re.search(r"^\s+[^\[]+-\d+\s+\[\d+\]\s+\d+\.\d+:", line):
+                if SPECIAL_FIELDS_RE.match(line):
                     # Reached a valid trace line, abort metadata population
                     return
diff --git a/trappy/systrace.py b/trappy/systrace.py
index e18abf8..0a7f42b 100644
--- a/trappy/systrace.py
+++ b/trappy/systrace.py
@@ -27,24 +27,26 @@
     the headers that start with #
 
     """
-    def __init__(self):
+    def __init__(self, tracer):  # tracer: trace object consulted by __call__ once the trace data begins
         self.before_begin_trace = True
-        self.before_script_trace_data = True
         self.before_actual_trace = True
+        self.tracer = tracer  # kept so __call__ can reach the base-class trace_hasnt_started()
 
     def __call__(self, line):
         if self.before_begin_trace:
             if line.startswith("<!-- BEGIN TRACE -->") or \
                line.startswith("<title>Android System Trace</title>"):
                 self.before_begin_trace = False
-        elif self.before_script_trace_data:
+        elif self.before_actual_trace:  # begin marker already seen; wait for the trace-data marker
             if line.startswith('  <script class="trace-data"') or \
                line.startswith("  var linuxPerfData"):
-                self.before_script_trace_data = False
-        elif not line.startswith("#"):
-            self.before_actual_trace = False
+                self.before_actual_trace = False  # trace data starts after this line
 
-        return self.before_actual_trace
+        if not self.before_actual_trace:  # inside the trace data: defer to the parent-class check
+            base_call = super(SysTrace, self.tracer).trace_hasnt_started()  # predicate from the class after SysTrace in the MRO
+            return base_call(line)  # True means "keep dropping this line"
+        else:
+            return True  # markers not all seen yet: still before the trace
 
 class SysTrace(GenericFTrace):
     """A wrapper that parses all events of a SysTrace run
@@ -67,7 +69,7 @@
             pass
 
     def trace_hasnt_started(self):
-        return drop_before_trace()
+        return drop_before_trace(self)  # hand over this trace so the filter can call the parent-class start check
 
     def trace_hasnt_finished(self):
         """Return a function that returns True while the current line is still part of the trace