trace_processor: make utid lookup in sched table fast

We almost always want to join sched with the thread table to get more
information about the slice. This CL adds an index on the utid column
to allow efficient lookups by utid in the sched table.

For instance, running the following query on the example trace:
select sum(dur) from sched join thread using(utid) where upid = 15;

Old code: 30ms
New code: 0.3ms

This sort of query is used by the UI to do summarization of the time
scheduled for each process.

Change-Id: I51a655ff1af7bdbdbe6f79eadcb9fa2896e70a4d
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index 3c4d008..b417d27 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -174,6 +174,7 @@
       start_ns_.emplace_back(start_ns);
       durations_.emplace_back(duration_ns);
       utids_.emplace_back(utid);
+      rows_for_utids_.emplace(utid, slice_count() - 1);
       return slice_count() - 1;
     }
 
@@ -191,6 +192,10 @@
 
     const std::deque<UniqueTid>& utids() const { return utids_; }
 
+    const std::multimap<UniqueTid, uint32_t>& rows_for_utids() const {
+      return rows_for_utids_;
+    }
+
    private:
     // Each deque below has the same number of entries (the number of slices
     // in the trace for the CPU).
@@ -198,6 +203,7 @@
     std::deque<int64_t> start_ns_;
     std::deque<int64_t> durations_;
     std::deque<UniqueTid> utids_;
+    std::multimap<UniqueTid, uint32_t> rows_for_utids_;
   };
 
   class NestableSlices {