trace_processor: reduce memory + ingestion time for utid map

Since utids are dense, we can replace the std::multimap keyed by utid
with a deque indexed by utid (one vector of rows per utid) essentially
for free, and gain a lot in ingestion performance and memory use.

On a 1GB trace:
Memory consumption: 1240MB -> 883MB
Ingestion speed: 80 MB/s -> 122 MB/s

Change-Id: Ic806edcc737344f31b78c5fa28606b07e694cf77
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index 63e34ff..8b5402c 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -188,7 +188,10 @@
       utids_.emplace_back(utid);
       end_states_.emplace_back(end_state);
       priorities_.emplace_back(priority);
-      rows_for_utids_.emplace(utid, slice_count() - 1);
+
+      if (utid >= rows_for_utids_.size())
+        rows_for_utids_.resize(utid + 1);
+      rows_for_utids_[utid].emplace_back(slice_count() - 1);
       return slice_count() - 1;
     }
 
@@ -216,7 +219,7 @@
 
     const std::deque<int32_t>& priorities() const { return priorities_; }
 
-    const std::multimap<UniqueTid, uint32_t>& rows_for_utids() const {
+    const std::deque<std::vector<uint32_t>>& rows_for_utids() const {
       return rows_for_utids_;
     }
 
@@ -229,7 +232,9 @@
     std::deque<UniqueTid> utids_;
     std::deque<ftrace_utils::TaskState> end_states_;
     std::deque<int32_t> priorities_;
-    std::multimap<UniqueTid, uint32_t> rows_for_utids_;
+
+    // Indexed by utid; each entry holds the row indices of that utid's slices.
+    std::deque<std::vector<uint32_t>> rows_for_utids_;
   };
 
   class NestableSlices {