blob: bf186bb90b0def1ec1acf3e5d9241bb0b127e378 [file] [log] [blame]
Primiano Tucci50446cb2020-02-14 10:40:23 +00001/*
2 * Copyright (C) 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "src/trace_processor/importers/ninja/ninja_log_parser.h"
Lalit Maganti1534bb72021-04-23 14:11:26 +010018
Primiano Tucci50446cb2020-02-14 10:40:23 +000019#include "perfetto/ext/base/string_splitter.h"
20#include "perfetto/ext/base/string_utils.h"
Lalit Maganti617deae2020-04-14 21:00:49 +010021#include "src/trace_processor/importers/common/process_tracker.h"
Lalit Magantid5c45f42020-04-14 21:01:50 +010022#include "src/trace_processor/importers/common/slice_tracker.h"
Lalit Maganti617deae2020-04-14 21:00:49 +010023#include "src/trace_processor/importers/common/track_tracker.h"
Primiano Tucci50446cb2020-02-14 10:40:23 +000024#include "src/trace_processor/storage/trace_storage.h"
25#include "src/trace_processor/trace_sorter.h"
Primiano Tucci50446cb2020-02-14 10:40:23 +000026
27namespace perfetto {
28namespace trace_processor {
29
30using base::StringSplitter;
31
32NinjaLogParser::NinjaLogParser(TraceProcessorContext* ctx) : ctx_(ctx) {}
33NinjaLogParser::~NinjaLogParser() = default;
34
// Ingests one chunk of a .ninja_log file. Chunks arrive with arbitrary
// boundaries, so any trailing partial line is buffered in |log_| and consumed
// on the next call. Returns an error status on a malformed/unsupported header;
// per-line parse failures are only counted in stats and skipped.
util::Status NinjaLogParser::Parse(TraceBlobView blob) {
  // A trace is read in chunks of arbitrary size (for http fetch() pipelining),
  // not necessarily aligned on a line boundary.
  // Here we push everything into a vector and, on each call, consume only
  // the leading part until the last \n, keeping the rest for the next call.
  const char* src = reinterpret_cast<const char*>(blob.data());
  log_.insert(log_.end(), src, src + blob.size());

  // Find the last \n. Everything at or before it is complete lines; the tail
  // after it is an unfinished line kept for the next Parse() call.
  size_t valid_size = log_.size();
  for (; valid_size > 0 && log_[valid_size - 1] != '\n'; --valid_size) {
  }

  for (StringSplitter line(log_.data(), valid_size, '\n'); line.Next();) {
    static const char kHeader[] = "# ninja log v";
    // The first line must be the version header; only v5 is supported.
    if (!header_parsed_) {
      if (!base::StartsWith(line.cur_token(), kHeader))
        return util::ErrStatus("Failed to parse ninja log header");
      header_parsed_ = true;
      auto version = base::CStringToUInt32(line.cur_token() + strlen(kHeader));
      if (!version || *version != 5)
        return util::ErrStatus("Unsupported ninja log version");
      continue;
    }

    // Each line in the ninja log looks like this:
    // 4 12 1579224178 ui/assets/modal.scss 832a958a9e234dfa
    // Where:
    // - [4, 12] are the timestamps in ms of [start, end] of the job, measured
    //   from the beginning of the build.
    // - 1579224178 is the "restat" (ignored).
    // - ui/assets/modal.scss is the name of the output file being built.
    // - 832a958a9e234dfa is a hash of the compiler invocation.
    // In most cases, each hash should be unique per ninja invocation (because
    // two rules shouldn't generate the same output). However, in rare
    // circumstances the same hash can show up more than once. Examples:
    // - A GN action generates > 1 output per invocation (e.g., protos). In this
    //   case all items will have the same [start, end] timestamp. In this case
    //   we want to merge all the output names into one build step, because from
    //   the build system viewpoint, that was the same compiler/tool invocation.
    // - A subtle script that generates different outputs without taking a
    //   --output=filename argument (e.g. via env vars or similar). Note that
    //   this happens in the perfetto codebase itself (goto.google.com/nigew).
    //   In this case we want to treat the two entries as two distinct jobs.
    //
    // In summary the deduping logic here is: if both the hash and the
    // timestamps match -> merge, if not, keep distinct.
    StringSplitter tok(&line, '\t');
    auto t_start = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
    auto t_end = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
    tok.Next();  // Ignore restat.
    const char* name = tok.Next() ? tok.cur_token() : nullptr;
    // The hash field is hex-encoded (base 16).
    auto cmdhash = base::CStringToUInt64(tok.Next() ? tok.cur_token() : "", 16);

    // Any missing/unparsable field invalidates the whole line: count and skip.
    if (!t_start || !t_end || !name || !cmdhash) {
      ctx_->storage->IncrementStats(stats::ninja_parse_errors);
      continue;
    }

    // The same log file can contain timestamps for different builds. The only
    // way we can tell when a new build starts is by detecting the end timestamp
    // breaking monotonicity.
    if (last_end_seen_ == 0 || *t_end < last_end_seen_) {
      // Create a new "process" for each build. In the UI this causes each build
      // to be nested under a track group. |cur_build_id_| is the fake pid
      // of the synthesized process.
      ++cur_build_id_;
      StringId name_id = ctx_->storage->InternString("Build");
      ctx_->process_tracker->SetProcessNameIfUnset(
          ctx_->process_tracker->GetOrCreateProcess(cur_build_id_), name_id);
    }
    last_end_seen_ = *t_end;

    // If more hashes show up back-to-back with the same timestamps, merge them
    // together as they identify multiple outputs for the same build rule.
    if (!jobs_.empty() && *cmdhash == jobs_.back().hash &&
        *t_start == jobs_.back().start_ms && *t_end == jobs_.back().end_ms) {
      jobs_.back().names.append(" ");
      jobs_.back().names.append(name);
      continue;
    }

    jobs_.emplace_back(cur_build_id_, *t_start, *t_end, *cmdhash, name);
  }
  // Drop the consumed complete lines; keep only the unfinished tail.
  log_.erase(log_.begin(), log_.begin() + static_cast<ssize_t>(valid_size));
  return util::OkStatus();
}
122
123// This is called after the last Parase() call. At this point all |jobs_| have
124// been populated.
125void NinjaLogParser::NotifyEndOfFile() {
126 std::sort(jobs_.begin(), jobs_.end(),
127 [](const Job& x, const Job& y) { return x.start_ms < y.start_ms; });
128
129 // Now we need to work out the job parallelism. There's no direct indication
130 // of that in the ninja logs, so it must be inferred by observing overlapping
131 // of timestamps. In this context a "Worker" is an inferred sequence of jobs
132 // that happened concurrently with other sequences.
133 // Here we pack jobs according the following heuristic, for the sake of making
134 // the graph nicer to read to humans. Consider the initial situation:
135 // 1: [ job 1 ]
136 // 2: [ job 2 ]
137 // 3: [ job 3 ]
138 // T=0 | T=6
139 // Assume that a new job starts at T=6. It's very likely that job4 was started
140 // as a consequence of job2 completion (othewise it could have been started
141 // earlier, soon after job 1 or Job 3). It seems to make more sense to draw
142 // it next in the 2nd worker, i.e. next to job 2.
143 struct Worker {
144 int64_t busy_until;
145 TrackId track_id;
146 };
147 std::map<uint32_t /*build_id*/, std::vector<Worker>> workers_by_build;
148
149 // Assign thread ids to worker without conflicting with builds' process ids
150 // (to avoid main-thread auto-mapping).s
151 uint32_t last_worker_id = cur_build_id_;
152
153 for (const auto& job : jobs_) {
154 Worker* worker = nullptr;
155 auto& workers = workers_by_build[job.build_id];
156 for (Worker& cur : workers) {
157 // Pick the worker which has the greatest end time (busy_until) <= the
158 // job's start time.
159 if (cur.busy_until <= job.start_ms) {
160 if (!worker || cur.busy_until > worker->busy_until)
161 worker = &cur;
162 }
163 }
164 if (worker) {
165 // Update the worker's end time with the newly assigned job.
166 worker->busy_until = job.end_ms;
167 } else {
168 // All workers are busy, allocate a new one.
169 uint32_t worker_id = ++last_worker_id;
Primiano Tucci3dcdff22021-10-13 15:08:35 +0100170 base::StackString<32> name("Worker %zu", workers.size() + 1);
171 StringId name_id = ctx_->storage->InternString(name.string_view());
Primiano Tucci50446cb2020-02-14 10:40:23 +0000172 auto utid = ctx_->process_tracker->UpdateThread(worker_id, job.build_id);
Mikhail Khokhlov642b8352020-07-24 10:04:39 +0100173 ctx_->process_tracker->UpdateThreadNameByUtid(utid, name_id,
174 ThreadNamePriority::kOther);
Primiano Tucci50446cb2020-02-14 10:40:23 +0000175 TrackId track_id = ctx_->track_tracker->InternThreadTrack(utid);
176 workers.emplace_back(Worker{/*busy_until=*/job.end_ms, track_id});
177 worker = &workers.back();
178 }
179
180 static constexpr int64_t kMsToNs = 1000 * 1000;
181 const int64_t start_ns = job.start_ms * kMsToNs;
182 const int64_t dur_ns = (job.end_ms - job.start_ms) * kMsToNs;
183 StringId name_id = ctx_->storage->InternString(base::StringView(job.names));
184 ctx_->slice_tracker->Scoped(start_ns, worker->track_id, StringId::Null(),
185 name_id, dur_ns);
186 }
187}
188
189} // namespace trace_processor
190} // namespace perfetto