Primiano Tucci | 50446cb | 2020-02-14 10:40:23 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2020 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "src/trace_processor/importers/ninja/ninja_log_parser.h" |
Lalit Maganti | 1534bb7 | 2021-04-23 14:11:26 +0100 | [diff] [blame] | 18 | |
Primiano Tucci | 50446cb | 2020-02-14 10:40:23 +0000 | [diff] [blame] | 19 | #include "perfetto/ext/base/string_splitter.h" |
| 20 | #include "perfetto/ext/base/string_utils.h" |
Lalit Maganti | 617deae | 2020-04-14 21:00:49 +0100 | [diff] [blame] | 21 | #include "src/trace_processor/importers/common/process_tracker.h" |
Lalit Maganti | d5c45f4 | 2020-04-14 21:01:50 +0100 | [diff] [blame] | 22 | #include "src/trace_processor/importers/common/slice_tracker.h" |
Lalit Maganti | 617deae | 2020-04-14 21:00:49 +0100 | [diff] [blame] | 23 | #include "src/trace_processor/importers/common/track_tracker.h" |
Primiano Tucci | 50446cb | 2020-02-14 10:40:23 +0000 | [diff] [blame] | 24 | #include "src/trace_processor/storage/trace_storage.h" |
| 25 | #include "src/trace_processor/trace_sorter.h" |
Primiano Tucci | 50446cb | 2020-02-14 10:40:23 +0000 | [diff] [blame] | 26 | |
| 27 | namespace perfetto { |
| 28 | namespace trace_processor { |
| 29 | |
| 30 | using base::StringSplitter; |
| 31 | |
// Constructs the parser. |ctx| is borrowed (owned by the caller) and provides
// the storage and the process/slice/track trackers used while ingesting the
// ninja log.
NinjaLogParser::NinjaLogParser(TraceProcessorContext* ctx) : ctx_(ctx) {}
NinjaLogParser::~NinjaLogParser() = default;
// Ingests one chunk of a .ninja_log file. Chunks can arrive at arbitrary byte
// boundaries, so any trailing partial line is buffered in |log_| and consumed
// on the next call.
// Returns an error if the log header is missing or its version is unsupported;
// individual malformed lines are skipped and counted in
// stats::ninja_parse_errors instead of failing the whole parse.
// Populates |jobs_| as a side effect; slices are emitted later in
// NotifyEndOfFile().
util::Status NinjaLogParser::Parse(TraceBlobView blob) {
  // A trace is read in chunks of arbitrary size (for http fetch() pipelining),
  // not necessarily aligned on a line boundary.
  // Here we push everything into a vector and, on each call, consume only
  // the leading part until the last \n, keeping the rest for the next call.
  const char* src = reinterpret_cast<const char*>(blob.data());
  log_.insert(log_.end(), src, src + blob.size());

  // Find the last \n. Everything after it is an incomplete line kept for the
  // next Parse() call.
  size_t valid_size = log_.size();
  for (; valid_size > 0 && log_[valid_size - 1] != '\n'; --valid_size) {
  }

  for (StringSplitter line(log_.data(), valid_size, '\n'); line.Next();) {
    static const char kHeader[] = "# ninja log v";
    if (!header_parsed_) {
      // The very first line must be the version header. Only v5 is supported.
      if (!base::StartsWith(line.cur_token(), kHeader))
        return util::ErrStatus("Failed to parse ninja log header");
      header_parsed_ = true;
      auto version = base::CStringToUInt32(line.cur_token() + strlen(kHeader));
      if (!version || *version != 5)
        return util::ErrStatus("Unsupported ninja log version");
      continue;
    }

    // Each line in the ninja log looks like this:
    // 4 12 1579224178 ui/assets/modal.scss 832a958a9e234dfa
    // Where:
    // - [4, 12] are the timestamps in ms of [start, end] of the job, measured
    //   from the beginning of the build.
    // - 1579224178 is the "restat" (ignored).
    // - ui/assets/modal.scss is the name of the output file being built.
    // - 832a958a9e234dfa is a hash of the compiler invocation.
    // In most cases, each hash should be unique per ninja invocation (because
    // two rules shouldn't generate the same output). However, in rare
    // circumstances the same hash can show up more than once. Examples:
    // - A GN action generates > 1 output per invocation (e.g., protos). In this
    //   case all items will have the same [start, end] timestamp. In this case
    //   we want to merge all the output names into one build step, because from
    //   the build system viewpoint, that was the same compiler/tool invocation.
    // - A subtle script that generates different outputs without taking a
    //   --output=filename argument (e.g. via env vars or similar). Note that
    //   this happens in the perfetto codebase itself (goto.google.com/nigew).
    //   In this case we want to treat the two entries as two distinct jobs.
    //
    // In summary the deduping logic here is: if both the hash and the
    // timestamps match -> merge, if not, keep distinct.
    StringSplitter tok(&line, '\t');
    // Fields are tab-separated; a missing field yields "" / nullptr, which
    // fails the conversion below and routes the line to the stats counter.
    auto t_start = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
    auto t_end = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
    tok.Next();  // Ignore restat.
    const char* name = tok.Next() ? tok.cur_token() : nullptr;
    auto cmdhash = base::CStringToUInt64(tok.Next() ? tok.cur_token() : "", 16);

    if (!t_start || !t_end || !name || !cmdhash) {
      ctx_->storage->IncrementStats(stats::ninja_parse_errors);
      continue;
    }

    // The same log file can contain timestamps for different builds. The only
    // way we can tell when a new build starts is by detecting the end timestamp
    // breaking monotonicity.
    if (last_end_seen_ == 0 || *t_end < last_end_seen_) {
      // Create a new "process" for each build. In the UI this causes each build
      // to be nested under a track group. |cur_build_id_| is the fake pid
      // of the synthesized process.
      ++cur_build_id_;
      StringId name_id = ctx_->storage->InternString("Build");
      ctx_->process_tracker->SetProcessNameIfUnset(
          ctx_->process_tracker->GetOrCreateProcess(cur_build_id_), name_id);
    }
    last_end_seen_ = *t_end;

    // If more hashes show up back-to-back with the same timestamps, merge them
    // together as they identify multiple outputs for the same build rule.
    if (!jobs_.empty() && *cmdhash == jobs_.back().hash &&
        *t_start == jobs_.back().start_ms && *t_end == jobs_.back().end_ms) {
      jobs_.back().names.append(" ");
      jobs_.back().names.append(name);
      continue;
    }

    jobs_.emplace_back(cur_build_id_, *t_start, *t_end, *cmdhash, name);
  }
  // Drop the fully-consumed prefix; the trailing partial line (if any) stays
  // buffered for the next Parse() call.
  log_.erase(log_.begin(), log_.begin() + static_cast<ssize_t>(valid_size));
  return util::OkStatus();
}
| 122 | |
// This is called after the last Parse() call. At this point all |jobs_| have
// been populated. Infers worker (thread) assignment for each job and emits
// one scoped slice per job onto the inferred worker's track.
void NinjaLogParser::NotifyEndOfFile() {
  // Sort by start time so the greedy worker-assignment loop below can scan
  // jobs in chronological order.
  std::sort(jobs_.begin(), jobs_.end(),
            [](const Job& x, const Job& y) { return x.start_ms < y.start_ms; });

  // Now we need to work out the job parallelism. There's no direct indication
  // of that in the ninja logs, so it must be inferred by observing overlapping
  // of timestamps. In this context a "Worker" is an inferred sequence of jobs
  // that happened concurrently with other sequences.
  // Here we pack jobs according to the following heuristic, for the sake of
  // making the graph nicer to read to humans. Consider the initial situation:
  // 1: [ job 1 ]
  // 2: [   job 2   ]
  // 3: [ job 3 ]
  //    T=0        | T=6
  // Assume that a new job starts at T=6. It's very likely that job4 was started
  // as a consequence of job2 completion (otherwise it could have been started
  // earlier, soon after job 1 or job 3). It seems to make more sense to draw
  // it next in the 2nd worker, i.e. next to job 2.
  struct Worker {
    int64_t busy_until;  // End time (ms) of the last job assigned to it.
    TrackId track_id;    // Thread track the worker's slices are emitted on.
  };
  std::map<uint32_t /*build_id*/, std::vector<Worker>> workers_by_build;

  // Assign thread ids to workers without conflicting with builds' process ids
  // (to avoid main-thread auto-mapping).
  uint32_t last_worker_id = cur_build_id_;

  for (const auto& job : jobs_) {
    Worker* worker = nullptr;
    auto& workers = workers_by_build[job.build_id];
    for (Worker& cur : workers) {
      // Pick the worker which has the greatest end time (busy_until) <= the
      // job's start time.
      if (cur.busy_until <= job.start_ms) {
        if (!worker || cur.busy_until > worker->busy_until)
          worker = &cur;
      }
    }
    if (worker) {
      // Update the worker's end time with the newly assigned job.
      worker->busy_until = job.end_ms;
    } else {
      // All workers are busy, allocate a new one.
      uint32_t worker_id = ++last_worker_id;
      base::StackString<32> name("Worker %zu", workers.size() + 1);
      StringId name_id = ctx_->storage->InternString(name.string_view());
      // The worker becomes a synthesized thread of the build's fake process.
      auto utid = ctx_->process_tracker->UpdateThread(worker_id, job.build_id);
      ctx_->process_tracker->UpdateThreadNameByUtid(utid, name_id,
                                                    ThreadNamePriority::kOther);
      TrackId track_id = ctx_->track_tracker->InternThreadTrack(utid);
      workers.emplace_back(Worker{/*busy_until=*/job.end_ms, track_id});
      worker = &workers.back();
    }

    // Ninja logs are in ms; trace processor timestamps are in ns.
    static constexpr int64_t kMsToNs = 1000 * 1000;
    const int64_t start_ns = job.start_ms * kMsToNs;
    const int64_t dur_ns = (job.end_ms - job.start_ms) * kMsToNs;
    // |job.names| is the space-joined list of outputs (see the dedup logic in
    // Parse()); it becomes the slice name.
    StringId name_id = ctx_->storage->InternString(base::StringView(job.names));
    ctx_->slice_tracker->Scoped(start_ns, worker->track_id, StringId::Null(),
                                name_id, dur_ns);
  }
}
| 188 | |
| 189 | } // namespace trace_processor |
| 190 | } // namespace perfetto |