Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2018 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Eric Seckler | d8b5208 | 2019-10-17 15:58:38 +0100 | [diff] [blame] | 17 | #include "src/trace_processor/importers/systrace/systrace_trace_parser.h" |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 18 | |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 19 | #include "perfetto/base/logging.h" |
| 20 | #include "perfetto/ext/base/string_splitter.h" |
Primiano Tucci | 2c5488f | 2019-06-01 03:27:28 +0100 | [diff] [blame] | 21 | #include "perfetto/ext/base/string_utils.h" |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 22 | #include "src/trace_processor/importers/common/process_tracker.h" |
Lalit Maganti | 2f0b41d | 2020-02-27 13:35:39 +0000 | [diff] [blame] | 23 | #include "src/trace_processor/trace_sorter.h" |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 24 | |
Bruno Buss | db8546e | 2020-01-17 12:35:05 +0000 | [diff] [blame] | 25 | #include <cctype> |
Primiano Tucci | 58d2dc6 | 2021-06-24 16:03:24 +0100 | [diff] [blame] | 26 | #include <cinttypes> |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 27 | #include <string> |
| 28 | #include <unordered_map> |
| 29 | |
| 30 | namespace perfetto { |
| 31 | namespace trace_processor { |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 32 | namespace { |
| 33 | |
| 34 | std::vector<base::StringView> SplitOnSpaces(base::StringView str) { |
| 35 | std::vector<base::StringView> result; |
| 36 | for (size_t i = 0; i < str.size(); ++i) { |
| 37 | // Consume all spaces. |
| 38 | for (; i < str.size() && str.data()[i] == ' '; ++i) |
| 39 | ; |
| 40 | // If we haven't reached the end consume all non-spaces and add result. |
| 41 | if (i != str.size()) { |
| 42 | size_t start = i; |
| 43 | for (; i < str.size() && str.data()[i] != ' '; ++i) |
| 44 | ; |
| 45 | result.push_back(base::StringView(str.data() + start, i - start)); |
| 46 | } |
| 47 | } |
| 48 | return result; |
| 49 | } |
| 50 | |
| 51 | bool IsProcessDumpShortHeader(const std::vector<base::StringView>& tokens) { |
| 52 | return tokens.size() == 4 && tokens[0] == "USER" && tokens[1] == "PID" && |
| 53 | tokens[2] == "TID" && tokens[3] == "CMD"; |
| 54 | } |
| 55 | |
| 56 | bool IsProcessDumpLongHeader(const std::vector<base::StringView>& tokens) { |
| 57 | return tokens.size() > 4 && tokens[0] == "USER" && tokens[1] == "PID" && |
| 58 | tokens[2] == "PPID" && tokens[3] == "VSZ"; |
| 59 | } |
| 60 | |
| 61 | } // namespace |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 62 | |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 63 | SystraceTraceParser::SystraceTraceParser(TraceProcessorContext* ctx) |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 64 | : line_parser_(ctx), ctx_(ctx) {} |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 65 | SystraceTraceParser::~SystraceTraceParser() = default; |
| 66 | |
Primiano Tucci | 3264b59 | 2021-11-08 18:20:51 +0000 | [diff] [blame^] | 67 | util::Status SystraceTraceParser::Parse(TraceBlobView blob) { |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 68 | if (state_ == ParseState::kEndOfSystrace) |
| 69 | return util::OkStatus(); |
Primiano Tucci | 3264b59 | 2021-11-08 18:20:51 +0000 | [diff] [blame^] | 70 | partial_buf_.insert(partial_buf_.end(), blob.data(), |
| 71 | blob.data() + blob.size()); |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 72 | |
| 73 | if (state_ == ParseState::kBeforeParse) { |
| 74 | state_ = partial_buf_[0] == '<' ? ParseState::kHtmlBeforeSystrace |
| 75 | : ParseState::kSystrace; |
| 76 | } |
| 77 | |
Isabelle Taylor | 8b4740b | 2019-10-25 10:25:24 +0100 | [diff] [blame] | 78 | // There can be multiple trace data sections in an HTML trace, we want to |
| 79 | // ignore any that don't contain systrace data. In the future it would be |
| 80 | // good to also parse the process dump section. |
| 81 | const char kTraceDataSection[] = |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 82 | R"(<script class="trace-data" type="application/text">)"; |
| 83 | auto start_it = partial_buf_.begin(); |
| 84 | for (;;) { |
| 85 | auto line_it = std::find(start_it, partial_buf_.end(), '\n'); |
| 86 | if (line_it == partial_buf_.end()) |
| 87 | break; |
| 88 | |
| 89 | std::string buffer(start_it, line_it); |
Isabelle Taylor | 8b4740b | 2019-10-25 10:25:24 +0100 | [diff] [blame] | 90 | |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 91 | if (state_ == ParseState::kHtmlBeforeSystrace) { |
Isabelle Taylor | 8b4740b | 2019-10-25 10:25:24 +0100 | [diff] [blame] | 92 | if (base::Contains(buffer, kTraceDataSection)) { |
| 93 | state_ = ParseState::kTraceDataSection; |
| 94 | } |
| 95 | } else if (state_ == ParseState::kTraceDataSection) { |
Lalit Maganti | 9cee2d6 | 2021-02-01 16:19:50 +0000 | [diff] [blame] | 96 | if (base::StartsWith(buffer, "#") && base::Contains(buffer, "TASK-PID")) { |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 97 | state_ = ParseState::kSystrace; |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 98 | } else if (base::StartsWith(buffer, "PROCESS DUMP")) { |
| 99 | state_ = ParseState::kProcessDumpLong; |
Lalit Maganti | 86dfcdb | 2020-08-19 17:25:51 +0100 | [diff] [blame] | 100 | } else if (base::StartsWith(buffer, "CGROUP DUMP")) { |
| 101 | state_ = ParseState::kCgroupDump; |
Isabelle Taylor | 8b4740b | 2019-10-25 10:25:24 +0100 | [diff] [blame] | 102 | } else if (base::Contains(buffer, R"(</script>)")) { |
| 103 | state_ = ParseState::kHtmlBeforeSystrace; |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 104 | } |
| 105 | } else if (state_ == ParseState::kSystrace) { |
| 106 | if (base::Contains(buffer, R"(</script>)")) { |
Isabelle Taylor | 8b4740b | 2019-10-25 10:25:24 +0100 | [diff] [blame] | 107 | state_ = ParseState::kEndOfSystrace; |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 108 | break; |
Lalit Maganti | 5e7e148 | 2020-03-31 13:23:25 +0100 | [diff] [blame] | 109 | } else if (!base::StartsWith(buffer, "#") && !buffer.empty()) { |
Lalit Maganti | 2f0b41d | 2020-02-27 13:35:39 +0000 | [diff] [blame] | 110 | SystraceLine line; |
| 111 | util::Status status = line_tokenizer_.Tokenize(buffer, &line); |
Lalit Maganti | c00223d | 2020-07-15 13:17:07 +0100 | [diff] [blame] | 112 | if (status.ok()) { |
| 113 | line_parser_.ParseLine(std::move(line)); |
| 114 | } else { |
| 115 | ctx_->storage->IncrementStats(stats::systrace_parse_failure); |
| 116 | } |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 117 | } |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 118 | } else if (state_ == ParseState::kProcessDumpLong || |
| 119 | state_ == ParseState::kProcessDumpShort) { |
| 120 | if (base::Contains(buffer, R"(</script>)")) { |
| 121 | state_ = ParseState::kHtmlBeforeSystrace; |
| 122 | } else { |
| 123 | std::vector<base::StringView> tokens = |
| 124 | SplitOnSpaces(base::StringView(buffer)); |
| 125 | if (IsProcessDumpShortHeader(tokens)) { |
| 126 | state_ = ParseState::kProcessDumpShort; |
| 127 | } else if (IsProcessDumpLongHeader(tokens)) { |
| 128 | state_ = ParseState::kProcessDumpLong; |
| 129 | } else if (state_ == ParseState::kProcessDumpLong && |
| 130 | tokens.size() >= 10) { |
| 131 | // Format is: |
| 132 | // user pid ppid vsz rss wchan pc s name my cmd line |
| 133 | const base::Optional<uint32_t> pid = |
| 134 | base::StringToUInt32(tokens[1].ToStdString()); |
| 135 | const base::Optional<uint32_t> ppid = |
| 136 | base::StringToUInt32(tokens[2].ToStdString()); |
Hector Dearman | 1389d0b | 2020-04-23 13:15:46 +0100 | [diff] [blame] | 137 | base::StringView name = tokens[8]; |
| 138 | // Command line may contain spaces, merge all remaining tokens: |
| 139 | const char* cmd_start = tokens[9].data(); |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 140 | base::StringView cmd( |
Hector Dearman | 1389d0b | 2020-04-23 13:15:46 +0100 | [diff] [blame] | 141 | cmd_start, |
| 142 | static_cast<size_t>((buffer.data() + buffer.size()) - cmd_start)); |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 143 | if (!pid || !ppid) { |
| 144 | PERFETTO_ELOG("Could not parse line '%s'", buffer.c_str()); |
| 145 | return util::ErrStatus("Could not parse PROCESS DUMP line"); |
| 146 | } |
Lalit Maganti | 4662a20 | 2020-07-08 13:09:52 +0100 | [diff] [blame] | 147 | ctx_->process_tracker->SetProcessMetadata(pid.value(), ppid, name, |
| 148 | base::StringView()); |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 149 | } else if (state_ == ParseState::kProcessDumpShort && |
| 150 | tokens.size() >= 4) { |
| 151 | // Format is: |
| 152 | // username pid tid my cmd line |
| 153 | const base::Optional<uint32_t> tgid = |
| 154 | base::StringToUInt32(tokens[1].ToStdString()); |
| 155 | const base::Optional<uint32_t> tid = |
| 156 | base::StringToUInt32(tokens[2].ToStdString()); |
Hector Dearman | 1389d0b | 2020-04-23 13:15:46 +0100 | [diff] [blame] | 157 | // Command line may contain spaces, merge all remaining tokens: |
| 158 | const char* cmd_start = tokens[3].data(); |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 159 | base::StringView cmd( |
Hector Dearman | 1389d0b | 2020-04-23 13:15:46 +0100 | [diff] [blame] | 160 | cmd_start, |
| 161 | static_cast<size_t>((buffer.data() + buffer.size()) - cmd_start)); |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 162 | StringId cmd_id = |
| 163 | ctx_->storage->mutable_string_pool()->InternString(cmd); |
| 164 | if (!tid || !tgid) { |
| 165 | PERFETTO_ELOG("Could not parse line '%s'", buffer.c_str()); |
| 166 | return util::ErrStatus("Could not parse PROCESS DUMP line"); |
| 167 | } |
Mikhail Khokhlov | 642b835 | 2020-07-24 10:04:39 +0100 | [diff] [blame] | 168 | UniqueTid utid = |
| 169 | ctx_->process_tracker->UpdateThread(tid.value(), tgid.value()); |
| 170 | ctx_->process_tracker->UpdateThreadNameByUtid( |
| 171 | utid, cmd_id, ThreadNamePriority::kOther); |
Hector Dearman | b6390cc | 2020-04-22 12:54:19 +0100 | [diff] [blame] | 172 | } |
| 173 | } |
Lalit Maganti | 86dfcdb | 2020-08-19 17:25:51 +0100 | [diff] [blame] | 174 | } else if (state_ == ParseState::kCgroupDump) { |
| 175 | if (base::Contains(buffer, R"(</script>)")) { |
| 176 | state_ = ParseState::kHtmlBeforeSystrace; |
| 177 | } |
| 178 | // TODO(lalitm): see if it is important to parse this. |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 179 | } |
| 180 | start_it = line_it + 1; |
| 181 | } |
| 182 | if (state_ == ParseState::kEndOfSystrace) { |
| 183 | partial_buf_.clear(); |
| 184 | } else { |
| 185 | partial_buf_.erase(partial_buf_.begin(), start_it); |
| 186 | } |
| 187 | return util::OkStatus(); |
| 188 | } |
| 189 | |
Primiano Tucci | 40da82f | 2020-02-13 18:04:35 +0000 | [diff] [blame] | 190 | void SystraceTraceParser::NotifyEndOfFile() {} |
| 191 | |
Lalit Maganti | d54d752 | 2019-05-30 14:36:08 +0100 | [diff] [blame] | 192 | } // namespace trace_processor |
| 193 | } // namespace perfetto |