// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "perf_parser.h"
6
7#include <fcntl.h>
8#include <stdint.h>
9#include <stdio.h>
10#include <sys/stat.h>
11#include <sys/types.h>
12#include <unistd.h>
lakshmanad825e1e2017-11-03 10:26:31 -070013#include <algorithm>
lakshmana5d24e0b2017-08-14 14:18:43 -070014
15#include <memory>
16#include <set>
17#include <sstream>
18
19#include "base/logging.h"
20
21#include "address_mapper.h"
22#include "binary_data_utils.h"
23#include "compat/proto.h"
24#include "compat/string.h"
25#include "dso.h"
26#include "huge_page_deducer.h"
27
28namespace quipper {
29
30using BranchStackEntry = PerfDataProto_BranchStackEntry;
31using CommEvent = PerfDataProto_CommEvent;
32using ForkEvent = PerfDataProto_ForkEvent;
33using MMapEvent = PerfDataProto_MMapEvent;
lakshmana5d24e0b2017-08-14 14:18:43 -070034using SampleEvent = PerfDataProto_SampleEvent;
35
36namespace {
37
// Alignment, in bytes, applied to MMAP regions: the value reported by
// sysconf(_SC_PAGESIZE) when this translation unit is initialized.
const uint64_t kMmapPageAlignment =
    static_cast<uint64_t>(sysconf(_SC_PAGESIZE));

// Command name and pid of the kernel swapper process.
const char kSwapperCommandName[] = "swapper";
const uint32_t kSwapperPid = 0;

// Returns the distance of |addr| from the start of the
// |kMmapPageAlignment|-sized page that contains it.
uint64_t GetPageAlignedOffset(uint64_t addr) {
  const uint64_t page_size = kMmapPageAlignment;
  return addr % page_size;
}
50
51bool IsNullBranchStackEntry(const BranchStackEntry& entry) {
52 return (!entry.from_ip() && !entry.to_ip());
53}
54
55} // namespace
56
57PerfParser::PerfParser(PerfReader* reader) : reader_(reader) {}
58
59PerfParser::~PerfParser() {}
60
61PerfParser::PerfParser(PerfReader* reader, const PerfParserOptions& options)
lakshmana3fa0a5f2018-02-08 12:20:56 -080062 : reader_(reader), options_(options) {}
lakshmana5d24e0b2017-08-14 14:18:43 -070063
64bool PerfParser::ParseRawEvents() {
65 if (options_.sort_events_by_time) {
66 reader_->MaybeSortEventsByTime();
67 }
68
69 // Just in case there was data from a previous call.
70 process_mappers_.clear();
71
ckennelly3a88ebd2017-08-31 20:46:41 +020072 // Find huge page mappings.
73 if (options_.deduce_huge_page_mappings) {
vlankhaara6b8f1a2018-02-13 09:29:24 -080074 DeduceHugePages(reader_->mutable_events());
ckennelly3a88ebd2017-08-31 20:46:41 +020075 }
76
77 // Combine split mappings. Because the remapping process makes addresses
78 // contiguous, we cannot try to combine mappings in these situations (as we
79 // collapse maps that were non-contiguous).
80 if (options_.combine_mappings && !options_.do_remap) {
81 CombineMappings(reader_->mutable_events());
lakshmana5d24e0b2017-08-14 14:18:43 -070082 }
83
84 // Clear the parsed events to reset their fields. Otherwise, non-sample events
85 // may have residual DSO+offset info.
86 parsed_events_.clear();
87
88 // Events of type PERF_RECORD_FINISHED_ROUND don't have a timestamp, and are
89 // not needed.
90 // use the partial-sorting of events between rounds to sort faster.
91 parsed_events_.resize(reader_->events().size());
92 size_t write_index = 0;
93 for (int i = 0; i < reader_->events().size(); ++i) {
94 if (reader_->events().Get(i).header().type() == PERF_RECORD_FINISHED_ROUND)
95 continue;
96 parsed_events_[write_index++].event_ptr =
97 reader_->mutable_events()->Mutable(i);
98 }
99 parsed_events_.resize(write_index);
100
101 ProcessEvents();
102
lakshmana3fa0a5f2018-02-08 12:20:56 -0800103 if (!options_.discard_unused_events) return true;
lakshmana5d24e0b2017-08-14 14:18:43 -0700104
105 // Some MMAP/MMAP2 events' mapped regions will not have any samples. These
106 // MMAP/MMAP2 events should be dropped. |parsed_events_| should be
107 // reconstructed without these events.
108 write_index = 0;
109 size_t read_index;
110 for (read_index = 0; read_index < parsed_events_.size(); ++read_index) {
111 const ParsedEvent& event = parsed_events_[read_index];
112 if (event.event_ptr->has_mmap_event() &&
113 event.num_samples_in_mmap_region == 0) {
114 continue;
115 }
lakshmana3fa0a5f2018-02-08 12:20:56 -0800116 if (read_index != write_index) parsed_events_[write_index] = event;
lakshmana5d24e0b2017-08-14 14:18:43 -0700117 ++write_index;
118 }
119 CHECK_LE(write_index, parsed_events_.size());
120 parsed_events_.resize(write_index);
121
122 // Update the events in |reader_| to match the updated events.
123 UpdatePerfEventsFromParsedEvents();
124
125 return true;
126}
127
lannadorai91a5cde2018-01-18 09:38:44 -0800128bool PerfParser::ProcessUserEvents(PerfEvent& event) {
129 // New user events from PERF-4.13 is not yet supported
130 VLOG(1) << "Unsupported event type: " << event.header().type();
131 return true;
132}
133
lakshmana5d24e0b2017-08-14 14:18:43 -0700134bool PerfParser::ProcessEvents() {
135 stats_ = {0};
136
lakshmana3fa0a5f2018-02-08 12:20:56 -0800137 stats_.did_remap = false; // Explicitly clear the remap flag.
lakshmana5d24e0b2017-08-14 14:18:43 -0700138
139 // Pid 0 is called the swapper process. Even though perf does not record a
140 // COMM event for pid 0, we act like we did receive a COMM event for it. Perf
141 // does this itself, example:
142 // http://lxr.free-electrons.com/source/tools/perf/util/session.c#L1120
143 commands_.insert(kSwapperCommandName);
144 pidtid_to_comm_map_[std::make_pair(kSwapperPid, kSwapperPid)] =
145 &(*commands_.find(kSwapperCommandName));
146
147 // NB: Not necessarily actually sorted by time.
148 for (size_t i = 0; i < parsed_events_.size(); ++i) {
149 ParsedEvent& parsed_event = parsed_events_[i];
150 PerfEvent& event = *parsed_event.event_ptr;
lannadorai91a5cde2018-01-18 09:38:44 -0800151
152 // Process user events
153 if (event.header().type() >= PERF_RECORD_USER_TYPE_START) {
154 if (!ProcessUserEvents(event)) {
155 return false;
156 }
157 continue;
158 }
159
lakshmana5d24e0b2017-08-14 14:18:43 -0700160 switch (event.header().type()) {
161 case PERF_RECORD_SAMPLE:
162 // SAMPLE doesn't have any fields to log at a fixed,
163 // previously-endian-swapped location. This used to log ip.
164 VLOG(1) << "SAMPLE";
165 ++stats_.num_sample_events;
lakshmana3fa0a5f2018-02-08 12:20:56 -0800166 if (MapSampleEvent(&parsed_event)) ++stats_.num_sample_events_mapped;
lakshmana5d24e0b2017-08-14 14:18:43 -0700167 break;
168 case PERF_RECORD_MMAP:
lakshmana3fa0a5f2018-02-08 12:20:56 -0800169 case PERF_RECORD_MMAP2: {
lakshmana5d24e0b2017-08-14 14:18:43 -0700170 const char* mmap_type_name =
171 event.header().type() == PERF_RECORD_MMAP ? "MMAP" : "MMAP2";
172 VLOG(1) << mmap_type_name << ": " << event.mmap_event().filename();
173 ++stats_.num_mmap_events;
174 // Use the array index of the current mmap event as a unique identifier.
175 CHECK(MapMmapEvent(event.mutable_mmap_event(), i))
176 << "Unable to map " << mmap_type_name << " event!";
177 // No samples in this MMAP region yet, hopefully.
178 parsed_event.num_samples_in_mmap_region = 0;
179 DSOInfo dso_info;
180 dso_info.name = event.mmap_event().filename();
181 if (event.header().type() == PERF_RECORD_MMAP2) {
182 dso_info.maj = event.mmap_event().maj();
183 dso_info.min = event.mmap_event().min();
184 dso_info.ino = event.mmap_event().ino();
185 }
186 name_to_dso_.emplace(dso_info.name, dso_info);
187 break;
188 }
189 case PERF_RECORD_FORK:
lakshmana3fa0a5f2018-02-08 12:20:56 -0800190 // clang-format off
lakshmana5d24e0b2017-08-14 14:18:43 -0700191 VLOG(1) << "FORK: " << event.fork_event().ppid()
192 << ":" << event.fork_event().ptid()
193 << " -> " << event.fork_event().pid()
194 << ":" << event.fork_event().tid();
lakshmana3fa0a5f2018-02-08 12:20:56 -0800195 // clang-format on
lakshmana5d24e0b2017-08-14 14:18:43 -0700196 ++stats_.num_fork_events;
197 CHECK(MapForkEvent(event.fork_event())) << "Unable to map FORK event!";
198 break;
199 case PERF_RECORD_EXIT:
200 // EXIT events have the same structure as FORK events.
lakshmana3fa0a5f2018-02-08 12:20:56 -0800201 // clang-format off
lakshmana5d24e0b2017-08-14 14:18:43 -0700202 VLOG(1) << "EXIT: " << event.fork_event().ppid()
203 << ":" << event.fork_event().ptid();
lakshmana3fa0a5f2018-02-08 12:20:56 -0800204 // clang-format on
lakshmana5d24e0b2017-08-14 14:18:43 -0700205 ++stats_.num_exit_events;
206 break;
207 case PERF_RECORD_COMM:
208 {
lakshmana3fa0a5f2018-02-08 12:20:56 -0800209 // clang-format off
lakshmana5d24e0b2017-08-14 14:18:43 -0700210 VLOG(1) << "COMM: " << event.comm_event().pid()
211 << ":" << event.comm_event().tid() << ": "
212 << event.comm_event().comm();
lakshmana3fa0a5f2018-02-08 12:20:56 -0800213 // clang-format on
lakshmana5d24e0b2017-08-14 14:18:43 -0700214 ++stats_.num_comm_events;
215 CHECK(MapCommEvent(event.comm_event()));
216 commands_.insert(event.comm_event().comm());
lakshmana3fa0a5f2018-02-08 12:20:56 -0800217 const PidTid pidtid =
218 std::make_pair(event.comm_event().pid(), event.comm_event().tid());
lakshmana5d24e0b2017-08-14 14:18:43 -0700219 pidtid_to_comm_map_[pidtid] =
220 &(*commands_.find(event.comm_event().comm()));
221 break;
222 }
223 case PERF_RECORD_LOST:
224 case PERF_RECORD_THROTTLE:
225 case PERF_RECORD_UNTHROTTLE:
226 case PERF_RECORD_READ:
lakshmana5d24e0b2017-08-14 14:18:43 -0700227 VLOG(1) << "Parsed event type: " << event.header().type()
228 << ". Doing nothing.";
229 break;
lannadorai91a5cde2018-01-18 09:38:44 -0800230 case PERF_RECORD_AUX:
231 case PERF_RECORD_ITRACE_START:
232 case PERF_RECORD_LOST_SAMPLES:
233 case PERF_RECORD_SWITCH:
234 case PERF_RECORD_SWITCH_CPU_WIDE:
235 case PERF_RECORD_NAMESPACES:
236 VLOG(1) << "Parsed event type: " << event.header().type()
237 << ". Not yet supported.";
238 break;
lakshmana5d24e0b2017-08-14 14:18:43 -0700239 default:
240 LOG(ERROR) << "Unknown event type: " << event.header().type();
241 return false;
242 }
243 }
lakshmana3fa0a5f2018-02-08 12:20:56 -0800244 if (!FillInDsoBuildIds()) return false;
lakshmana5d24e0b2017-08-14 14:18:43 -0700245
246 // Print stats collected from parsing.
lakshmana3fa0a5f2018-02-08 12:20:56 -0800247 // clang-format off
lakshmana5d24e0b2017-08-14 14:18:43 -0700248 LOG(INFO) << "Parser processed: "
249 << stats_.num_mmap_events << " MMAP/MMAP2 events, "
250 << stats_.num_comm_events << " COMM events, "
251 << stats_.num_fork_events << " FORK events, "
252 << stats_.num_exit_events << " EXIT events, "
253 << stats_.num_sample_events << " SAMPLE events, "
254 << stats_.num_sample_events_mapped << " of these were mapped";
lakshmana3fa0a5f2018-02-08 12:20:56 -0800255 // clang-format on
lakshmana5d24e0b2017-08-14 14:18:43 -0700256
257 float sample_mapping_percentage =
258 static_cast<float>(stats_.num_sample_events_mapped) /
259 stats_.num_sample_events * 100.;
260 float threshold = options_.sample_mapping_percentage_threshold;
261 if (sample_mapping_percentage < threshold) {
262 LOG(ERROR) << "Mapped " << static_cast<int>(sample_mapping_percentage)
263 << "% of samples, expected at least "
264 << static_cast<int>(threshold) << "%";
265 return false;
266 }
267 stats_.did_remap = options_.do_remap;
268 return true;
269}
270
271namespace {
272
// Closes the wrapped file descriptor when the object is destroyed. A
// descriptor of -1 means there is nothing to close.
class FdCloser {
 public:
  explicit FdCloser(int fd) : fd_(fd) {}
  ~FdCloser() {
    if (fd_ != -1) close(fd_);
  }

  // Not default-constructible, and neither copy-constructible nor
  // copy-assignable: a copy would close the same descriptor a second time.
  FdCloser() = delete;
  FdCloser(const FdCloser&) = delete;
  FdCloser& operator=(const FdCloser&) = delete;

 private:
  int fd_;
};
286
// Packs two uint32_t values into one uint64_t, with |first| in the upper 32
// bits and |second| in the lower 32 bits. The packed value can be hashed in
// an unordered_set, which has no default hash for a pair.
uint64_t mergeTwoU32(uint32_t first, uint32_t second) {
  const uint64_t upper_half = static_cast<uint64_t>(first) << 32;
  return upper_half | second;
}
292
// Splits |value| into two uint32_t values: |first| is the upper 32 bits and
// |second| the lower 32 bits. This is the inverse of packing two uint32_t
// values as (first << 32) | second.
std::pair<uint32_t, uint32_t> splitU64(uint64_t value) {
  const uint32_t upper = static_cast<uint32_t>(value >> 32);
  // The narrowing conversion keeps exactly the low 32 bits.
  const uint32_t lower = static_cast<uint32_t>(value);
  return std::make_pair(upper, lower);
}
299
lakshmana5d24e0b2017-08-14 14:18:43 -0700300bool ReadElfBuildIdIfSameInode(const string& dso_path, const DSOInfo& dso,
301 string* buildid) {
302 int fd = open(dso_path.c_str(), O_RDONLY);
303 FdCloser fd_closer(fd);
304 if (fd == -1) {
lakshmana3fa0a5f2018-02-08 12:20:56 -0800305 if (errno != ENOENT) LOG(ERROR) << "Failed to open ELF file: " << dso_path;
lakshmana5d24e0b2017-08-14 14:18:43 -0700306 return false;
307 }
308
309 struct stat s;
310 CHECK_GE(fstat(fd, &s), 0);
311 // Only reject based on inode if we actually have device info (from MMAP2).
lakshmana3fa0a5f2018-02-08 12:20:56 -0800312 if (dso.maj != 0 && dso.min != 0 && !SameInode(dso, &s)) return false;
lakshmana5d24e0b2017-08-14 14:18:43 -0700313
314 return ReadElfBuildId(fd, buildid);
315}
316
317// Looks up build ID of a given DSO by reading directly from the file system.
318// - Does not support reading build ID of the main kernel binary.
319// - Reads build IDs of kernel modules and other DSOs using functions in dso.h.
320string FindDsoBuildId(const DSOInfo& dso_info) {
321 string buildid_bin;
322 const string& dso_name = dso_info.name;
lakshmana3fa0a5f2018-02-08 12:20:56 -0800323 if (IsKernelNonModuleName(dso_name)) return buildid_bin; // still empty
lakshmana5d24e0b2017-08-14 14:18:43 -0700324 // Does this look like a kernel module?
325 if (dso_name.size() >= 2 && dso_name[0] == '[' && dso_name.back() == ']') {
326 // This may not be successful, but either way, just return. buildid_bin
327 // will be empty if the module was not found.
lakshmana3fa0a5f2018-02-08 12:20:56 -0800328 ReadModuleBuildId(dso_name.substr(1, dso_name.size() - 2), &buildid_bin);
lakshmana5d24e0b2017-08-14 14:18:43 -0700329 return buildid_bin;
330 }
331 // Try normal files, possibly inside containers.
332 u32 last_pid = 0;
lakshmanad825e1e2017-11-03 10:26:31 -0700333 std::vector<uint64_t> threads(dso_info.threads.begin(),
334 dso_info.threads.end());
335 std::sort(threads.begin(), threads.end());
336 for (auto pidtid_it : threads) {
337 uint32_t pid, tid;
338 std::tie(pid, tid) = splitU64(pidtid_it);
lakshmana5d24e0b2017-08-14 14:18:43 -0700339 std::stringstream dso_path_stream;
340 dso_path_stream << "/proc/" << tid << "/root/" << dso_name;
341 string dso_path = dso_path_stream.str();
342 if (ReadElfBuildIdIfSameInode(dso_path, dso_info, &buildid_bin)) {
343 return buildid_bin;
344 }
345 // Avoid re-trying the parent process if it's the same for multiple threads.
346 // dso_info.threads is sorted, so threads in a process should be adjacent.
lakshmana3fa0a5f2018-02-08 12:20:56 -0800347 if (pid == last_pid || pid == tid) continue;
lakshmana5d24e0b2017-08-14 14:18:43 -0700348 last_pid = pid;
349 // Try the parent process:
350 std::stringstream parent_dso_path_stream;
351 parent_dso_path_stream << "/proc/" << pid << "/root/" << dso_name;
352 string parent_dso_path = parent_dso_path_stream.str();
353 if (ReadElfBuildIdIfSameInode(parent_dso_path, dso_info, &buildid_bin)) {
354 return buildid_bin;
355 }
356 }
357 // Still don't have a buildid. Try our own filesystem:
358 if (ReadElfBuildIdIfSameInode(dso_name, dso_info, &buildid_bin)) {
359 return buildid_bin;
360 }
361 return buildid_bin; // still empty.
362}
363
364} // namespace
365
366bool PerfParser::FillInDsoBuildIds() {
367 std::map<string, string> filenames_to_build_ids;
368 reader_->GetFilenamesToBuildIDs(&filenames_to_build_ids);
369
370 std::map<string, string> new_buildids;
371
372 for (std::pair<const string, DSOInfo>& kv : name_to_dso_) {
373 DSOInfo& dso_info = kv.second;
374 const auto it = filenames_to_build_ids.find(dso_info.name);
375 if (it != filenames_to_build_ids.end()) {
376 dso_info.build_id = it->second;
377 }
378 // If there is both an existing build ID and a new build ID returned by
379 // FindDsoBuildId(), overwrite the existing build ID.
380 if (options_.read_missing_buildids && dso_info.hit) {
381 string buildid_bin = FindDsoBuildId(dso_info);
382 if (!buildid_bin.empty()) {
383 dso_info.build_id = RawDataToHexString(buildid_bin);
384 new_buildids[dso_info.name] = dso_info.build_id;
385 }
386 }
387 }
388
lakshmana3fa0a5f2018-02-08 12:20:56 -0800389 if (new_buildids.empty()) return true;
lakshmana5d24e0b2017-08-14 14:18:43 -0700390 return reader_->InjectBuildIDs(new_buildids);
391}
392
393void PerfParser::UpdatePerfEventsFromParsedEvents() {
394 // Reorder the events in |reader_| to match the order of |parsed_events_|.
395 // The |event_ptr|'s in |parsed_events_| are pointers to existing events in
396 // |reader_|.
397 RepeatedPtrField<PerfEvent> new_events;
398 new_events.Reserve(parsed_events_.size());
399 for (ParsedEvent& parsed_event : parsed_events_) {
400 PerfEvent* new_event = new_events.Add();
401 new_event->Swap(parsed_event.event_ptr);
402 parsed_event.event_ptr = new_event;
403 }
404
405 reader_->mutable_events()->Swap(&new_events);
406}
407
408bool PerfParser::MapSampleEvent(ParsedEvent* parsed_event) {
409 bool mapping_failed = false;
410
411 const PerfEvent& event = *parsed_event->event_ptr;
412 if (!event.has_sample_event() ||
lakshmana3fa0a5f2018-02-08 12:20:56 -0800413 !(event.sample_event().has_ip() && event.sample_event().has_pid() &&
lakshmana5d24e0b2017-08-14 14:18:43 -0700414 event.sample_event().has_tid())) {
415 return false;
416 }
417 SampleEvent& sample_info = *parsed_event->event_ptr->mutable_sample_event();
418
419 // Find the associated command.
420 PidTid pidtid = std::make_pair(sample_info.pid(), sample_info.tid());
421 const auto comm_iter = pidtid_to_comm_map_.find(pidtid);
422 if (comm_iter != pidtid_to_comm_map_.end())
423 parsed_event->set_command(comm_iter->second);
424
425 const uint64_t unmapped_event_ip = sample_info.ip();
426 uint64_t remapped_event_ip = 0;
427
428 // Map the event IP itself.
lakshmana3fa0a5f2018-02-08 12:20:56 -0800429 if (!MapIPAndPidAndGetNameAndOffset(sample_info.ip(), pidtid,
lakshmana5d24e0b2017-08-14 14:18:43 -0700430 &remapped_event_ip,
431 &parsed_event->dso_and_offset)) {
432 mapping_failed = true;
433 } else {
434 sample_info.set_ip(remapped_event_ip);
435 }
436
437 if (sample_info.callchain_size() &&
lakshmana3fa0a5f2018-02-08 12:20:56 -0800438 !MapCallchain(sample_info.ip(), pidtid, unmapped_event_ip,
439 sample_info.mutable_callchain(), parsed_event)) {
lakshmana5d24e0b2017-08-14 14:18:43 -0700440 mapping_failed = true;
441 }
442
443 if (sample_info.branch_stack_size() &&
lakshmana3fa0a5f2018-02-08 12:20:56 -0800444 !MapBranchStack(pidtid, sample_info.mutable_branch_stack(),
lakshmana5d24e0b2017-08-14 14:18:43 -0700445 parsed_event)) {
446 mapping_failed = true;
447 }
448
449 return !mapping_failed;
450}
451
lakshmana3fa0a5f2018-02-08 12:20:56 -0800452bool PerfParser::MapCallchain(const uint64_t ip, const PidTid pidtid,
lakshmana5d24e0b2017-08-14 14:18:43 -0700453 const uint64_t original_event_addr,
454 RepeatedField<uint64>* callchain,
455 ParsedEvent* parsed_event) {
456 if (!callchain) {
457 LOG(ERROR) << "NULL call stack data.";
458 return false;
459 }
460
461 bool mapping_failed = false;
462
lakshmana3fa0a5f2018-02-08 12:20:56 -0800463 // If the callchain is empty, there is no work to do.
464 if (callchain->empty()) return true;
lakshmana5d24e0b2017-08-14 14:18:43 -0700465
466 // Keeps track of whether the current entry is kernel or user.
467 parsed_event->callchain.resize(callchain->size());
468 int num_entries_mapped = 0;
469 for (int i = 0; i < callchain->size(); ++i) {
470 uint64_t entry = callchain->Get(i);
471 // When a callchain context entry is found, do not attempt to symbolize it.
472 if (entry >= PERF_CONTEXT_MAX) {
473 continue;
474 }
475 // The sample address has already been mapped so no need to map it.
476 if (entry == original_event_addr) {
477 callchain->Set(i, ip);
478 continue;
479 }
480 uint64_t mapped_addr = 0;
481 if (!MapIPAndPidAndGetNameAndOffset(
lakshmana3fa0a5f2018-02-08 12:20:56 -0800482 entry, pidtid, &mapped_addr,
lakshmana5d24e0b2017-08-14 14:18:43 -0700483 &parsed_event->callchain[num_entries_mapped++])) {
484 mapping_failed = true;
485 } else {
486 callchain->Set(i, mapped_addr);
487 }
488 }
489 // Not all the entries were mapped. Trim |parsed_event->callchain| to
490 // remove unused entries at the end.
491 parsed_event->callchain.resize(num_entries_mapped);
492
493 return !mapping_failed;
494}
495
496bool PerfParser::MapBranchStack(
lakshmana3fa0a5f2018-02-08 12:20:56 -0800497 const PidTid pidtid, RepeatedPtrField<BranchStackEntry>* branch_stack,
lakshmana5d24e0b2017-08-14 14:18:43 -0700498 ParsedEvent* parsed_event) {
499 if (!branch_stack) {
500 LOG(ERROR) << "NULL branch stack data.";
501 return false;
502 }
503
504 // First, trim the branch stack to remove trailing null entries.
505 size_t trimmed_size = 0;
506 for (const BranchStackEntry& entry : *branch_stack) {
507 // Count the number of non-null entries before the first null entry.
lakshmana3fa0a5f2018-02-08 12:20:56 -0800508 if (IsNullBranchStackEntry(entry)) break;
lakshmana5d24e0b2017-08-14 14:18:43 -0700509 ++trimmed_size;
510 }
511
512 // If a null entry was found, make sure all subsequent null entries are NULL
513 // as well.
514 for (int i = trimmed_size; i < branch_stack->size(); ++i) {
515 const BranchStackEntry& entry = branch_stack->Get(i);
516 if (!IsNullBranchStackEntry(entry)) {
517 LOG(ERROR) << "Non-null branch stack entry found after null entry: "
518 << reinterpret_cast<void*>(entry.from_ip()) << " -> "
519 << reinterpret_cast<void*>(entry.to_ip());
520 return false;
521 }
522 }
523
524 // Map branch stack addresses.
525 parsed_event->branch_stack.resize(trimmed_size);
526 for (unsigned int i = 0; i < trimmed_size; ++i) {
527 BranchStackEntry* entry = branch_stack->Mutable(i);
528 ParsedEvent::BranchEntry& parsed_entry = parsed_event->branch_stack[i];
529
530 uint64_t from_mapped = 0;
lakshmana3fa0a5f2018-02-08 12:20:56 -0800531 if (!MapIPAndPidAndGetNameAndOffset(entry->from_ip(), pidtid, &from_mapped,
lakshmana5d24e0b2017-08-14 14:18:43 -0700532 &parsed_entry.from)) {
533 return false;
534 }
535 entry->set_from_ip(from_mapped);
536
537 uint64_t to_mapped = 0;
lakshmana3fa0a5f2018-02-08 12:20:56 -0800538 if (!MapIPAndPidAndGetNameAndOffset(entry->to_ip(), pidtid, &to_mapped,
lakshmana5d24e0b2017-08-14 14:18:43 -0700539 &parsed_entry.to)) {
540 return false;
541 }
542 entry->set_to_ip(to_mapped);
543
544 parsed_entry.predicted = !entry->mispredicted();
545 }
546
547 return true;
548}
549
550bool PerfParser::MapIPAndPidAndGetNameAndOffset(
lakshmana3fa0a5f2018-02-08 12:20:56 -0800551 uint64_t ip, PidTid pidtid, uint64_t* new_ip,
lakshmana5d24e0b2017-08-14 14:18:43 -0700552 ParsedEvent::DSOAndOffset* dso_and_offset) {
553 DCHECK(dso_and_offset);
554 // Attempt to find the synthetic address of the IP sample in this order:
555 // 1. Address space of the kernel.
556 // 2. Address space of its own process.
557 // 3. Address space of the parent process.
558
559 uint64_t mapped_addr = 0;
560
561 // Sometimes the first event we see is a SAMPLE event and we don't have the
562 // time to create an address mapper for a process. Example, for pid 0.
563 AddressMapper* mapper = GetOrCreateProcessMapper(pidtid.first).first;
lakshmanad1d18fc2017-11-03 12:33:31 -0700564 AddressMapper::MappingList::const_iterator ip_iter;
565 bool mapped =
566 mapper->GetMappedAddressAndListIterator(ip, &mapped_addr, &ip_iter);
lakshmana5d24e0b2017-08-14 14:18:43 -0700567 if (mapped) {
568 uint64_t id = UINT64_MAX;
lakshmanad1d18fc2017-11-03 12:33:31 -0700569 mapper->GetMappedIDAndOffset(ip, ip_iter, &id, &dso_and_offset->offset_);
lakshmana5d24e0b2017-08-14 14:18:43 -0700570 // Make sure the ID points to a valid event.
571 CHECK_LE(id, parsed_events_.size());
572 ParsedEvent& parsed_event = parsed_events_[id];
573 const auto& event = parsed_event.event_ptr;
574 DCHECK(event->has_mmap_event()) << "Expected MMAP or MMAP2 event";
575
576 // Find the mmap DSO filename in the set of known DSO names.
577 auto dso_iter = name_to_dso_.find(event->mmap_event().filename());
578 CHECK(dso_iter != name_to_dso_.end());
579 dso_and_offset->dso_info_ = &dso_iter->second;
580
581 dso_iter->second.hit = true;
lakshmanad825e1e2017-11-03 10:26:31 -0700582 dso_iter->second.threads.insert(mergeTwoU32(pidtid.first, pidtid.second));
lakshmana5d24e0b2017-08-14 14:18:43 -0700583 ++parsed_event.num_samples_in_mmap_region;
584
585 if (options_.do_remap) {
586 if (GetPageAlignedOffset(mapped_addr) != GetPageAlignedOffset(ip)) {
587 LOG(ERROR) << "Remapped address " << std::hex << mapped_addr << " "
588 << "does not have the same page alignment offset as "
589 << "original address " << ip;
590 return false;
591 }
592 *new_ip = mapped_addr;
593 } else {
594 *new_ip = ip;
595 }
596 }
597 return mapped;
598}
599
600bool PerfParser::MapMmapEvent(PerfDataProto_MMapEvent* event, uint64_t id) {
601 // We need to hide only the real kernel addresses. However, to make things
602 // more secure, and make the mapping idempotent, we should remap all
603 // addresses, both kernel and non-kernel.
604
605 AddressMapper* mapper = GetOrCreateProcessMapper(event->pid()).first;
606
607 uint64_t start = event->start();
608 uint64_t len = event->len();
609 uint64_t pgoff = event->pgoff();
610
611 // |id| == 0 corresponds to the kernel mmap. We have several cases here:
612 //
613 // For ARM and x86, in sudo mode, pgoff == start, example:
614 // start=0x80008200
615 // pgoff=0x80008200
616 // len =0xfffffff7ff7dff
617 //
618 // For x86-64, in sudo mode, pgoff is between start and start + len. SAMPLE
619 // events lie between pgoff and pgoff + length of the real kernel binary,
620 // example:
621 // start=0x3bc00000
622 // pgoff=0xffffffffbcc00198
623 // len =0xffffffff843fffff
624 // SAMPLE events will be found after pgoff. For kernels with ASLR, pgoff will
625 // be something only visible to the root user, and will be randomized at
626 // startup. With |remap| set to true, we should hide pgoff in this case. So we
627 // normalize all SAMPLE events relative to pgoff.
628 //
629 // For non-sudo mode, the kernel will be mapped from 0 to the pointer limit,
630 // example:
631 // start=0x0
632 // pgoff=0x0
633 // len =0xffffffff
634 if (id == 0) {
635 // If pgoff is between start and len, we normalize the event by setting
636 // start to be pgoff just like how it is for ARM and x86. We also set len to
637 // be a much smaller number (closer to the real length of the kernel binary)
638 // because SAMPLEs are actually only seen between |event->pgoff| and
639 // |event->pgoff + kernel text size|.
640 if (pgoff > start && pgoff < start + len) {
641 len = len + start - pgoff;
642 start = pgoff;
643 }
644 // For kernels with ALSR pgoff is critical information that should not be
645 // revealed when |remap| is true.
646 pgoff = 0;
647 }
648
649 if (!mapper->MapWithID(start, len, id, pgoff, true)) {
650 mapper->DumpToLog();
651 return false;
652 }
653
654 if (options_.do_remap) {
655 uint64_t mapped_addr;
lakshmanad1d18fc2017-11-03 12:33:31 -0700656 AddressMapper::MappingList::const_iterator start_iter;
657 if (!mapper->GetMappedAddressAndListIterator(start, &mapped_addr,
658 &start_iter)) {
lakshmana5d24e0b2017-08-14 14:18:43 -0700659 LOG(ERROR) << "Failed to map starting address " << std::hex << start;
660 return false;
661 }
662 if (GetPageAlignedOffset(mapped_addr) != GetPageAlignedOffset(start)) {
663 LOG(ERROR) << "Remapped address " << std::hex << mapped_addr << " "
664 << "does not have the same page alignment offset as start "
665 << "address " << start;
666 return false;
667 }
668
669 event->set_start(mapped_addr);
670 event->set_len(len);
671 event->set_pgoff(pgoff);
672 }
673 return true;
674}
675
676bool PerfParser::MapCommEvent(const PerfDataProto_CommEvent& event) {
677 GetOrCreateProcessMapper(event.pid());
678 return true;
679}
680
681bool PerfParser::MapForkEvent(const PerfDataProto_ForkEvent& event) {
682 PidTid parent = std::make_pair(event.ppid(), event.ptid());
683 PidTid child = std::make_pair(event.pid(), event.tid());
684 if (parent != child) {
685 auto parent_iter = pidtid_to_comm_map_.find(parent);
686 if (parent_iter != pidtid_to_comm_map_.end())
687 pidtid_to_comm_map_[child] = parent_iter->second;
688 }
689
690 const uint32_t pid = event.pid();
691
692 // If the parent and child pids are the same, this is just a new thread
693 // within the same process, so don't do anything.
lakshmana3fa0a5f2018-02-08 12:20:56 -0800694 if (event.ppid() == pid) return true;
lakshmana5d24e0b2017-08-14 14:18:43 -0700695
696 if (!GetOrCreateProcessMapper(pid, event.ppid()).second) {
697 DVLOG(1) << "Found an existing process mapper with pid: " << pid;
698 }
699
700 return true;
701}
702
703std::pair<AddressMapper*, bool> PerfParser::GetOrCreateProcessMapper(
704 uint32_t pid, uint32_t ppid) {
705 const auto& search = process_mappers_.find(pid);
706 if (search != process_mappers_.end()) {
707 return std::make_pair(search->second.get(), false);
708 }
709
710 auto parent_mapper = process_mappers_.find(ppid);
711 // Recent perf implementations (at least as recent as perf 4.4), add an
712 // explicit FORK event from the swapper process to the init process. There may
713 // be no explicit memory mappings created for the swapper process. In such
714 // cases, we must use the mappings from the kernel process, which are used by
715 // default for a new PID in the absence of an explicit FORK event.
716 if (parent_mapper == process_mappers_.end()) {
717 parent_mapper = process_mappers_.find(kKernelPid);
718 }
719 std::unique_ptr<AddressMapper> mapper;
720 if (parent_mapper != process_mappers_.end()) {
721 mapper.reset(new AddressMapper(*parent_mapper->second));
722 } else {
723 mapper.reset(new AddressMapper());
724 mapper->set_page_alignment(kMmapPageAlignment);
725 }
726
727 const auto inserted =
728 process_mappers_.insert(search, std::make_pair(pid, std::move(mapper)));
729 return std::make_pair(inserted->second.get(), true);
730}
731
732} // namespace quipper