blob: d4390831bcbaee3225822c96cd4d04c55fbf80fa [file] [log] [blame]
Teng Qinef9d83f2019-02-28 15:29:55 -08001/*
2 * Copyright (c) Facebook, Inc.
3 * Licensed under the Apache License, Version 2.0 (the "License")
4 */
5
6#include <algorithm>
7#include <cerrno>
8#include <chrono>
9#include <cstdio>
10#include <cstring>
11#include <exception>
12#include <unordered_map>
13
14#include <dirent.h>
15#include <linux/elf.h>
16#include <sys/stat.h>
17#include <sys/types.h>
18#include <unistd.h>
19
20#include "PyPerfLoggingHelper.h"
21#include "PyPerfUtil.h"
22#include "bcc_elf.h"
23#include "bcc_proc.h"
24#include "bcc_syms.h"
25
26namespace ebpf {
27namespace pyperf {
28
29extern OffsetConfig kPy36OffsetConfig;
30extern std::string PYPERF_BPF_PROGRAM;
31
32const static std::string kLostSymbol = "[Lost Symbol]";
33const static std::string kIncompleteStack = "[Truncated Stack]";
34const static std::string kErrorStack = "[Stack Error]";
35const static std::string kNonPythonStack = "[Non-Python Code]";
36const static int kPerfBufSizePages = 32;
37
38const static std::string kPidCfgTableName("pid_config");
39const static std::string kProgsTableName("progs");
40const static std::string kSamplePerfBufName("events");
41
42const static std::string kOnEventFuncName("on_event");
43
44const static std::string kPythonStackFuncName("read_python_stack");
45const static std::string kPythonStackProgIdxFlag("-DPYTHON_STACK_PROG_IDX=");
46const static int kPythonStackProgIdx = 0;
47
48const static std::string kNumCpusFlag("-DNUM_CPUS=");
49const static std::string kSymbolsHashSizeFlag("-D__SYMBOLS_SIZE__=");
50const static int kSymbolsHashSize = 16384;
51
52namespace {
53
54bool getRunningPids(std::vector<int>& output) {
55 auto dir = ::opendir("/proc/");
56 if (!dir) {
57 std::fprintf(stderr, "Open /proc failed: %d\n", errno);
58 return false;
59 }
60
61 dirent* result = nullptr;
62 do {
63 if ((result = readdir(dir))) {
64 std::string basename = result->d_name;
65 if (basename == "." || basename == "..") {
66 continue;
67 }
68
69 std::string fullpath = "/proc/" + basename;
70 struct stat st;
71 if (::stat(fullpath.c_str(), &st) != 0 || !S_ISDIR(st.st_mode)) {
72 continue;
73 }
74
75 try {
76 auto pid = std::stoi(basename);
77 output.push_back(pid);
78 } catch (const std::exception& e) {
79 continue;
80 }
81 }
82 } while (result);
83
84 if (::closedir(dir) == -1) {
85 std::fprintf(stderr, "Close /proc failed: %d\n", errno);
86 return false;
87 }
88
89 return true;
90}
91
92typedef struct {
93 int pid;
94 bool found;
95 uint64_t st;
96 uint64_t en;
97} FindPythonPathHelper;
98
99const static std::string kPy36LibName = "libpython3.6";
100
101int findPythonPathCallback(const char* name, uint64_t st, uint64_t en, uint64_t,
102 bool, void* payload) {
103 auto helper = static_cast<FindPythonPathHelper*>(payload);
104 std::string file = name;
105 auto pos = file.rfind("/");
106 if (pos != std::string::npos) {
107 file = file.substr(pos + 1);
108 }
109 if (file.find(kPy36LibName) == 0) {
110 logInfo(1, "Found Python library %s loaded at %lx-%lx for PID %d\n", name, st, en, helper->pid);
111 helper->found = true;
112 helper->st = st;
113 helper->en = en;
114 return -1;
115 }
116 return 0;
117}
118
119bool allAddrFound(const PidData& data) {
120 return (data.current_state_addr > 0) && (data.tls_key_addr > 0) &&
121 (data.gil_locked_addr > 0) && (data.gil_last_holder_addr > 0);
122}
123
124int getAddrOfPythonBinaryCallback(const char* name, uint64_t addr, uint64_t,
125 void* payload) {
126 PidData& data = *static_cast<PidData*>(payload);
127
128 auto checkAndGetAddr = [&](uintptr_t& targetAddr, const char* targetName) {
129 if (targetAddr == 0 && std::strcmp(name, targetName) == 0) {
130 targetAddr = addr;
131 }
132 };
133
134 checkAndGetAddr(data.tls_key_addr, "autoTLSkey");
135 checkAndGetAddr(data.current_state_addr, "_PyThreadState_Current");
136 checkAndGetAddr(data.gil_locked_addr, "gil_locked");
137 checkAndGetAddr(data.gil_last_holder_addr, "gil_last_holder");
138
139 if (allAddrFound(data)) {
140 return -1;
141 }
142 return 0;
143}
144
145bool getAddrOfPythonBinary(const std::string& path, PidData& data) {
146 std::memset(&data, 0, sizeof(data));
147
148 struct bcc_symbol_option option = {.use_debug_file = 0,
149 .check_debug_file_crc = 0,
150 .use_symbol_type = (1 << STT_OBJECT)};
151
152 bcc_elf_foreach_sym(path.c_str(), &getAddrOfPythonBinaryCallback, &option,
153 &data);
154
155 return allAddrFound(data);
156}
157} // namespace
158
159void handleSampleCallback(void* cb_cookie, void* raw_data, int data_size) {
160 auto profiler = static_cast<PyPerfUtil*>(cb_cookie);
161 profiler->handleSample(raw_data, data_size);
162}
163
164void handleLostSamplesCallback(void* cb_cookie, uint64_t lost_cnt) {
165 auto profiler = static_cast<PyPerfUtil*>(cb_cookie);
166 profiler->handleLostSamples(lost_cnt);
167}
168
169PyPerfUtil::PyPerfResult PyPerfUtil::init() {
170 std::vector<std::string> cflags;
171 cflags.emplace_back(kNumCpusFlag +
172 std::to_string(::sysconf(_SC_NPROCESSORS_ONLN)));
173 cflags.emplace_back(kSymbolsHashSizeFlag + std::to_string(kSymbolsHashSize));
174 cflags.emplace_back(kPythonStackProgIdxFlag +
175 std::to_string(kPythonStackProgIdx));
176
177 auto initRes = bpf_.init(PYPERF_BPF_PROGRAM, cflags);
178 if (initRes.code() != 0) {
179 std::fprintf(stderr, "Failed to compiled PyPerf BPF programs: %s\n",
180 initRes.msg().c_str());
181 return PyPerfResult::INIT_FAIL;
182 }
183
184 int progFd = -1;
185 auto loadRes =
186 bpf_.load_func(kPythonStackFuncName, BPF_PROG_TYPE_PERF_EVENT, progFd);
187 if (loadRes.code() != 0) {
188 std::fprintf(stderr, "Failed to load BPF program %s: %s\n",
189 kPythonStackFuncName.c_str(), loadRes.msg().c_str());
190 return PyPerfResult::INIT_FAIL;
191 }
192
193 auto progTable = bpf_.get_prog_table(kProgsTableName);
194 auto updateRes = progTable.update_value(kPythonStackProgIdx, progFd);
195 if (updateRes.code() != 0) {
196 std::fprintf(stderr,
197 "Failed to set BPF program %s FD %d to program table: %s\n",
198 kPythonStackFuncName.c_str(), progFd, updateRes.msg().c_str());
199 return PyPerfResult::INIT_FAIL;
200 }
201
202 std::vector<int> pids;
203 if (!getRunningPids(pids)) {
204 std::fprintf(stderr, "Failed getting running Processes\n");
205 return PyPerfResult::INIT_FAIL;
206 }
207
208 // Populate config for each Python Process
209 auto pid_hash = bpf_.get_hash_table<int, PidData>(kPidCfgTableName);
210 PidData pidData;
211 for (const auto pid : pids) {
212 if (!tryTargetPid(pid, pidData)) {
213 // Not a Python Process
214 continue;
215 }
216 pid_hash.update_value(pid, pidData);
217 }
218
219 // Open perf buffer
220 auto openRes = bpf_.open_perf_buffer(
221 kSamplePerfBufName, &handleSampleCallback, &handleLostSamplesCallback,
222 this, kPerfBufSizePages);
223 if (openRes.code() != 0) {
224 std::fprintf(stderr, "Unable to open Perf Buffer: %s\n",
225 openRes.msg().c_str());
226 return PyPerfResult::PERF_BUF_OPEN_FAIL;
227 }
228
229 initCompleted_ = true;
230 return PyPerfResult::SUCCESS;
231}
232
233void PyPerfUtil::handleSample(const void* data, int dataSize) {
234 const Event* raw = static_cast<const Event*>(data);
235 samples_.emplace_back(raw, dataSize);
236 totalSamples_++;
237}
238
239void PyPerfUtil::handleLostSamples(int lostCnt) { lostSamples_ += lostCnt; }
240
241PyPerfUtil::PyPerfResult PyPerfUtil::profile(int64_t sampleRate,
242 int64_t durationMs) {
243 if (!initCompleted_) {
244 std::fprintf(stderr, "PyPerfUtil::init not invoked or failed\n");
245 return PyPerfResult::NO_INIT;
246 }
247
248 // Attach to CPU cycles
249 auto attachRes =
250 bpf_.attach_perf_event(0, 0, kOnEventFuncName, sampleRate, 0);
251 if (attachRes.code() != 0) {
252 std::fprintf(stderr, "Attach to CPU cycles event failed: %s\n",
253 attachRes.msg().c_str());
254 return PyPerfResult::EVENT_ATTACH_FAIL;
255 }
256 logInfo(2, "Attached to profiling event\n");
257
258 // Get Perf Buffer and poll in a loop for a given duration
259 auto perfBuffer = bpf_.get_perf_buffer(kSamplePerfBufName);
260 if (!perfBuffer) {
261 std::fprintf(stderr, "Failed to get Perf Buffer: %s\n",
262 kSamplePerfBufName.c_str());
263 return PyPerfResult::PERF_BUF_OPEN_FAIL;
264 }
265 logInfo(2, "Started polling Perf Buffer\n");
266 auto start = std::chrono::steady_clock::now();
267 while (std::chrono::steady_clock::now() <
268 start + std::chrono::milliseconds(durationMs)) {
269 perfBuffer->poll(50 /* 50ms timeout */);
270 }
271 logInfo(2, "Profiling duration finished\n");
272
273 // Detach the event
274 auto detachRes = bpf_.detach_perf_event(0, 0);
275 if (detachRes.code() != 0) {
276 std::fprintf(stderr, "Detach CPU cycles event failed: %s\n",
277 detachRes.msg().c_str());
278 return PyPerfResult::EVENT_DETACH_FAIL;
279 }
280 logInfo(2, "Detached from profiling event\n");
281
282 // Drain remaining samples
283 logInfo(2, "Draining remaining samples\n");
284 while (perfBuffer->poll(0) > 0) {
285 }
286 logInfo(2, "Finished draining remaining samples\n");
287
288 // Get symbol names and output samples
289 auto symbolTable = bpf_.get_hash_table<Symbol, int32_t>("symbols");
290 std::unordered_map<int32_t, std::string> symbols;
291 for (auto& x : symbolTable.get_table_offline()) {
292 auto symbolName = getSymbolName(x.first);
293 logInfo(2, "Symbol ID %d is %s\n", x.second, symbolName.c_str());
294 symbols.emplace(x.second, std::move(symbolName));
295 }
296 logInfo(1, "Total %d unique Python symbols\n", symbols.size());
297
298 for (auto& sample : samples_) {
299 if (sample.threadStateMatch != THREAD_STATE_THIS_THREAD_NULL &&
300 sample.threadStateMatch != THREAD_STATE_BOTH_NULL) {
301 for (const auto stackId : sample.pyStackIds) {
302 auto symbIt = symbols.find(stackId);
303 if (symbIt != symbols.end()) {
304 std::printf(" %s\n", symbIt->second.c_str());
305 } else {
306 std::printf(" %s\n", kLostSymbol.c_str());
307 lostSymbols_++;
308 }
309 }
310 switch (sample.stackStatus) {
311 case STACK_STATUS_TRUNCATED:
312 std::printf(" %s\n", kIncompleteStack.c_str());
313 truncatedStack_++;
314 break;
315 case STACK_STATUS_ERROR:
316 std::printf(" %s\n", kErrorStack.c_str());
317 break;
318 default:
319 break;
320 }
321 } else {
322 std::printf(" %s\n", kNonPythonStack.c_str());
323 }
324
325 std::printf("PID: %d TID: %d (%s)\n", sample.pid, sample.tid,
326 sample.comm.c_str());
327 std::printf("GIL State: %d Thread State: %d PthreadID Match State: %d\n\n",
328 sample.threadStateMatch, sample.gilState,
329 sample.pthreadIDMatch);
330 }
331
332 logInfo(0, "%d samples collected\n", totalSamples_);
333 logInfo(0, "%d samples lost\n", lostSamples_);
334 logInfo(0, "%d samples with truncated stack\n", truncatedStack_);
335 logInfo(0, "%d times Python symbol lost\n", lostSymbols_);
336
337 return PyPerfResult::SUCCESS;
338}
339
340std::string PyPerfUtil::getSymbolName(Symbol& sym) const {
341 std::string nameStr = std::string(sym.name).substr(0, FUNCTION_NAME_LEN);
342 std::string classStr = std::string(sym.classname).substr(0, CLASS_NAME_LEN);
343 if (classStr.size() > 0) {
344 nameStr = classStr + "." + nameStr;
345 }
346
347 std::string file = std::string(sym.file).substr(0, FILE_NAME_LEN);
348 if (file.empty()) {
349 return nameStr;
350 }
351 if (file[0] == '/') {
352 file = file.substr(1);
353 }
354 if (file.find("./") == 0) {
355 file = file.substr(2);
356 }
357 if (file.find(".py", file.size() - 3) == (file.size() - 3)) {
358 file = file.substr(0, file.size() - 3);
359 }
360 std::replace(file.begin(), file.end(), '/', '.');
361
362 return file + "." + nameStr;
363}
364
365bool PyPerfUtil::tryTargetPid(int pid, PidData& data) {
366 FindPythonPathHelper helper{pid, false, 0, 0};
367 bcc_procutils_each_module(pid, &findPythonPathCallback, &helper);
368 if (!helper.found) {
369 logInfo(2, "PID %d does not contain Python library\n", pid);
370 return false;
371 }
372
373 char path[256];
374 int res = std::snprintf(path, sizeof(path), "/proc/%d/map_files/%lx-%lx", pid,
375 helper.st, helper.en);
376 if (res < 0 || size_t(res) >= sizeof(path)) {
377 return false;
378 }
379
380 if (!getAddrOfPythonBinary(path, data)) {
381 std::fprintf(stderr, "Failed getting addresses in potential Python library in PID %d\n", pid);
382 return false;
383 }
384 data.offsets = kPy36OffsetConfig;
385 data.current_state_addr += helper.st;
386 logInfo(2, "PID %d has _PyThreadState_Current at %lx\n", pid, data.current_state_addr);
387 data.tls_key_addr += helper.st;
388 logInfo(2, "PID %d has autoTLSKey at %lx\n", pid, data.current_state_addr);
389 data.gil_locked_addr += helper.st;
390 logInfo(2, "PID %d has gil_locked at %lx\n", pid, data.current_state_addr);
391 data.gil_last_holder_addr += helper.st;
392 logInfo(2, "PID %d has gil_last_holder at %lx\n", pid, data.current_state_addr);
393
394 return true;
395}
396
397} // namespace pyperf
398} // namespace ebpf