Improve PyPerf sample handling and output (#2260)
* Add common interface for PyPerf sample handling
* Better printing for enum values
diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt
index 8b80275..6f963c6 100644
--- a/examples/cpp/pyperf/CMakeLists.txt
+++ b/examples/cpp/pyperf/CMakeLists.txt
@@ -5,7 +5,7 @@
include_directories(${CMAKE_SOURCE_DIR}/src/cc/api)
include_directories(${CMAKE_SOURCE_DIR}/src/cc/libbpf/include/uapi)
-add_executable(PyPerf PyPerf.cc PyPerfUtil.cc PyPerfBPFProgram.cc PyPerfLoggingHelper.cc Py36Offsets.cc)
+add_executable(PyPerf PyPerf.cc PyPerfUtil.cc PyPerfBPFProgram.cc PyPerfLoggingHelper.cc PyPerfDefaultPrinter.cc Py36Offsets.cc)
target_link_libraries(PyPerf bcc-static)
if(INSTALL_CPP_EXAMPLES)
diff --git a/examples/cpp/pyperf/PyPerf.cc b/examples/cpp/pyperf/PyPerf.cc
index bee9b59..bad2ba0 100644
--- a/examples/cpp/pyperf/PyPerf.cc
+++ b/examples/cpp/pyperf/PyPerf.cc
@@ -16,10 +16,12 @@
#include <string>
#include <vector>
+#include "PyPerfDefaultPrinter.h"
#include "PyPerfLoggingHelper.h"
#include "PyPerfUtil.h"
int main(int argc, char** argv) {
+ // Argument parsing helpers
int pos = 1;
auto parseIntArg = [&](std::vector<std::string> argNames, uint64_t& target) {
@@ -45,9 +47,29 @@
return false;
};
+ // Boolean flags: "--<name>" turns the option on, "--no-<name>" turns it off.
+ // Returns true when argv[pos] matched one of argNames in either form.
+ auto parseBoolArg = [&](std::vector<std::string> argNames, bool& target) {
+   const std::string current(argv[pos]);
+   for (const auto& flag : argNames) {
+     const bool enables = (current == "--" + flag);
+     const bool disables = (current == "--no-" + flag);
+     if (enables || disables) {
+       target = enables;
+       return true;
+     }
+   }
+   return false;
+ };
+
+ // Default argument values
uint64_t sampleRate = 1000000;
uint64_t durationMs = 1000;
uint64_t verbosityLevel = 0;
+ bool showGILState = true;
+ bool showThreadState = true;
+ bool showPthreadIDState = false;
+
while (true) {
if (pos >= argc) {
break;
@@ -56,6 +78,10 @@
found = found || parseIntArg({"-c", "--sample-rate"}, sampleRate);
found = found || parseIntArg({"-d", "--duration"}, durationMs);
found = found || parseIntArg({"-v", "--verbose"}, verbosityLevel);
+ found = found || parseBoolArg({"show-gil-state"}, showGILState);
+ found = found || parseBoolArg({"show-thread-state"}, showThreadState);
+ found =
+ found || parseBoolArg({"show-pthread-id-state"}, showPthreadIDState);
if (!found) {
std::fprintf(stderr, "Unexpected argument: %s\n", argv[pos]);
std::exit(1);
@@ -66,10 +92,17 @@
ebpf::pyperf::setVerbosity(verbosityLevel);
ebpf::pyperf::logInfo(1, "Profiling Sample Rate: %" PRIu64 "\n", sampleRate);
ebpf::pyperf::logInfo(1, "Profiling Duration: %" PRIu64 "ms\n", durationMs);
+ ebpf::pyperf::logInfo(1, "Showing GIL state: %d\n", showGILState);
+ ebpf::pyperf::logInfo(1, "Showing Thread state: %d\n", showThreadState);
+ ebpf::pyperf::logInfo(1, "Showing Pthread ID state: %d\n",
+ showPthreadIDState);
ebpf::pyperf::PyPerfUtil util;
util.init();
- util.profile(sampleRate, durationMs);
+
+ ebpf::pyperf::PyPerfDefaultPrinter printer(showGILState, showThreadState,
+ showPthreadIDState);
+ util.profile(sampleRate, durationMs, &printer);
return 0;
}
diff --git a/examples/cpp/pyperf/PyPerfDefaultPrinter.cc b/examples/cpp/pyperf/PyPerfDefaultPrinter.cc
new file mode 100644
index 0000000..22ec2c3
--- /dev/null
+++ b/examples/cpp/pyperf/PyPerfDefaultPrinter.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ */
+
+#include <cinttypes>
+#include <cstdint>
+#include <cstdio>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "PyPerfDefaultPrinter.h"
+#include "PyPerfUtil.h"
+
+namespace ebpf {
+namespace pyperf {
+
+// Placeholder lines printed in place of a symbolized stack frame.
+const static std::string kLostSymbol = "[Lost Symbol]";
+const static std::string kIncompleteStack = "[Truncated Stack]";
+const static std::string kErrorStack = "[Stack Error]";
+const static std::string kNonPythonStack = "[Non-Python Code]";
+
+// Human-readable descriptions for PyPerfSample::gilState.
+const static std::map<int, const char*> kGILStateValues = {
+    {GIL_STATE_NO_INFO, "No GIL Info"},
+    {GIL_STATE_ERROR, "Error Reading GIL State"},
+    {GIL_STATE_UNINITIALIZED, "GIL Uninitialized"},
+    {GIL_STATE_NOT_LOCKED, "GIL Not Locked"},
+    {GIL_STATE_THIS_THREAD, "GIL on This Thread"},
+    {GIL_STATE_GLOBAL_CURRENT_THREAD,
+     "GIL on Global _PyThreadState_Current Thread"},
+    {GIL_STATE_OTHER_THREAD, "GIL on Unexpected Thread"},
+    {GIL_STATE_NULL, "GIL State Empty"}};
+
+// Human-readable descriptions for PyPerfSample::threadStateMatch.
+const static std::map<int, const char*> kThreadStateValues = {
+    {THREAD_STATE_UNKNOWN, "ThreadState Unknown"},
+    {THREAD_STATE_MATCH, "TLS ThreadState is Global _PyThreadState_Current"},
+    {THREAD_STATE_MISMATCH,
+     "TLS ThreadState is not Global _PyThreadState_Current"},
+    {THREAD_STATE_THIS_THREAD_NULL, "TLS ThreadState is NULL"},
+    {THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL,
+     "Global _PyThreadState_Current is NULL"},
+    {THREAD_STATE_BOTH_NULL,
+     "Both TLS ThreadState and Global _PyThreadState_Current is NULL"},
+};
+
+// Human-readable descriptions for PyPerfSample::pthreadIDMatch.
+const static std::map<int, const char*> kPthreadIDStateValues = {
+    {PTHREAD_ID_UNKNOWN, "Pthread ID Unknown"},
+    {PTHREAD_ID_MATCH, "System Pthread ID is Python ThreadState Pthread ID"},
+    {PTHREAD_ID_MISMATCH,
+     "System Pthread ID is not Python ThreadState Pthread ID"},
+    {PTHREAD_ID_THREAD_STATE_NULL, "No Pthread ID: TLS ThreadState is NULL"},
+    {PTHREAD_ID_NULL, "Pthread ID on TLS ThreadState is NULL"},
+    {PTHREAD_ID_ERROR, "Error Reading System Pthread ID"}};
+
+// Prints "<label>: <description>\n" for one state value of a sample. A value
+// that has no entry in `names` is printed numerically; looking it up with
+// std::map::at would throw std::out_of_range instead.
+static void printState(const char* label,
+                       const std::map<int, const char*>& names, int value) {
+  const auto it = names.find(value);
+  if (it != names.end()) {
+    std::printf("%s: %s\n", label, it->second);
+  } else {
+    std::printf("%s: Unknown (%d)\n", label, value);
+  }
+}
+
+// Writes every sample to stdout: its Python stack (one symbol per line), the
+// PID/TID/comm line and the state lines enabled at construction. Summary
+// counters for the whole run are printed after the last sample.
+void PyPerfDefaultPrinter::processSamples(
+    const std::vector<PyPerfSample>& samples, PyPerfUtil* util) {
+  const auto symbols = util->getSymbolMapping();
+  uint32_t lostSymbols = 0;
+  uint32_t truncatedStack = 0;
+
+  for (const auto& sample : samples) {
+    // The stack is only printed when the TLS ThreadState was not NULL.
+    if (sample.threadStateMatch != THREAD_STATE_THIS_THREAD_NULL &&
+        sample.threadStateMatch != THREAD_STATE_BOTH_NULL) {
+      for (const auto stackId : sample.pyStackIds) {
+        const auto symbIt = symbols.find(stackId);
+        if (symbIt != symbols.end()) {
+          std::printf(" %s\n", symbIt->second.c_str());
+        } else {
+          std::printf(" %s\n", kLostSymbol.c_str());
+          lostSymbols++;
+        }
+      }
+      switch (sample.stackStatus) {
+        case STACK_STATUS_TRUNCATED:
+          std::printf(" %s\n", kIncompleteStack.c_str());
+          truncatedStack++;
+          break;
+        case STACK_STATUS_ERROR:
+          std::printf(" %s\n", kErrorStack.c_str());
+          break;
+        default:
+          break;
+      }
+    } else {
+      std::printf(" %s\n", kNonPythonStack.c_str());
+    }
+
+    std::printf("PID: %d TID: %d (%s)\n", sample.pid, sample.tid,
+                sample.comm.c_str());
+    if (showGILState_) {
+      printState("GIL State", kGILStateValues, sample.gilState);
+    }
+    if (showThreadState_) {
+      printState("Thread State", kThreadStateValues, sample.threadStateMatch);
+    }
+    if (showPthreadIDState_) {
+      printState("Pthread ID State", kPthreadIDStateValues,
+                 sample.pthreadIDMatch);
+    }
+
+    std::printf("\n");
+  }
+
+  // The counters are uint32_t, so they need an unsigned conversion specifier.
+  std::printf("%" PRIu32 " samples collected\n", util->getTotalSamples());
+  std::printf("%" PRIu32 " samples lost\n", util->getLostSamples());
+  std::printf("%" PRIu32 " samples with truncated stack\n", truncatedStack);
+  std::printf("%" PRIu32 " times Python symbol lost\n", lostSymbols);
+}
+
+} // namespace pyperf
+} // namespace ebpf
diff --git a/examples/cpp/pyperf/PyPerfDefaultPrinter.h b/examples/cpp/pyperf/PyPerfDefaultPrinter.h
new file mode 100644
index 0000000..89c8153
--- /dev/null
+++ b/examples/cpp/pyperf/PyPerfDefaultPrinter.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "PyPerfSampleProcessor.h"
+
+namespace ebpf {
+namespace pyperf {
+
+// PyPerfSampleProcessor that prints samples to stdout. Each constructor flag
+// enables one optional per-sample state line.
+class PyPerfDefaultPrinter : public PyPerfSampleProcessor {
+ public:
+  PyPerfDefaultPrinter(bool showGILState, bool showThreadState,
+                       bool showPthreadIDState)
+      : showGILState_(showGILState),
+        showThreadState_(showThreadState),
+        showPthreadIDState_(showPthreadIDState) {}
+
+  // Prints all samples followed by summary counters; defined in
+  // PyPerfDefaultPrinter.cc.
+  void processSamples(const std::vector<PyPerfSample>& samples,
+                      PyPerfUtil* util) override;
+
+ private:
+  bool showGILState_;        // print the "GIL State" line
+  bool showThreadState_;     // print the "Thread State" line
+  bool showPthreadIDState_;  // print the "Pthread ID State" line
+};
+
+} // namespace pyperf
+} // namespace ebpf
diff --git a/examples/cpp/pyperf/PyPerfLoggingHelper.h b/examples/cpp/pyperf/PyPerfLoggingHelper.h
index d08d93e..c101666 100644
--- a/examples/cpp/pyperf/PyPerfLoggingHelper.h
+++ b/examples/cpp/pyperf/PyPerfLoggingHelper.h
@@ -3,6 +3,8 @@
* Licensed under the Apache License, Version 2.0 (the "License")
*/
+#pragma once
+
#include <cstdint>
namespace ebpf {
diff --git a/examples/cpp/pyperf/PyPerfSampleProcessor.h b/examples/cpp/pyperf/PyPerfSampleProcessor.h
new file mode 100644
index 0000000..5f2fe5e
--- /dev/null
+++ b/examples/cpp/pyperf/PyPerfSampleProcessor.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "PyPerfType.h"
+
+namespace ebpf {
+namespace pyperf {
+
+class PyPerfUtil;
+
+// Interface for consumers of the samples collected by PyPerfUtil::profile.
+class PyPerfSampleProcessor {
+ public:
+  // Virtual so that an implementation deleted through a
+  // PyPerfSampleProcessor pointer runs its own destructor.
+  virtual ~PyPerfSampleProcessor() = default;
+
+  // Receives the collected samples together with the PyPerfUtil that
+  // produced them.
+  virtual void processSamples(const std::vector<PyPerfSample>& samples,
+                              PyPerfUtil* util) = 0;
+};
+
+} // namespace pyperf
+} // namespace ebpf
diff --git a/examples/cpp/pyperf/PyPerfType.h b/examples/cpp/pyperf/PyPerfType.h
index 9a54e9e..7df07c7 100644
--- a/examples/cpp/pyperf/PyPerfType.h
+++ b/examples/cpp/pyperf/PyPerfType.h
@@ -3,7 +3,12 @@
* Licensed under the Apache License, Version 2.0 (the "License")
*/
+#pragma once
+
+#include <sys/types.h>
#include <cstdint>
+#include <string>
+#include <vector>
#define PYTHON_STACK_FRAMES_PER_PROG 25
#define PYTHON_STACK_PROG_CNT 3
@@ -99,5 +104,42 @@
int32_t stack[STACK_MAX_LEN];
} Event;
+// One profiling sample: a copy of the fields of a raw Event. pyStackIds holds
+// the ids that PyPerfUtil::getSymbolMapping() maps to symbol names.
+struct PyPerfSample {
+  pid_t pid;
+  pid_t tid;
+  std::string comm;
+  uint8_t threadStateMatch;
+  uint8_t gilState;
+  uint8_t pthreadIDMatch;
+  uint8_t stackStatus;
+  std::vector<int32_t> pyStackIds;
+
+  // Copies the fields of `raw`. `rawSize` is accepted but not used.
+  explicit PyPerfSample(const Event* raw, int rawSize)
+      : pid(raw->pid),
+        tid(raw->tid),
+        comm(raw->comm),
+        threadStateMatch(raw->thread_state_match),
+        gilState(raw->gil_state),
+        pthreadIDMatch(raw->pthread_id_match),
+        stackStatus(raw->stack_status),
+        pyStackIds(raw->stack, raw->stack + stackDepth(raw)) {}
+
+ private:
+  // Number of entries of raw->stack to copy: raw->stack_len limited to
+  // [0, STACK_MAX_LEN] so the copy never leaves the array.
+  static std::size_t stackDepth(const Event* raw) {
+    if (raw->stack_len <= 0) {
+      return 0;
+    }
+    if (raw->stack_len >= STACK_MAX_LEN) {
+      return STACK_MAX_LEN;
+    }
+    return static_cast<std::size_t>(raw->stack_len);
+  }
+};
+
} // namespace pyperf
} // namespace ebpf
diff --git a/examples/cpp/pyperf/PyPerfUtil.cc b/examples/cpp/pyperf/PyPerfUtil.cc
index d439083..252a0fe 100644
--- a/examples/cpp/pyperf/PyPerfUtil.cc
+++ b/examples/cpp/pyperf/PyPerfUtil.cc
@@ -9,7 +9,6 @@
#include <cstdio>
#include <cstring>
#include <exception>
-#include <unordered_map>
#include <dirent.h>
#include <linux/elf.h>
@@ -29,10 +28,6 @@
extern OffsetConfig kPy36OffsetConfig;
extern std::string PYPERF_BPF_PROGRAM;
-const static std::string kLostSymbol = "[Lost Symbol]";
-const static std::string kIncompleteStack = "[Truncated Stack]";
-const static std::string kErrorStack = "[Stack Error]";
-const static std::string kNonPythonStack = "[Non-Python Code]";
const static int kPerfBufSizePages = 32;
const static std::string kPidCfgTableName("pid_config");
@@ -107,7 +102,8 @@
file = file.substr(pos + 1);
}
if (file.find(kPy36LibName) == 0) {
- logInfo(1, "Found Python library %s loaded at %lx-%lx for PID %d\n", name, st, en, helper->pid);
+ logInfo(1, "Found Python library %s loaded at %lx-%lx for PID %d\n", name,
+ st, en, helper->pid);
helper->found = true;
helper->st = st;
helper->en = en;
@@ -239,7 +235,8 @@
void PyPerfUtil::handleLostSamples(int lostCnt) { lostSamples_ += lostCnt; }
PyPerfUtil::PyPerfResult PyPerfUtil::profile(int64_t sampleRate,
- int64_t durationMs) {
+ int64_t durationMs,
+ PyPerfSampleProcessor* processor) {
if (!initCompleted_) {
std::fprintf(stderr, "PyPerfUtil::init not invoked or failed\n");
return PyPerfResult::NO_INIT;
@@ -285,7 +282,12 @@
}
logInfo(2, "Finished draining remaining samples\n");
- // Get symbol names and output samples
+ processor->processSamples(samples_, this);
+
+ return PyPerfResult::SUCCESS;
+}
+
+std::unordered_map<int32_t, std::string> PyPerfUtil::getSymbolMapping() {
auto symbolTable = bpf_.get_hash_table<Symbol, int32_t>("symbols");
std::unordered_map<int32_t, std::string> symbols;
for (auto& x : symbolTable.get_table_offline()) {
@@ -294,47 +296,7 @@
symbols.emplace(x.second, std::move(symbolName));
}
logInfo(1, "Total %d unique Python symbols\n", symbols.size());
-
- for (auto& sample : samples_) {
- if (sample.threadStateMatch != THREAD_STATE_THIS_THREAD_NULL &&
- sample.threadStateMatch != THREAD_STATE_BOTH_NULL) {
- for (const auto stackId : sample.pyStackIds) {
- auto symbIt = symbols.find(stackId);
- if (symbIt != symbols.end()) {
- std::printf(" %s\n", symbIt->second.c_str());
- } else {
- std::printf(" %s\n", kLostSymbol.c_str());
- lostSymbols_++;
- }
- }
- switch (sample.stackStatus) {
- case STACK_STATUS_TRUNCATED:
- std::printf(" %s\n", kIncompleteStack.c_str());
- truncatedStack_++;
- break;
- case STACK_STATUS_ERROR:
- std::printf(" %s\n", kErrorStack.c_str());
- break;
- default:
- break;
- }
- } else {
- std::printf(" %s\n", kNonPythonStack.c_str());
- }
-
- std::printf("PID: %d TID: %d (%s)\n", sample.pid, sample.tid,
- sample.comm.c_str());
- std::printf("GIL State: %d Thread State: %d PthreadID Match State: %d\n\n",
- sample.threadStateMatch, sample.gilState,
- sample.pthreadIDMatch);
- }
-
- logInfo(0, "%d samples collected\n", totalSamples_);
- logInfo(0, "%d samples lost\n", lostSamples_);
- logInfo(0, "%d samples with truncated stack\n", truncatedStack_);
- logInfo(0, "%d times Python symbol lost\n", lostSymbols_);
-
- return PyPerfResult::SUCCESS;
+ return symbols;
}
std::string PyPerfUtil::getSymbolName(Symbol& sym) const {
@@ -378,18 +340,23 @@
}
if (!getAddrOfPythonBinary(path, data)) {
- std::fprintf(stderr, "Failed getting addresses in potential Python library in PID %d\n", pid);
+ std::fprintf(
+ stderr,
+ "Failed getting addresses in potential Python library in PID %d\n",
+ pid);
return false;
}
data.offsets = kPy36OffsetConfig;
data.current_state_addr += helper.st;
- logInfo(2, "PID %d has _PyThreadState_Current at %lx\n", pid, data.current_state_addr);
+ logInfo(2, "PID %d has _PyThreadState_Current at %lx\n", pid,
+ data.current_state_addr);
data.tls_key_addr += helper.st;
logInfo(2, "PID %d has autoTLSKey at %lx\n", pid, data.current_state_addr);
data.gil_locked_addr += helper.st;
logInfo(2, "PID %d has gil_locked at %lx\n", pid, data.current_state_addr);
data.gil_last_holder_addr += helper.st;
- logInfo(2, "PID %d has gil_last_holder at %lx\n", pid, data.current_state_addr);
+ logInfo(2, "PID %d has gil_last_holder at %lx\n", pid,
+ data.current_state_addr);
return true;
}
diff --git a/examples/cpp/pyperf/PyPerfUtil.h b/examples/cpp/pyperf/PyPerfUtil.h
index 3e69a29..c3396f4 100644
--- a/examples/cpp/pyperf/PyPerfUtil.h
+++ b/examples/cpp/pyperf/PyPerfUtil.h
@@ -6,12 +6,14 @@
#pragma once
#include <string>
+#include <unordered_map>
#include <vector>
#include <linux/perf_event.h>
#include <sys/types.h>
#include "BPF.h"
+#include "PyPerfSampleProcessor.h"
#include "PyPerfType.h"
namespace ebpf {
@@ -28,37 +30,23 @@
EVENT_DETACH_FAIL
};
- struct Sample {
- pid_t pid;
- pid_t tid;
- std::string comm;
- uint8_t threadStateMatch;
- uint8_t gilState;
- uint8_t pthreadIDMatch;
- uint8_t stackStatus;
- std::vector<int32_t> pyStackIds;
-
- explicit Sample(const Event* raw, int rawSize)
- : pid(raw->pid),
- tid(raw->tid),
- comm(raw->comm),
- threadStateMatch(raw->thread_state_match),
- gilState(raw->gil_state),
- pthreadIDMatch(raw->pthread_id_match),
- stackStatus(raw->stack_status),
- pyStackIds(raw->stack, raw->stack + raw->stack_len) {}
- };
-
// init must be invoked exactly once before invoking profile
PyPerfResult init();
- PyPerfResult profile(int64_t sampleRate, int64_t durationMs);
+ PyPerfResult profile(int64_t sampleRate, int64_t durationMs,
+ PyPerfSampleProcessor* processor);
+
+ std::unordered_map<int32_t, std::string> getSymbolMapping();
+
+ uint32_t getTotalSamples() const { return totalSamples_; }
+
+ uint32_t getLostSamples() const { return lostSamples_; }
private:
- uint32_t lostSymbols_ = 0, totalSamples_ = 0, lostSamples_ = 0, truncatedStack_ = 0;
+ uint32_t totalSamples_ = 0, lostSamples_ = 0;
ebpf::BPF bpf_{0, nullptr, false, "", true};
- std::vector<Sample> samples_;
+ std::vector<PyPerfSample> samples_;
bool initCompleted_{false};
void handleSample(const void* data, int dataSize);