| #ifndef ANDROID_DVR_GPU_PROFILER_H_ |
| #define ANDROID_DVR_GPU_PROFILER_H_ |
| |
| // This file contains classes and macros related to run-time performance |
| // profiling of GPU processing. |
| |
| #include <deque> |
| #include <map> |
| #include <memory> |
| #include <stack> |
| #include <vector> |
| |
| #include <private/dvr/graphics/vr_gl_extensions.h> |
| |
| namespace android { |
| namespace dvr { |
| |
| // While enabled, GL commands will be submitted each frame to query timestamps |
| // of GPU workloads that have been traced using the ION_PROFILE_GPU macro |
| // defined below. |
| // |
| // Basic workflow: |
| // - have the app framework call PollGlTimerQueries at the start of each frame. |
| // - place ION_PROFILE_GPU("MyGlWorkload") at the start of code scopes where |
| // GL draw commands are performed that you want to trace. |
| class GpuProfiler { |
| public: |
| // Gets the GpuProfiler singleton instance. |
| static GpuProfiler* Get(); |
| |
| GpuProfiler(); |
| ~GpuProfiler(); |
| |
| bool IsGpuProfilingSupported() const; |
| |
| // Enables runtime GPU tracing. While enabled, GL commands will be submitted |
| // each frame to query timestamps of GPU workloads that have been traced using |
| // one of the TRACE_GPU* macros defined below. |
| void SetEnableGpuTracing(bool enabled) { enable_gpu_tracing_ = enabled; } |
| |
| bool enabled() const { return enable_gpu_tracing_; } |
| |
| // Attempt to keep the GPU times in sync with CPU times. |
| void SetEnableSyncCpuTime(bool enabled) { sync_with_cpu_time_ = enabled; } |
| |
| // When sync cpu time is enabled because of mobile GPU timer query issues, |
| // it can sometimes help to put a beginning timer query at the start of the |
| // frame to sync the CPU time when GPU work begins. |
| void QueryFrameBegin(); |
| |
| // Polls (non-blocking) for completed GL timer query data and adds events into |
| // the trace buffer. Must call once close to the start of each frame. |
| void PollGlTimerQueries(); |
| |
| // Call glFinish and process all pending timer queries. |
| void FinishGlTimerQueries(); |
| |
| // Records the beginning of a scoped GL trace event. |
| void EnterGlScope(const char* scope_name); |
| |
| // Records the end of a scoped GL trace event. |
| void LeaveGlScope(const char* scope_name, std::weak_ptr<int64_t> duration_ns, |
| int print_period); |
| |
| private: |
| // Data to queue the pending GPU timer queries that need to be polled |
| // for completion. |
| struct GpuTimerQuery { |
| enum QueryType { |
| kQueryBeginFrame, |
| kQueryBeginScope, |
| kQueryEndScope, |
| }; |
| |
| // scope_id is only required for kQueryBeginScope query types. |
| GpuTimerQuery(int64_t timestamp_ns, const char* scope_name, |
| std::weak_ptr<int64_t> duration_ns, int print_period, |
| GLuint query_id, QueryType type) |
| : timestamp_ns(timestamp_ns), |
| scope_name(scope_name), |
| duration_ns(duration_ns), |
| print_period(print_period), |
| query_id(query_id), |
| type(type) {} |
| |
| int64_t timestamp_ns; |
| const char* scope_name; |
| std::weak_ptr<int64_t> duration_ns; |
| int print_period; |
| GLuint query_id; |
| QueryType type; |
| }; |
| |
| // Struct that tracks timing data for a particular trace scope. |
| struct TimerData { |
| void reset(); |
| |
| // Print the profiling data. |
| void print(const char* name) const; |
| |
| // Enter a scope, records the timestamp for later matching with leave. |
| void enter(int64_t timestamp_ns); |
| |
| // Compute the elapsed time for the scope. |
| void leave(int64_t timestamp_ns, const char* name, |
| std::weak_ptr<int64_t> duration_ns, int print_period); |
| |
| int64_t total_elapsed_ns = 0; |
| int64_t enter_timestamp_ns = 0; |
| int num_events = 0; |
| }; |
| |
| // Synchronises the GL timebase with the CallTraceManager timebase. |
| void SyncGlTimebase(); |
| |
| // Returns a GL timer query ID if possible. Otherwise returns 0. |
| GLuint TryAllocateGlQueryId(); |
| |
| // Setting for enabling GPU tracing. |
| bool enable_gpu_tracing_; |
| |
| // Setting for synchronizing GPU timestamps with CPU time. |
| bool sync_with_cpu_time_; |
| |
| // Nanosecond offset to the GL timebase to compute the CallTraceManager time. |
| int64_t gl_timer_offset_ns_; |
| |
| std::map<const char*, TimerData> events_; |
| |
| // For GPU event TraceRecords, this tracks the pending queries that will |
| // be asynchronously polled (in order) and then added to the TraceRecorder |
| // buffer with the GPU timestamps. |
| std::deque<GpuTimerQuery> pending_gpu_queries_; |
| |
| // Available ids for use with GLTimerQuery as needed. This will generally |
| // reach a steady state after a few frames. Always push and pop from the back |
| // to avoid shifting the vector. |
| std::stack<GLuint, std::vector<GLuint> > gl_timer_query_id_pool_; |
| }; |
| |
| // Traces the GPU start and end times of the GL commands submitted in the |
| // same scope. Typically used via the TRACE_GPU macro. |
| class ScopedGlTracer { |
| public: |
| ScopedGlTracer(const char* name, std::weak_ptr<int64_t> duration_ns, |
| int print_period, bool finish) |
| : name_(name), |
| duration_ns_(duration_ns), |
| print_period_(print_period), |
| is_finish_(finish) { |
| GpuProfiler* profiler = GpuProfiler::Get(); |
| if (profiler->enabled()) { |
| profiler->EnterGlScope(name); |
| } |
| } |
| |
| ~ScopedGlTracer() { |
| GpuProfiler* profiler = GpuProfiler::Get(); |
| if (profiler->enabled()) { |
| profiler->LeaveGlScope(name_, duration_ns_, print_period_); |
| if (is_finish_) { |
| GpuProfiler::Get()->FinishGlTimerQueries(); |
| } |
| } |
| } |
| |
| private: |
| const char* name_; |
| std::weak_ptr<int64_t> duration_ns_; |
| int print_period_; |
| bool is_finish_; |
| }; |
| |
| } // namespace dvr |
| } // namespace android |
| |
// Token-pasting helpers used to give each tracer variable a per-line name.
//  - PROFILING_PASTE1 joins its two arguments exactly as written.
//  - PROFILING_PASTE2 adds one level of indirection so that macro arguments
//    (such as __LINE__) are expanded before they are joined.
//  - PROFILING_PASTE3 appends the current source line number to `prefix`.
#define PROFILING_PASTE1(lhs, rhs) lhs##rhs
#define PROFILING_PASTE2(lhs, rhs) PROFILING_PASTE1(lhs, rhs)
#define PROFILING_PASTE3(prefix) PROFILING_PASTE2(prefix, __LINE__)
| |
// Place in any scope that issues GL commands to trace the resulting GPU work.
// `scope_literal` must be a string literal: the first statement concatenates
// it with another literal, which only compiles for literals.
// `frames_per_print` is the number of frames to wait before printing an
// average result. No duration pointer is supplied (an empty weak_ptr is
// passed) and the tracer is created with finish = false.
#define TRACE_GPU_PRINT(scope_literal, frames_per_print)       \
  (void)scope_literal " must be a literal string.";            \
  android::dvr::ScopedGlTracer PROFILING_PASTE3(gpu_tracer_)(  \
      scope_literal, std::weak_ptr<int64_t>(), frames_per_print, false)
| |
// Place in any scope that issues GL commands to trace the resulting GPU work.
// `scope_literal` must be a string literal: the first statement concatenates
// it with another literal, which only compiles for literals.
// `duration_sink` is a weak_ptr to an int64_t that will receive duration
// values asynchronously during calls to PollGlTimerQueries. The print period
// is passed as -1 and the tracer is created with finish = false.
#define TRACE_GPU(scope_literal, duration_sink)                \
  (void)scope_literal " must be a literal string.";            \
  android::dvr::ScopedGlTracer PROFILING_PASTE3(gpu_tracer_)(  \
      scope_literal, duration_sink, -1, false)
| |
// Place in any scope that issues GL commands to trace the resulting GPU work.
// `scope_literal` must be a string literal: the first statement concatenates
// it with another literal, which only compiles for literals.
// Unlike TRACE_GPU_PRINT this macro takes no period argument: a print period
// of 1 is always passed. The tracer is created with finish = true, so
// GpuProfiler::FinishGlTimerQueries() is called when the scope ends (while
// tracing is enabled). No duration pointer is supplied.
#define TRACE_GPU_PRINT_FINISH(scope_literal)                  \
  (void)scope_literal " must be a literal string.";            \
  android::dvr::ScopedGlTracer PROFILING_PASTE3(gpu_tracer_)(  \
      scope_literal, std::weak_ptr<int64_t>(), 1, true)
| |
// Place in any scope that issues GL commands to trace the resulting GPU work.
// `scope_literal` must be a string literal: the first statement concatenates
// it with another literal, which only compiles for literals.
// `duration_sink` is a weak_ptr to an int64_t that will receive duration
// values asynchronously during calls to PollGlTimerQueries. The print period
// is passed as -1. The tracer is created with finish = true, so
// GpuProfiler::FinishGlTimerQueries() is called when the scope ends (while
// tracing is enabled).
#define TRACE_GPU_FINISH(scope_literal, duration_sink)         \
  (void)scope_literal " must be a literal string.";            \
  android::dvr::ScopedGlTracer PROFILING_PASTE3(gpu_tracer_)(  \
      scope_literal, duration_sink, -1, true)
| |
| #endif // ANDROID_DVR_GPU_PROFILER_H_ |