skpbench: add option for gpu timing
Adds a gpu timing option with a GL implementation.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2388433003
Committed: https://skia.googlesource.com/skia/+/c06720d06faab3b01eba1b8693e0ac791f06dc96
Review-Url: https://codereview.chromium.org/2388433003
diff --git a/tools/gpu/FenceSync.h b/tools/gpu/FenceSync.h
index 8f2bbe2..b430f5d 100644
--- a/tools/gpu/FenceSync.h
+++ b/tools/gpu/FenceSync.h
@@ -13,7 +13,7 @@
namespace sk_gpu_test {
using PlatformFence = uint64_t;
-static constexpr PlatformFence kInvalidPlatformFence = 0;
+static constexpr PlatformFence kInvalidFence = 0;
/*
* This class provides an interface to interact with fence syncs. A fence sync is an object that the
@@ -29,6 +29,6 @@
virtual ~FenceSync() {}
};
-}
+} // namespace sk_gpu_test
#endif
diff --git a/tools/gpu/GpuTimer.h b/tools/gpu/GpuTimer.h
new file mode 100644
index 0000000..7678421
--- /dev/null
+++ b/tools/gpu/GpuTimer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GpuTimer_DEFINED
+#define GpuTimer_DEFINED
+
+#include "SkTypes.h"
+#include "SkExchange.h"
+#include <chrono>
+
+namespace sk_gpu_test {
+
+// Handle type for the timer queries returned by GpuTimer::queueStop(). The value 0
+// (kInvalidTimerQuery) never names a real query.
+using PlatformTimerQuery = uint64_t;
+static constexpr PlatformTimerQuery kInvalidTimerQuery = 0;
+
+/**
+ * Platform-independent interface for timing operations on the GPU.
+ */
+class GpuTimer {
+public:
+    explicit GpuTimer(bool disjointSupport)
+        : fDisjointSupport(disjointSupport), fActiveTimer(kInvalidTimerQuery) {}
+    /** Asserts that no queueStart() was left without a matching queueStop(). */
+    virtual ~GpuTimer() { SkASSERT(!fActiveTimer); }
+
+    /**
+     * Returns whether this timer can detect disjoint GPU operations while timing. If false, a query
+     * has less confidence when it completes with QueryStatus::kAccurate.
+     */
+    bool disjointSupport() const { return fDisjointSupport; }
+
+    /** Inserts a "start timing" command in the GPU command stream. */
+    void queueStart() {
+        SkASSERT(!fActiveTimer);
+        fActiveTimer = this->onQueueTimerStart();
+    }
+
+    /**
+     * Inserts a "stop timing" command in the GPU command stream.
+     * @return a query object that can retrieve the time elapsed once the timer has completed.
+     */
+    PlatformTimerQuery SK_WARN_UNUSED_RESULT queueStop() {
+        SkASSERT(fActiveTimer);
+        this->onQueueTimerStop(fActiveTimer);
+        return skstd::exchange(fActiveTimer, kInvalidTimerQuery);
+    }
+
+    enum class QueryStatus {
+        kInvalid,  //!< the timer query is invalid.
+        kPending,  //!< the timer is still running on the GPU.
+        kDisjoint, //!< the query is complete, but dubious due to disjoint GPU operations.
+        kAccurate  //!< the query is complete and reliable.
+    };
+
+    /** Reports the current state of a query returned by queueStop(). */
+    virtual QueryStatus checkQueryStatus(PlatformTimerQuery) = 0;
+    /** Returns the elapsed time recorded by a completed query. */
+    virtual std::chrono::nanoseconds getTimeElapsed(PlatformTimerQuery) = 0;
+    /** Releases a query returned by queueStop(). */
+    virtual void deleteQuery(PlatformTimerQuery) = 0;
+
+private:
+    // Backend hooks invoked by queueStart() and queueStop().
+    virtual PlatformTimerQuery onQueueTimerStart() const = 0;
+    virtual void onQueueTimerStop(PlatformTimerQuery) const = 0;
+
+    bool const fDisjointSupport;
+    PlatformTimerQuery fActiveTimer;
+};
+
+} // namespace sk_gpu_test
+
+#endif
diff --git a/tools/gpu/TestContext.cpp b/tools/gpu/TestContext.cpp
index 8a78b90..90aba43 100644
--- a/tools/gpu/TestContext.cpp
+++ b/tools/gpu/TestContext.cpp
@@ -8,8 +8,13 @@
#include "TestContext.h"
+#include "GpuTimer.h"
+
namespace sk_gpu_test {
-TestContext::TestContext() : fFenceSync(nullptr), fCurrentFenceIdx(0) {
+TestContext::TestContext()
+ : fFenceSync(nullptr)
+ , fGpuTimer(nullptr)
+ , fCurrentFenceIdx(0) {
memset(fFrameFences, 0, sizeof(fFrameFences));
}
@@ -21,6 +26,7 @@
}
#endif
SkASSERT(!fFenceSync);
+ SkASSERT(!fGpuTimer);
}
void TestContext::makeCurrent() const { this->onPlatformMakeCurrent(); }
@@ -60,9 +66,9 @@
fFrameFences[i] = 0;
}
}
- delete fFenceSync;
- fFenceSync = nullptr;
+ fFenceSync.reset();
}
+ fGpuTimer.reset();
}
}
diff --git a/tools/gpu/TestContext.h b/tools/gpu/TestContext.h
index d01cb02..8722a33 100644
--- a/tools/gpu/TestContext.h
+++ b/tools/gpu/TestContext.h
@@ -14,6 +14,9 @@
#include "../private/SkTemplates.h"
namespace sk_gpu_test {
+
+class GpuTimer;
+
/**
* An offscreen 3D context. This class is intended for Skia's internal testing needs and not
* for general use.
@@ -27,6 +30,9 @@
bool fenceSyncSupport() const { return fFenceSync != nullptr; }
FenceSync* fenceSync() { SkASSERT(fFenceSync); return fFenceSync; }
+ // True when this context holds a GpuTimer (fGpuTimer is non-null).
+ bool gpuTimingSupport() const { return fGpuTimer != nullptr; }
+ // Returns the context's GpuTimer; asserts that one exists, so check gpuTimingSupport() first.
+ GpuTimer* gpuTimer() const { SkASSERT(fGpuTimer); return fGpuTimer; }
+
bool getMaxGpuFrameLag(int *maxFrameLag) const {
if (!fFenceSync) {
return false;
@@ -75,7 +81,8 @@
virtual void finish() = 0;
protected:
- FenceSync* fFenceSync;
+ SkAutoTDelete<FenceSync> fFenceSync;
+ SkAutoTDelete<GpuTimer> fGpuTimer;
TestContext();
diff --git a/tools/gpu/gl/GLTestContext.cpp b/tools/gpu/gl/GLTestContext.cpp
index 1b077d5..20a9908 100644
--- a/tools/gpu/gl/GLTestContext.cpp
+++ b/tools/gpu/gl/GLTestContext.cpp
@@ -6,6 +6,8 @@
*/
#include "GLTestContext.h"
+
+#include "GpuTimer.h"
#include "gl/GrGLUtil.h"
namespace {
@@ -78,6 +80,135 @@
fGLDeleteSync(glsync);
}
+// GpuTimer backend built on GL GL_TIME_ELAPSED queries. The constructor is private, so
+// instances come from CreateIfSupported(); each GL entry point is held as a member pointer.
+class GLGpuTimer : public sk_gpu_test::GpuTimer {
+public:
+    static GLGpuTimer* CreateIfSupported(const sk_gpu_test::GLTestContext*);
+
+    QueryStatus checkQueryStatus(sk_gpu_test::PlatformTimerQuery) override;
+    std::chrono::nanoseconds getTimeElapsed(sk_gpu_test::PlatformTimerQuery) override;
+    void deleteQuery(sk_gpu_test::PlatformTimerQuery) override;
+
+private:
+    using INHERITED = sk_gpu_test::GpuTimer;
+
+    // GL enum values used by the timer.
+    static constexpr GrGLenum GL_QUERY_RESULT = 0x8866;
+    static constexpr GrGLenum GL_QUERY_RESULT_AVAILABLE = 0x8867;
+    static constexpr GrGLenum GL_TIME_ELAPSED = 0x88bf;
+    static constexpr GrGLenum GL_GPU_DISJOINT = 0x8fbb;
+
+    // Signatures of the GL entry points held below.
+    using GLGetIntegervProc = void (GR_GL_FUNCTION_TYPE*)(GrGLenum, GrGLint*);
+    using GLGenQueriesProc = void (GR_GL_FUNCTION_TYPE*)(GrGLsizei, GrGLuint*);
+    using GLDeleteQueriesProc = void (GR_GL_FUNCTION_TYPE*)(GrGLsizei, const GrGLuint*);
+    using GLBeginQueryProc = void (GR_GL_FUNCTION_TYPE*)(GrGLenum, GrGLuint);
+    using GLEndQueryProc = void (GR_GL_FUNCTION_TYPE*)(GrGLenum);
+    using GLGetQueryObjectuivProc = void (GR_GL_FUNCTION_TYPE*)(GrGLuint, GrGLenum, GrGLuint*);
+    using GLGetQueryObjectui64vProc = void (GR_GL_FUNCTION_TYPE*)(GrGLuint, GrGLenum, GrGLuint64*);
+
+    GLGpuTimer(bool disjointSupport, const sk_gpu_test::GLTestContext*, const char* ext = "");
+
+    // Checks that every entry point below is non-null.
+    bool validate() const;
+
+    sk_gpu_test::PlatformTimerQuery onQueueTimerStart() const override;
+    void onQueueTimerStop(sk_gpu_test::PlatformTimerQuery) const override;
+
+    GLGetIntegervProc fGLGetIntegerv;
+    GLGenQueriesProc fGLGenQueries;
+    GLDeleteQueriesProc fGLDeleteQueries;
+    GLBeginQueryProc fGLBeginQuery;
+    GLEndQueryProc fGLEndQuery;
+    GLGetQueryObjectuivProc fGLGetQueryObjectuiv;
+    GLGetQueryObjectui64vProc fGLGetQueryObjectui64v;
+};
+
+// Creates a timer for the given GL context, or returns nullptr when no timer-query API is
+// usable. Preference order: GL_EXT_disjoint_timer_query (the only variant constructed with
+// disjoint support), then desktop GL timer queries, then GL_EXT_timer_query. Ownership of the
+// returned object passes to the caller.
+GLGpuTimer* GLGpuTimer::CreateIfSupported(const sk_gpu_test::GLTestContext* ctx) {
+    SkAutoTDelete<GLGpuTimer> ret;
+    const GrGLInterface* gl = ctx->gl();
+    if (gl->fExtensions.has("GL_EXT_disjoint_timer_query")) {
+        ret.reset(new GLGpuTimer(true, ctx, "EXT"));
+    } else if (kGL_GrGLStandard == gl->fStandard &&
+               (GrGLGetVersion(gl) >= GR_GL_VER(3,3) ||
+                gl->fExtensions.has("GL_ARB_timer_query"))) {
+        // Timer queries are core as of GL 3.3, so 3.3 itself qualifies (>=, not >); older
+        // desktop contexts need the ARB extension. Either way the names carry no suffix.
+        ret.reset(new GLGpuTimer(false, ctx));
+    } else if (gl->fExtensions.has("GL_EXT_timer_query")) {
+        ret.reset(new GLGpuTimer(false, ctx, "EXT"));
+    }
+    // Discard the timer if any entry point failed to resolve.
+    return ret && ret->validate() ? ret.release() : nullptr;
+}
+
+// Looks up every GL entry point the timer calls through the test context. `ext` is passed
+// along with each query-function name ("EXT" for the extension variants, "" otherwise);
+// glGetIntegerv is looked up without it. Nothing is checked here: validate() reports whether
+// all lookups produced a non-null pointer.
+GLGpuTimer::GLGpuTimer(bool disjointSupport, const sk_gpu_test::GLTestContext* ctx, const char* ext)
+ : INHERITED(disjointSupport) {
+ ctx->getGLProcAddress(&fGLGetIntegerv, "glGetIntegerv");
+ ctx->getGLProcAddress(&fGLGenQueries, "glGenQueries", ext);
+ ctx->getGLProcAddress(&fGLDeleteQueries, "glDeleteQueries", ext);
+ ctx->getGLProcAddress(&fGLBeginQuery, "glBeginQuery", ext);
+ ctx->getGLProcAddress(&fGLEndQuery, "glEndQuery", ext);
+ ctx->getGLProcAddress(&fGLGetQueryObjectuiv, "glGetQueryObjectuiv", ext);
+ ctx->getGLProcAddress(&fGLGetQueryObjectui64v, "glGetQueryObjectui64v", ext);
+}
+
+// Reports whether every GL entry point held by this timer is non-null.
+bool GLGpuTimer::validate() const {
+    if (!fGLGetIntegerv || !fGLGenQueries || !fGLDeleteQueries) {
+        return false;
+    }
+    if (!fGLBeginQuery || !fGLEndQuery) {
+        return false;
+    }
+    return fGLGetQueryObjectuiv && fGLGetQueryObjectui64v;
+}
+
+// Generates a GL query object and begins a GL_TIME_ELAPSED query on it.
+// Returns kInvalidTimerQuery when no query object could be generated.
+sk_gpu_test::PlatformTimerQuery GLGpuTimer::onQueueTimerStart() const {
+    // Start from 0 so that a glGenQueries call which fails without writing its output is
+    // reported as an invalid query instead of reading an indeterminate value.
+    GrGLuint queryID = 0;
+    fGLGenQueries(1, &queryID);
+    if (!queryID) {
+        return sk_gpu_test::kInvalidTimerQuery;
+    }
+    if (this->disjointSupport()) {
+        // Clear the disjoint flag (the value read here is intentionally unused).
+        GrGLint disjoint;
+        fGLGetIntegerv(GL_GPU_DISJOINT, &disjoint);
+    }
+    fGLBeginQuery(GL_TIME_ELAPSED, queryID);
+    return static_cast<sk_gpu_test::PlatformTimerQuery>(queryID);
+}
+
+// Ends the active GL_TIME_ELAPSED query. No GL call is issued for an invalid handle.
+void GLGpuTimer::onQueueTimerStop(sk_gpu_test::PlatformTimerQuery platformTimer) const {
+    if (sk_gpu_test::kInvalidTimerQuery != platformTimer) {
+        fGLEndQuery(GL_TIME_ELAPSED);
+    }
+}
+
+// Maps the GL state of a query onto QueryStatus: a zero handle is kInvalid, a result that is
+// not yet available is kPending, and a finished query is either kDisjoint or kAccurate.
+sk_gpu_test::GpuTimer::QueryStatus
+GLGpuTimer::checkQueryStatus(sk_gpu_test::PlatformTimerQuery platformTimer) {
+    const GrGLuint glQuery = static_cast<GrGLuint>(platformTimer);
+    if (0 == glQuery) {
+        return QueryStatus::kInvalid;
+    }
+
+    GrGLuint resultReady = 0;
+    fGLGetQueryObjectuiv(glQuery, GL_QUERY_RESULT_AVAILABLE, &resultReady);
+    if (0 == resultReady) {
+        return QueryStatus::kPending;
+    }
+
+    if (!this->disjointSupport()) {
+        // Without disjoint detection a finished query is always reported as accurate.
+        return QueryStatus::kAccurate;
+    }
+
+    // Starts as "disjoint" so an untouched output value is treated as unreliable.
+    GrGLint disjointFlag = 1;
+    fGLGetIntegerv(GL_GPU_DISJOINT, &disjointFlag);
+    return disjointFlag ? QueryStatus::kDisjoint : QueryStatus::kAccurate;
+}
+
+// Reads GL_QUERY_RESULT for the given query and returns it as a nanosecond duration.
+// The query is expected to have completed (status kDisjoint or kAccurate).
+std::chrono::nanoseconds GLGpuTimer::getTimeElapsed(sk_gpu_test::PlatformTimerQuery platformTimer) {
+    // NOTE(review): this assert re-runs checkQueryStatus(), which itself reads
+    // GL_GPU_DISJOINT; confirm that the extra read is harmless where asserts are enabled.
+    SkASSERT(this->checkQueryStatus(platformTimer) >= QueryStatus::kDisjoint);
+    const GrGLuint queryID = static_cast<GrGLuint>(platformTimer);
+    // Zero-initialized so a GL read that fails without writing its output yields 0ns
+    // rather than an indeterminate value.
+    GrGLuint64 nanoseconds = 0;
+    fGLGetQueryObjectui64v(queryID, GL_QUERY_RESULT, &nanoseconds);
+    return std::chrono::nanoseconds(nanoseconds);
+}
+
+// Deletes the GL query object named by the given handle.
+void GLGpuTimer::deleteQuery(sk_gpu_test::PlatformTimerQuery platformTimer) {
+    const GrGLuint glQuery[] = { static_cast<GrGLuint>(platformTimer) };
+    fGLDeleteQueries(1, glQuery);
+}
+
+// GL query names are stored in PlatformTimerQuery handles via static_cast, so the handle type
+// must be at least as wide as GrGLuint.
+GR_STATIC_ASSERT(sizeof(GrGLuint) <= sizeof(sk_gpu_test::PlatformTimerQuery));
+
} // anonymous namespace
namespace sk_gpu_test {
@@ -92,6 +223,7 @@
SkASSERT(!fGL.get());
fGL.reset(gl);
fFenceSync = fenceSync ? fenceSync : GLFenceSync::CreateIfSupported(this);
+ fGpuTimer = GLGpuTimer::CreateIfSupported(this);
}
void GLTestContext::teardown() {
diff --git a/tools/skpbench/_benchresult.py b/tools/skpbench/_benchresult.py
index 94c1105..666878b 100644
--- a/tools/skpbench/_benchresult.py
+++ b/tools/skpbench/_benchresult.py
@@ -25,6 +25,8 @@
'(?P<samples>\d+)'
'(?P<sample_ms_pad> +)'
'(?P<sample_ms>\d+)'
+ '(?P<clock_pad> +)'
+ '(?P<clock>[cg]pu)'
'(?P<metric_pad> +)'
'(?P<metric>ms|fps)'
'(?P<config_pad> +)'
@@ -45,6 +47,7 @@
self.stddev = float(match.group('stddev')[:-1]) # Drop '%' sign.
self.samples = int(match.group('samples'))
self.sample_ms = int(match.group('sample_ms'))
+ self.clock = match.group('clock')
self.metric = match.group('metric')
self.config = match.group('config')
self.bench = match.group('bench')
@@ -59,7 +62,7 @@
else:
values = list()
for name in ['accum', 'median', 'max', 'min', 'stddev',
- 'samples', 'sample_ms', 'metric', 'config']:
+ 'samples', 'sample_ms', 'clock', 'metric', 'config']:
values.append(self.get_string(name + '_pad'))
values.append(self.get_string(name))
values.append(config_suffix)
diff --git a/tools/skpbench/parseskpbench.py b/tools/skpbench/parseskpbench.py
index 5fe146e..800c1ca 100755
--- a/tools/skpbench/parseskpbench.py
+++ b/tools/skpbench/parseskpbench.py
@@ -8,8 +8,8 @@
from __future__ import print_function
from _benchresult import BenchResult
from argparse import ArgumentParser
+from collections import defaultdict, namedtuple
from datetime import datetime
-import collections
import operator
import os
import sys
@@ -27,7 +27,7 @@
(1) Install the "Office Editing for Docs, Sheets & Slides" Chrome extension:
https://chrome.google.com/webstore/detail/office-editing-for-docs-s/gbkeegbaiigmenfmjfclcdgdpimamgkj
-(2) Designate Chrome os-wide as the default application for opening .csv files.
+(2) Update your global OS file associations to use Chrome for .csv files.
(3) Run parseskpbench.py with the --open flag.
@@ -49,75 +49,92 @@
FLAGS = __argparse.parse_args()
+# BenchResult fields that qualify a result beyond its config name. They are the extra
+# FullConfig fields and the initial keys of Parser.sheet_qualifiers.
+RESULT_QUALIFIERS = ('sample_ms', 'clock', 'metric')
+
+class FullConfig(namedtuple('fullconfig', ('config',) + RESULT_QUALIFIERS)):
+  """A config name together with the values of its RESULT_QUALIFIERS fields."""
+
+  def qualified_name(self, qualifiers=RESULT_QUALIFIERS):
+    """Returns the config (commas replaced by spaces) tagged with the chosen qualifiers."""
+    chosen = dict((field, getattr(self, field)) for field in qualifiers)
+    display_name = self.config.replace(',', ' ')
+    return get_qualified_name(display_name, chosen)
+
+def get_qualified_name(name, qualifiers):
+  """Returns `name`, followed by ' (k=v k=v ...)' when `qualifiers` is a non-empty dict.
+
+  Iterates with dict.items() rather than the Python-2-only iteritems(), so the helper also
+  runs under Python 3; the formatted text is the same.
+  """
+  if not qualifiers:
+    return name
+  args = ('%s=%s' % (k, v) for k, v in qualifiers.items())
+  return '%s (%s)' % (name, ' '.join(args))
class Parser:
def __init__(self):
- self.configs = list() # use list to preserve the order configs appear in.
- self.rows = collections.defaultdict(dict)
- self.cols = collections.defaultdict(dict)
- self.metric = None
- self.sample_ms = None
+ self.sheet_qualifiers = {x:None for x in RESULT_QUALIFIERS}
+ self.config_qualifiers = set()
+ self.fullconfigs = list() # use list to preserve the order.
+ self.rows = defaultdict(dict)
+ self.cols = defaultdict(dict)
def parse_file(self, infile):
for line in infile:
match = BenchResult.match(line)
if not match:
continue
- if self.metric is None:
- self.metric = match.metric
- elif match.metric != self.metric:
- raise ValueError("results have mismatched metrics (%s and %s)" %
- (self.metric, match.metric))
- if self.sample_ms is None:
- self.sample_ms = match.sample_ms
- elif not FLAGS.force and match.sample_ms != self.sample_ms:
- raise ValueError("results have mismatched sampling times. "
- "(use --force to ignore)")
- if not match.config in self.configs:
- self.configs.append(match.config)
- self.rows[match.bench][match.config] = match.get_string(FLAGS.result)
- self.cols[match.config][match.bench] = getattr(match, FLAGS.result)
+
+ fullconfig = FullConfig(*(match.get_string(x)
+ for x in FullConfig._fields))
+ if not fullconfig in self.fullconfigs:
+ self.fullconfigs.append(fullconfig)
+
+ for qualifier, value in self.sheet_qualifiers.items():
+ if value is None:
+ self.sheet_qualifiers[qualifier] = match.get_string(qualifier)
+ elif value != match.get_string(qualifier):
+ del self.sheet_qualifiers[qualifier]
+ self.config_qualifiers.add(qualifier)
+
+ self.rows[match.bench][fullconfig] = match.get_string(FLAGS.result)
+ self.cols[fullconfig][match.bench] = getattr(match, FLAGS.result)
def print_csv(self, outfile=sys.stdout):
- print('%s_%s' % (FLAGS.result, self.metric), file=outfile)
+ # Write the title.
+ print(get_qualified_name(FLAGS.result, self.sheet_qualifiers), file=outfile)
# Write the header.
outfile.write('bench,')
- for config in self.configs:
- outfile.write('%s,' % config)
+ for fullconfig in self.fullconfigs:
+ outfile.write('%s,' % fullconfig.qualified_name(self.config_qualifiers))
outfile.write('\n')
# Write the rows.
- for bench, row in self.rows.items():
+ for bench, row in self.rows.iteritems():
outfile.write('%s,' % bench)
- for config in self.configs:
- if config in row:
- outfile.write('%s,' % row[config])
+ for fullconfig in self.fullconfigs:
+ if fullconfig in row:
+ outfile.write('%s,' % row[fullconfig])
elif FLAGS.force:
- outfile.write(',')
+ outfile.write('NULL,')
else:
raise ValueError("%s: missing value for %s. (use --force to ignore)" %
- (bench, config))
+ (bench,
+ fullconfig.qualified_name(self.config_qualifiers)))
outfile.write('\n')
# Add simple, literal averages.
if len(self.rows) > 1:
outfile.write('\n')
- self.__print_computed_row('MEAN',
+ self._print_computed_row('MEAN',
lambda col: reduce(operator.add, col.values()) / len(col),
outfile=outfile)
- self.__print_computed_row('GEOMEAN',
+ self._print_computed_row('GEOMEAN',
lambda col: reduce(operator.mul, col.values()) ** (1.0 / len(col)),
outfile=outfile)
- def __print_computed_row(self, name, func, outfile=sys.stdout):
+ def _print_computed_row(self, name, func, outfile=sys.stdout):
outfile.write('%s,' % name)
- for config in self.configs:
- assert(len(self.cols[config]) == len(self.rows))
- outfile.write('%.4g,' % func(self.cols[config]))
+ for fullconfig in self.fullconfigs:
+ if len(self.cols[fullconfig]) != len(self.rows):
+ outfile.write('NULL,')
+ continue
+ outfile.write('%.4g,' % func(self.cols[fullconfig]))
outfile.write('\n')
-
def main():
parser = Parser()
diff --git a/tools/skpbench/skpbench.cpp b/tools/skpbench/skpbench.cpp
index adb6af0..6d0381a 100644
--- a/tools/skpbench/skpbench.cpp
+++ b/tools/skpbench/skpbench.cpp
@@ -5,6 +5,7 @@
* found in the LICENSE file.
*/
+#include "GpuTimer.h"
#include "GrContextFactory.h"
#include "SkCanvas.h"
#include "SkOSFile.h"
@@ -33,12 +34,9 @@
* Currently, only GPU configs are supported.
*/
-using sk_gpu_test::PlatformFence;
-using sk_gpu_test::kInvalidPlatformFence;
-using sk_gpu_test::FenceSync;
-
DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
+DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
DEFINE_bool(fps, false, "use fps instead of ms");
DEFINE_string(skp, "", "path to a single .skp file to benchmark");
DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
@@ -46,13 +44,13 @@
DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
static const char* header =
- " accum median max min stddev samples sample_ms metric config bench";
+" accum median max min stddev samples sample_ms clock metric config bench";
static const char* resultFormat =
- "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-6s %-9s %s";
+"%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s";
struct Sample {
- using clock = std::chrono::high_resolution_clock;
+ using duration = std::chrono::nanoseconds;
Sample() : fFrames(0), fDuration(0) {}
double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
@@ -60,13 +58,13 @@
double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
- int fFrames;
- clock::duration fDuration;
+ int fFrames;
+ duration fDuration;
};
class GpuSync {
public:
- GpuSync(const FenceSync* fenceSync);
+ GpuSync(const sk_gpu_test::FenceSync* fenceSync);
~GpuSync();
void syncToPreviousFrame();
@@ -74,8 +72,8 @@
private:
void updateFence();
- const FenceSync* const fFenceSync;
- PlatformFence fFence;
+ const sk_gpu_test::FenceSync* const fFenceSync;
+ sk_gpu_test::PlatformFence fFence;
};
enum class ExitErr {
@@ -92,10 +90,10 @@
static SkString join(const SkCommandLineFlags::StringArray&);
static void exitf(ExitErr, const char* format, ...);
-static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const SkPicture* skp,
- std::vector<Sample>* samples) {
- using clock = Sample::clock;
- const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
+static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
+ const SkPicture* skp, std::vector<Sample>* samples) {
+ using clock = std::chrono::high_resolution_clock;
+ const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
draw_skp_and_flush(canvas, skp);
@@ -123,6 +121,66 @@
} while (now < endTime || 0 == samples->size() % 2);
}
+// Benchmarks `skp` on the GPU clock. Every frame is bracketed by gpuTimer->queueStart() and
+// queueStop(); the query of the *previous* frame is read after gpuSync.syncToPreviousFrame()
+// and, when accurate, added to the current Sample. Disjoint queries are discarded. Sampling
+// continues until FLAGS_duration ms have elapsed and an odd number of samples was collected.
+static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
+ const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
+ const SkPicture* skp, std::vector<Sample>* samples) {
+ using sk_gpu_test::PlatformTimerQuery;
+ using clock = std::chrono::steady_clock;
+ const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
+ const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
+
+ if (!gpuTimer->disjointSupport()) {
+ fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
+ "results may be unreliable\n");
+ }
+
+ // Draw one untimed frame before constructing the GpuSync.
+ draw_skp_and_flush(canvas, skp);
+ GpuSync gpuSync(fenceSync);
+
+ // Queue the first timed frame; its query is read in the first loop iteration below.
+ gpuTimer->queueStart();
+ draw_skp_and_flush(canvas, skp);
+ PlatformTimerQuery previousTime = gpuTimer->queueStop();
+ gpuSync.syncToPreviousFrame();
+
+ clock::time_point now = clock::now();
+ const clock::time_point endTime = now + benchDuration;
+
+ do {
+ const clock::time_point sampleEndTime = now + sampleDuration;
+ samples->emplace_back();
+ Sample& sample = samples->back();
+
+ do {
+ // Queue a new timed frame, then sync before reading the previous frame's query.
+ gpuTimer->queueStart();
+ draw_skp_and_flush(canvas, skp);
+ PlatformTimerQuery time = gpuTimer->queueStop();
+ gpuSync.syncToPreviousFrame();
+
+ switch (gpuTimer->checkQueryStatus(previousTime)) {
+ using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
+ // NOTE(review): the two exitf() cases have no break; this relies on exitf() not returning.
+ case QueryStatus::kInvalid:
+ exitf(ExitErr::kUnavailable, "GPU timer failed");
+ case QueryStatus::kPending:
+ exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
+ case QueryStatus::kDisjoint:
+ if (FLAGS_verbosity >= 4) {
+ fprintf(stderr, "discarding timer query due to disjoint operations.\n");
+ }
+ break;
+ case QueryStatus::kAccurate:
+ sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
+ ++sample.fFrames;
+ break;
+ }
+ // The previous query has been read or discarded; release it and advance to the new one.
+ gpuTimer->deleteQuery(previousTime);
+ previousTime = time;
+ now = clock::now();
+ // A sample ends once its time slice is over AND it holds at least one accurate frame.
+ } while (now < sampleEndTime || 0 == sample.fFrames);
+ } while (now < endTime || 0 == samples->size() % 2);
+
+ // Release the last queued query, which is never read.
+ gpuTimer->deleteQuery(previousTime);
+}
+
void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) {
if (0 == (samples.size() % 2)) {
exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
@@ -149,7 +207,8 @@
const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
- stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, bench);
+ stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
+ config, bench);
printf("\n");
fflush(stdout);
}
@@ -247,7 +306,15 @@
// Run the benchmark.
SkCanvas* canvas = surface->getCanvas();
canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
- run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
+ if (!FLAGS_gpuClock) {
+ run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
+ } else {
+ if (!testCtx->gpuTimingSupport()) {
+ exitf(ExitErr::kUnavailable, "GPU does not support timing");
+ }
+ run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
+ &samples);
+ }
print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).c_str());
// Save a proof (if one was requested).
@@ -300,7 +367,7 @@
exit((int)err);
}
-GpuSync::GpuSync(const FenceSync* fenceSync)
+GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync)
: fFenceSync(fenceSync) {
this->updateFence();
}
@@ -310,7 +377,7 @@
}
void GpuSync::syncToPreviousFrame() {
- if (kInvalidPlatformFence == fFence) {
+ if (sk_gpu_test::kInvalidFence == fFence) {
exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
}
if (!fFenceSync->waitFence(fFence)) {
@@ -322,7 +389,7 @@
void GpuSync::updateFence() {
fFence = fFenceSync->insertFence();
- if (kInvalidPlatformFence == fFence) {
+ if (sk_gpu_test::kInvalidFence == fFence) {
exitf(ExitErr::kUnavailable, "failed to insert fence");
}
}
diff --git a/tools/skpbench/skpbench.py b/tools/skpbench/skpbench.py
index 83aaf84..6bf3975 100755
--- a/tools/skpbench/skpbench.py
+++ b/tools/skpbench/skpbench.py
@@ -32,7 +32,8 @@
__argparse.add_argument('--adb',
action='store_true', help="execute skpbench over adb")
__argparse.add_argument('-s', '--device-serial',
- help="if using adb, id of the specific device to target")
+ help="if using adb, ID of the specific device to target "
+ "(only required if more than 1 device is attached)")
__argparse.add_argument('-p', '--path',
help="directory to execute ./skpbench from")
__argparse.add_argument('-m', '--max-stddev',
@@ -47,7 +48,10 @@
__argparse.add_argument('-d', '--duration',
type=int, help="number of milliseconds to run each benchmark")
__argparse.add_argument('-l', '--sample-ms',
- type=int, help="minimum duration of a sample")
+ type=int, help="duration of a sample (minimum)")
+__argparse.add_argument('--gpu',
+ action='store_true',
+ help="perform timing on the gpu clock instead of cpu (gpu work only)")
__argparse.add_argument('--fps',
action='store_true', help="use fps instead of ms")
__argparse.add_argument('-c', '--config',
@@ -93,6 +97,8 @@
ARGV.extend(['--duration', str(FLAGS.duration)])
if FLAGS.sample_ms:
ARGV.extend(['--sampleMs', str(FLAGS.sample_ms)])
+ if FLAGS.gpu:
+ ARGV.extend(['--gpuClock', 'true'])
if FLAGS.fps:
ARGV.extend(['--fps', 'true'])
if FLAGS.path:
@@ -188,7 +194,7 @@
def terminate(self):
if self._proc:
- self._proc.kill()
+ self._proc.terminate()
self._monitor.join()
self._proc.wait()
self._proc = None