Reland "Update skpbench to let the gpu thread run alongside the recording threads"
This reverts commit 2403a1001b4b62f16519725568156e25a822e666.
Reason for revert: Fixed bot failure
Original change's description:
> Revert "Update skpbench to let the gpu thread run alongside the recording threads"
>
> This reverts commit ca06ce48f174883ead3782383cd11d4c131ab4ce.
>
> Reason for revert: It is dying on a bot with the error:
> skpbench.py: error: unrecognized arguments: --ddlNumRecordingThreads c:\b\s\w\ir\skp
>
> Original change's description:
> > Update skpbench to let the gpu thread run alongside the recording threads
> >
> > Bug: skia:10176
> > Change-Id: Ibadd52b86abfd2802b9b3f31c86ab573554601db
> > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/289883
> > Commit-Queue: Adlai Holler <adlai@google.com>
> > Reviewed-by: Robert Phillips <robertphillips@google.com>
>
> TBR=robertphillips@google.com,adlai@google.com
>
> Change-Id: Iec316410563f0fe1d06f44bcfa58f6790c547f98
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Bug: skia:10176
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/290117
> Reviewed-by: Robert Phillips <robertphillips@google.com>
> Commit-Queue: Robert Phillips <robertphillips@google.com>
TBR=robertphillips@google.com,adlai@google.com
Bug: skia:10176
Change-Id: I1b57c91ed60b093440191274d1acb72d9e0c3b37
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/290443
Reviewed-by: Robert Phillips <robertphillips@google.com>
Commit-Queue: Adlai Holler <adlai@google.com>
diff --git a/infra/bots/recipes/skpbench.expected/Perf-Win10-Clang-Golo-GPU-QuadroP400-x86_64-Release-All-Vulkan_Skpbench_DDLTotal_9x9.json b/infra/bots/recipes/skpbench.expected/Perf-Win10-Clang-Golo-GPU-QuadroP400-x86_64-Release-All-Vulkan_Skpbench_DDLTotal_9x9.json
index c7aa4e7..9c30754 100644
--- a/infra/bots/recipes/skpbench.expected/Perf-Win10-Clang-Golo-GPU-QuadroP400-x86_64-Release-All-Vulkan_Skpbench_DDLTotal_9x9.json
+++ b/infra/bots/recipes/skpbench.expected/Perf-Win10-Clang-Golo-GPU-QuadroP400-x86_64-Release-All-Vulkan_Skpbench_DDLTotal_9x9.json
@@ -79,7 +79,7 @@
"--ddl",
"--gpuThreads",
"0",
- "--ddlNumAdditionalThreads",
+ "--ddlNumRecordingThreads",
"9",
"--ddlTilingWidthHeight",
"3",
diff --git a/infra/bots/recipes/skpbench.py b/infra/bots/recipes/skpbench.py
index 80681ae..7a79e0f 100644
--- a/infra/bots/recipes/skpbench.py
+++ b/infra/bots/recipes/skpbench.py
@@ -67,7 +67,7 @@
skpbench_args += ['--gpuThreads', '0']
if '9x9' in api.vars.builder_name:
skpbench_args += [
- '--ddlNumAdditionalThreads', 9,
+ '--ddlNumRecordingThreads', 9,
'--ddlTilingWidthHeight', 3]
if 'Android' in api.vars.builder_name:
skpbench_args += [
diff --git a/tools/skpbench/skpbench.cpp b/tools/skpbench/skpbench.cpp
index db48940..05ad72e 100644
--- a/tools/skpbench/skpbench.cpp
+++ b/tools/skpbench/skpbench.cpp
@@ -60,8 +60,7 @@
*/
static DEFINE_bool(ddl, false, "record the skp into DDLs before rendering");
-static DEFINE_int(ddlNumAdditionalThreads, 0,
- "number of DDL recording threads in addition to main one");
+static DEFINE_int(ddlNumRecordingThreads, 0, "number of DDL recording threads (0=num_cores)");
static DEFINE_int(ddlTilingWidthHeight, 0, "number of tiles along one edge when in DDL mode");
static DEFINE_bool(comparableDDL, false, "render in a way that is comparable to 'comparableSKP'");
@@ -201,7 +200,8 @@
std::vector<SkDocumentPage> fFrames;
};
-static void ddl_sample(GrContext* context, DDLTileHelper* tiles, GpuSync& gpuSync, Sample* sample,
+static void ddl_sample(GrContext* context, DDLTileHelper* tiles, GpuSync& gpuSync,
+ Sample* sample, SkTaskGroup* recordingTaskGroup, SkTaskGroup* gpuTaskGroup,
std::chrono::high_resolution_clock::time_point* startStopTime) {
using clock = std::chrono::high_resolution_clock;
@@ -220,12 +220,18 @@
// through a DDL.
tiles->drawAllTilesDirectly(context);
} else {
- // TODO: Here we create all the DDLs, wait, and then draw them all. This should be updated
- // to use the GPUDDLSink method of having a separate GPU thread.
- tiles->createDDLsInParallel();
- tiles->precompileAndDrawAllTiles(context);
+ tiles->kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, context);
+ recordingTaskGroup->wait();
}
- flush_with_sync(context, gpuSync);
+
+ if (gpuTaskGroup) {
+ gpuTaskGroup->add([&]{
+ flush_with_sync(context, gpuSync);
+ });
+ gpuTaskGroup->wait();
+ } else {
+ flush_with_sync(context, gpuSync);
+ }
*startStopTime = clock::now();
@@ -235,8 +241,9 @@
}
}
-static void run_ddl_benchmark(GrContext* context, sk_sp<SkSurface> dstSurface,
- SkPicture* inputPicture, std::vector<Sample>* samples) {
+static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrContext *context,
+ sk_sp<SkSurface> dstSurface, SkPicture* inputPicture,
+ std::vector<Sample>* samples) {
using clock = std::chrono::high_resolution_clock;
const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
@@ -262,12 +269,26 @@
tiles.createSKPPerTile(compressedPictureData.get(), promiseImageHelper);
- SkTaskGroup::Enabler enabled(FLAGS_ddlNumAdditionalThreads);
+ // In comparable modes, there is no GPU thread. The following pointers are all null.
+ // Otherwise, we transfer testContext onto the GPU thread until after the bench.
+ std::unique_ptr<SkExecutor> gpuThread;
+ std::unique_ptr<SkTaskGroup> gpuTaskGroup;
+ std::unique_ptr<SkExecutor> recordingThreadPool;
+ std::unique_ptr<SkTaskGroup> recordingTaskGroup;
+ if (!FLAGS_comparableDDL && !FLAGS_comparableSKP) {
+ gpuThread = SkExecutor::MakeFIFOThreadPool(1, false);
+ gpuTaskGroup = std::make_unique<SkTaskGroup>(*gpuThread);
+ recordingThreadPool = SkExecutor::MakeFIFOThreadPool(FLAGS_ddlNumRecordingThreads, false);
+ recordingTaskGroup = std::make_unique<SkTaskGroup>(*recordingThreadPool);
+ testContext->makeNotCurrent();
+ gpuTaskGroup->add([=]{ testContext->makeCurrent(); });
+ }
clock::time_point startStopTime = clock::now();
GpuSync gpuSync;
- ddl_sample(context, &tiles, gpuSync, nullptr, &startStopTime);
+ ddl_sample(context, &tiles, gpuSync, nullptr, recordingTaskGroup.get(),
+ gpuTaskGroup.get(), &startStopTime);
clock::duration cumulativeDuration = std::chrono::milliseconds(0);
@@ -277,12 +298,22 @@
do {
tiles.resetAllTiles();
- ddl_sample(context, &tiles, gpuSync, &sample, &startStopTime);
+ ddl_sample(context, &tiles, gpuSync, &sample, recordingTaskGroup.get(),
+ gpuTaskGroup.get(), &startStopTime);
} while (sample.fDuration < sampleDuration);
cumulativeDuration += sample.fDuration;
} while (cumulativeDuration < benchDuration || 0 == samples->size() % 2);
+ // Move the context back to this thread now that we're done benching.
+ if (gpuTaskGroup) {
+ gpuTaskGroup->add([=]{
+ testContext->makeNotCurrent();
+ });
+ gpuTaskGroup->wait();
+ testContext->makeCurrent();
+ }
+
if (!FLAGS_png.isEmpty()) {
// The user wants to see the final result
dstSurface->draw(tiles.composeDDL());
@@ -580,7 +611,7 @@
}
if (!FLAGS_gpuClock) {
if (FLAGS_ddl) {
- run_ddl_benchmark(ctx, surface, skp.get(), &samples);
+ run_ddl_benchmark(testCtx, ctx, surface, skp.get(), &samples);
} else if (!mskp) {
auto s = std::make_unique<StaticSkp>(skp);
run_benchmark(ctx, surface.get(), s.get(), &samples);
diff --git a/tools/skpbench/skpbench.py b/tools/skpbench/skpbench.py
index 8c53e9a..8c14818 100755
--- a/tools/skpbench/skpbench.py
+++ b/tools/skpbench/skpbench.py
@@ -73,9 +73,9 @@
help="optional file to append results into")
__argparse.add_argument('--ddl',
action='store_true', help="record the skp into DDLs before rendering")
-__argparse.add_argument('--ddlNumAdditionalThreads',
+__argparse.add_argument('--ddlNumRecordingThreads',
type=int, default=0,
- help="number of DDL recording threads in addition to main one")
+ help="number of DDL recording threads (0=num_cores)")
__argparse.add_argument('--ddlTilingWidthHeight',
type=int, default=0, help="number of tiles along one edge when in DDL mode")
__argparse.add_argument('--gpuThreads',
@@ -144,9 +144,9 @@
# DDL parameters
if FLAGS.ddl:
ARGV.extend(['--ddl', 'true'])
- if FLAGS.ddlNumAdditionalThreads:
- ARGV.extend(['--ddlNumAdditionalThreads',
- str(FLAGS.ddlNumAdditionalThreads)])
+ if FLAGS.ddlNumRecordingThreads:
+ ARGV.extend(['--ddlNumRecordingThreads',
+ str(FLAGS.ddlNumRecordingThreads)])
if FLAGS.ddlTilingWidthHeight:
ARGV.extend(['--ddlTilingWidthHeight', str(FLAGS.ddlTilingWidthHeight)])