Benchmark-mode for macrobench

Adds googlebench output format support
Adds offscreen rendering for >60fps benchmarking
Adds 'all' alias to run all registered TestScenes

Change-Id: I2579e40f2f4c941bfbd90c75efbee384c08a116b
diff --git a/libs/hwui/Android.mk b/libs/hwui/Android.mk
index 12e46cd..d90f88f 100644
--- a/libs/hwui/Android.mk
+++ b/libs/hwui/Android.mk
@@ -127,6 +127,9 @@
 # a problem
 hwui_cflags += -Wno-free-nonheap-object
 
+# clang's warning is broken, see: https://llvm.org/bugs/show_bug.cgi?id=21629
+hwui_cflags += -Wno-missing-braces
+
 ifeq (true, $(HWUI_NEW_OPS))
     hwui_src_files += \
         BakedOpDispatcher.cpp \
@@ -309,6 +312,7 @@
 # set to libhwui_static_debug to skip actual GL commands
 LOCAL_WHOLE_STATIC_LIBRARIES := libhwui_static
 LOCAL_SHARED_LIBRARIES := libmemunreachable
+LOCAL_STATIC_LIBRARIES := libgoogle-benchmark
 
 LOCAL_SRC_FILES += \
     $(hwui_test_common_src_files) \
diff --git a/libs/hwui/Properties.cpp b/libs/hwui/Properties.cpp
index 6f68c2b..112ba11 100644
--- a/libs/hwui/Properties.cpp
+++ b/libs/hwui/Properties.cpp
@@ -65,6 +65,7 @@
 bool Properties::sDisableProfileBars = false;
 
 bool Properties::waitForGpuCompletion = false;
+bool Properties::forceDrawFrame = false;
 
 bool Properties::filterOutTestOverhead = false;
 
diff --git a/libs/hwui/Properties.h b/libs/hwui/Properties.h
index 8fec429..cdfc081 100644
--- a/libs/hwui/Properties.h
+++ b/libs/hwui/Properties.h
@@ -295,6 +295,7 @@
 
     // Should be used only by test apps
     static bool waitForGpuCompletion;
+    static bool forceDrawFrame;
 
     // Should only be set by automated tests to try and filter out
     // any overhead they add
diff --git a/libs/hwui/renderthread/CanvasContext.cpp b/libs/hwui/renderthread/CanvasContext.cpp
index 32dc0c1..0a48a0c 100644
--- a/libs/hwui/renderthread/CanvasContext.cpp
+++ b/libs/hwui/renderthread/CanvasContext.cpp
@@ -240,7 +240,7 @@
         return;
     }
 
-    if (CC_LIKELY(mSwapHistory.size())) {
+    if (CC_LIKELY(mSwapHistory.size() && !Properties::forceDrawFrame)) {
         nsecs_t latestVsync = mRenderThread.timeLord().latestVsync();
         const SwapHistory& lastSwap = mSwapHistory.back();
         nsecs_t vsyncDelta = std::abs(lastSwap.vsyncTime - latestVsync);
diff --git a/libs/hwui/renderthread/RenderProxy.cpp b/libs/hwui/renderthread/RenderProxy.cpp
index 0a46047..10a17f8 100644
--- a/libs/hwui/renderthread/RenderProxy.cpp
+++ b/libs/hwui/renderthread/RenderProxy.cpp
@@ -442,6 +442,19 @@
     postAndWait(task);
 }
 
+CREATE_BRIDGE2(frameTimePercentile, RenderThread* thread, int percentile) {
+    return reinterpret_cast<void*>(static_cast<uintptr_t>(  // integer result is carried in the void* return
+        args->thread->jankTracker().findPercentile(args->percentile)));
+}
+
+uint32_t RenderProxy::frameTimePercentile(int p) {  // p is forwarded to jankTracker().findPercentile()
+    SETUP_TASK(frameTimePercentile);
+    args->thread = &mRenderThread;
+    args->percentile = p;
+    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(  // undo the uintptr_t/void* packing done by the bridge
+        postAndWait(task)));
+}
+
 CREATE_BRIDGE2(dumpGraphicsMemory, int fd, RenderThread* thread) {
     args->thread->jankTracker().dump(args->fd);
 
diff --git a/libs/hwui/renderthread/RenderProxy.h b/libs/hwui/renderthread/RenderProxy.h
index 8398222..bf92cc6 100644
--- a/libs/hwui/renderthread/RenderProxy.h
+++ b/libs/hwui/renderthread/RenderProxy.h
@@ -109,6 +109,7 @@
     ANDROID_API void dumpProfileInfo(int fd, int dumpFlags);
     // Not exported, only used for testing
     void resetProfileInfo();
+    uint32_t frameTimePercentile(int p);
     ANDROID_API static void dumpGraphicsMemory(int fd);
 
     ANDROID_API void setTextureAtlas(const sp<GraphicBuffer>& buffer, int64_t* map, size_t size);
diff --git a/libs/hwui/tests/common/LeakChecker.cpp b/libs/hwui/tests/common/LeakChecker.cpp
index 3ef4b45..d935382 100644
--- a/libs/hwui/tests/common/LeakChecker.cpp
+++ b/libs/hwui/tests/common/LeakChecker.cpp
@@ -70,7 +70,6 @@
     // TODO: Until we can shutdown the RT thread we need to do this in
     // two passes as GetUnreachableMemory has limited insight into
     // thread-local caches so some leaks will not be properly tagged as leaks
-    nsecs_t before = systemTime();
     UnreachableMemoryInfo rtMemInfo;
     TestUtils::runOnRenderThread([&rtMemInfo](renderthread::RenderThread& thread) {
         if (Caches::hasInstance()) {
@@ -88,8 +87,6 @@
         return;
     }
     logUnreachable({rtMemInfo, uiMemInfo});
-    nsecs_t after = systemTime();
-    cout << "Leak check took " << ns2ms(after - before) << "ms" << endl;
 }
 
 } /* namespace test */
diff --git a/libs/hwui/tests/common/TestContext.cpp b/libs/hwui/tests/common/TestContext.cpp
index 146e735..1c7e7ee 100644
--- a/libs/hwui/tests/common/TestContext.cpp
+++ b/libs/hwui/tests/common/TestContext.cpp
@@ -62,20 +62,53 @@
 TestContext::~TestContext() {}
 
 sp<Surface> TestContext::surface() {
-    if (!mSurfaceControl.get()) {
-        mSurfaceControl = mSurfaceComposerClient->createSurface(String8("HwuiTest"),
-                gDisplay.w, gDisplay.h, PIXEL_FORMAT_RGBX_8888);
-
-        SurfaceComposerClient::openGlobalTransaction();
-        mSurfaceControl->setLayer(0x7FFFFFF);
-        mSurfaceControl->show();
-        SurfaceComposerClient::closeGlobalTransaction();
+    if (!mSurface.get()) {
+        createSurface();
     }
+    return mSurface;
+}
 
-    return mSurfaceControl->getSurface();
+void TestContext::createSurface() {
+    if (mRenderOffscreen) {
+        createOffscreenSurface();
+    } else {
+        createWindowSurface();
+    }
+}
+
+void TestContext::createWindowSurface() {
+    mSurfaceControl = mSurfaceComposerClient->createSurface(String8("HwuiTest"),
+            gDisplay.w, gDisplay.h, PIXEL_FORMAT_RGBX_8888);
+
+    SurfaceComposerClient::openGlobalTransaction();
+    mSurfaceControl->setLayer(0x7FFFFFF);
+    mSurfaceControl->show();
+    SurfaceComposerClient::closeGlobalTransaction();
+    mSurface = mSurfaceControl->getSurface();
+}
+
+void TestContext::createOffscreenSurface() {
+    sp<IGraphicBufferProducer> producer;
+    sp<IGraphicBufferConsumer> consumer;
+    BufferQueue::createBufferQueue(&producer, &consumer);
+    producer->setMaxDequeuedBufferCount(3);
+    producer->setAsyncMode(true);
+    mConsumer = new BufferItemConsumer(consumer, GRALLOC_USAGE_HW_COMPOSER, 4);
+    mConsumer->setDefaultBufferSize(gDisplay.w, gDisplay.h);
+    mSurface = new Surface(producer);
 }
 
 void TestContext::waitForVsync() {
+    if (mConsumer.get()) {
+        BufferItem buffer;
+        if (mConsumer->acquireBuffer(&buffer, 0, false) == OK) {
+            // We assume the producer is internally ordered enough such that
+            // it is unnecessary to set a release fence
+            mConsumer->releaseBuffer(buffer);
+        }
+        // We're running free, go go go!
+        return;
+    }
 #if !HWUI_NULL_GPU
     // Request vsync
     mDisplayEventReceiver.requestNextVsync();
diff --git a/libs/hwui/tests/common/TestContext.h b/libs/hwui/tests/common/TestContext.h
index 2bbe5df..312988b 100644
--- a/libs/hwui/tests/common/TestContext.h
+++ b/libs/hwui/tests/common/TestContext.h
@@ -19,12 +19,16 @@
 
 #include <gui/DisplayEventReceiver.h>
 #include <gui/ISurfaceComposer.h>
+#include <gui/BufferItemConsumer.h>
 #include <gui/SurfaceComposerClient.h>
 #include <gui/SurfaceControl.h>
 #include <gui/Surface.h>
 #include <ui/DisplayInfo.h>
 #include <utils/Looper.h>
 
+#include <thread>
+#include <atomic>
+
 namespace android {
 namespace uirenderer {
 namespace test {
@@ -39,15 +43,29 @@
     TestContext();
     ~TestContext();
 
+    // Must be called before surface();
+    void setRenderOffscreen(bool renderOffscreen) {
+        LOG_ALWAYS_FATAL_IF(mSurface.get(),
+                "Must be called before surface is created");
+        mRenderOffscreen = renderOffscreen;
+    }
+
     sp<Surface> surface();
 
     void waitForVsync();
 
 private:
+    void createSurface();
+    void createWindowSurface();
+    void createOffscreenSurface();
+
     sp<SurfaceComposerClient> mSurfaceComposerClient;
     sp<SurfaceControl> mSurfaceControl;
+    sp<BufferItemConsumer> mConsumer;
     DisplayEventReceiver mDisplayEventReceiver;
     sp<Looper> mLooper;
+    sp<Surface> mSurface;
+    bool mRenderOffscreen = false;  // window surface unless setRenderOffscreen(true) is called
 };
 
 } // namespace test
diff --git a/libs/hwui/tests/common/TestScene.h b/libs/hwui/tests/common/TestScene.h
index 706f2ff..d4a6646 100644
--- a/libs/hwui/tests/common/TestScene.h
+++ b/libs/hwui/tests/common/TestScene.h
@@ -38,6 +38,7 @@
     struct Options {
         int count = 0;
         int reportFrametimeWeight = 0;
+        bool renderOffscreen = false;
     };
 
     template <class T>
diff --git a/libs/hwui/tests/macrobench/TestSceneRunner.cpp b/libs/hwui/tests/macrobench/TestSceneRunner.cpp
index 2d78437..f03dcbf 100644
--- a/libs/hwui/tests/macrobench/TestSceneRunner.cpp
+++ b/libs/hwui/tests/macrobench/TestSceneRunner.cpp
@@ -22,6 +22,7 @@
 #include "renderthread/RenderProxy.h"
 #include "renderthread/RenderTask.h"
 
+#include <benchmark/benchmark.h>
 #include <cutils/log.h>
 #include <gui/Surface.h>
 #include <ui/PixelFormat.h>
@@ -62,13 +63,70 @@
     T mAverage;
 };
 
-void run(const TestScene::Info& info, const TestScene::Options& opts) {
+// Emits one googlebench-style report for the whole run of |info| and, when
+// rendering on-screen, one extra pseudo-run per frame-time percentile.
+// |durationInS| is the elapsed time, in seconds, of the |opts.count|
+// measured frames.
+void outputBenchmarkReport(const TestScene::Info& info, const TestScene::Options& opts,
+        benchmark::BenchmarkReporter* reporter, RenderProxy* proxy,
+        double durationInS) {
+    using namespace benchmark;
+
+    struct ReportInfo {
+        int percentile;
+        const char* suffix;
+    };
+
+    static const std::array<ReportInfo, 4> REPORTS = {
+        ReportInfo { 50, "_50th" },
+        ReportInfo { 90, "_90th" },
+        ReportInfo { 95, "_95th" },
+        ReportInfo { 99, "_99th" },
+    };
+
+    // Although a vector is used, it must stay with only a single element
+    // otherwise the BenchmarkReporter will automatically compute
+    // mean and stddev which doesn't make sense for our usage
+    std::vector<BenchmarkReporter::Run> reports;
+    BenchmarkReporter::Run report;
+    report.benchmark_name = info.name;
+    report.iterations = static_cast<int64_t>(opts.count);
+    report.real_accumulated_time = durationInS;
+    report.cpu_accumulated_time = durationInS;
+    // Guard the division so a zero-length run reports 0 rather than inf/NaN
+    report.items_per_second = durationInS > 0 ? opts.count / durationInS : 0.0;
+    reports.push_back(report);
+    reporter->ReportRuns(reports);
+
+    // Pretend the percentiles are single-iteration runs of the test
+    // If rendering offscreen skip this as it's fps that's more interesting
+    // in that test case than percentiles.
+    if (!opts.renderOffscreen) {
+        for (const auto& ri : REPORTS) {
+            // The percentile is divided by 1000 to express it in seconds
+            // (presumably findPercentile reports milliseconds -- confirm)
+            const double percentileInS =
+                    proxy->frameTimePercentile(ri.percentile) / 1000.0;
+            reports[0].benchmark_name = info.name;
+            reports[0].benchmark_name += ri.suffix;
+            reports[0].real_accumulated_time = percentileInS;
+            reports[0].cpu_accumulated_time = percentileInS;
+            reports[0].iterations = 1;
+            reports[0].items_per_second = 0;
+            reporter->ReportRuns(reports);
+        }
+    }
+}
+
+void run(const TestScene::Info& info, const TestScene::Options& opts,
+        benchmark::BenchmarkReporter* reporter) {
     // Switch to the real display
     gDisplay = getBuiltInDisplay();
 
     std::unique_ptr<TestScene> scene(info.createScene(opts));
 
     TestContext testContext;
+    testContext.setRenderOffscreen(opts.renderOffscreen);
 
     // create the native surface
     const int width = gDisplay.w;
@@ -91,7 +141,12 @@
     proxy->setLightCenter((Vector3){lightX, dp(-200.0f), dp(800.0f)});
 
     // Do a few cold runs then reset the stats so that the caches are all hot
-    for (int i = 0; i < 5; i++) {
+    int warmupFrameCount = 5;
+    if (opts.renderOffscreen) {
+        // Do a few more warmups to try and boost the clocks up
+        warmupFrameCount = 10;
+    }
+    for (int i = 0; i < warmupFrameCount; i++) {
         testContext.waitForVsync();
         nsecs_t vsync = systemTime(CLOCK_MONOTONIC);
         UiFrameInfoBuilder(proxy->frameInfo()).setVsync(vsync, vsync);
@@ -103,6 +158,7 @@
 
     ModifiedMovingAverage<double> avgMs(opts.reportFrametimeWeight);
 
+    nsecs_t start = systemTime(CLOCK_MONOTONIC);
     for (int i = 0; i < opts.count; i++) {
         testContext.waitForVsync();
         nsecs_t vsync = systemTime(CLOCK_MONOTONIC);
@@ -121,6 +177,13 @@
             }
         }
     }
+    proxy->fence();
+    nsecs_t end = systemTime(CLOCK_MONOTONIC);
 
-    proxy->dumpProfileInfo(STDOUT_FILENO, DumpFlags::JankStats);
+    if (reporter) {
+        outputBenchmarkReport(info, opts, reporter, proxy.get(),
+                (end - start) / (double) s2ns(1));
+    } else {
+        proxy->dumpProfileInfo(STDOUT_FILENO, DumpFlags::JankStats);
+    }
 }
diff --git a/libs/hwui/tests/macrobench/main.cpp b/libs/hwui/tests/macrobench/main.cpp
index 5bad436..ffeef45 100644
--- a/libs/hwui/tests/macrobench/main.cpp
+++ b/libs/hwui/tests/macrobench/main.cpp
@@ -20,6 +20,8 @@
 #include "protos/hwui.pb.h"
 #include "Properties.h"
 
+#include <benchmark/benchmark.h>
+#include <../src/sysinfo.h>
 #include <getopt.h>
 #include <stdio.h>
 #include <string>
@@ -40,8 +42,10 @@
 static int gRepeatCount = 1;
 static std::vector<TestScene::Info> gRunTests;
 static TestScene::Options gOpts;
+std::unique_ptr<benchmark::BenchmarkReporter> gBenchmarkReporter;
 
-void run(const TestScene::Info& info, const TestScene::Options& opts);
+void run(const TestScene::Info& info, const TestScene::Options& opts,
+        benchmark::BenchmarkReporter* reporter);
 
 static void printHelp() {
     printf(R"(
@@ -122,6 +126,24 @@
     close(fd);
 }
 
+// Installs the global benchmark reporter matching |format|
+// ("tabular", "json" or "csv"). Returns false, after printing an error to
+// stderr and leaving any previously installed reporter untouched, when
+// |format| is not recognized.
+static bool setBenchmarkFormat(const char* format) {
+    if (!strcmp(format, "tabular")) {
+        gBenchmarkReporter.reset(new benchmark::ConsoleReporter());
+    } else if (!strcmp(format, "json")) {
+        gBenchmarkReporter.reset(new benchmark::JSONReporter());
+    } else if (!strcmp(format, "csv")) {
+        gBenchmarkReporter.reset(new benchmark::CSVReporter());
+    } else {
+        fprintf(stderr, "Unknown format '%s'\n", format);
+        return false;
+    }
+    return true;
+}
+
 // For options that only exist in long-form. Anything in the
 // 0-255 range is reserved for short options (which just use their ASCII value)
 namespace LongOpts {
@@ -131,6 +149,8 @@
     WaitForGpu,
     ReportFrametime,
     CpuSet,
+    BenchmarkFormat,
+    Offscreen,
 };
 }
 
@@ -142,6 +162,8 @@
     { "wait-for-gpu", no_argument, nullptr, LongOpts::WaitForGpu },
     { "report-frametime", optional_argument, nullptr, LongOpts::ReportFrametime },
     { "cpuset", required_argument, nullptr, LongOpts::CpuSet },
+    { "benchmark_format", required_argument, nullptr, LongOpts::BenchmarkFormat },
+    { "offscreen", no_argument, nullptr, LongOpts::Offscreen },
     { 0, 0, 0, 0 }
 };
 
@@ -215,6 +237,20 @@
             moveToCpuSet(optarg);
             break;
 
+        case LongOpts::BenchmarkFormat:
+            if (!optarg) {
+                error = true;
+                break;
+            }
+            if (!setBenchmarkFormat(optarg)) {
+                error = true;
+            }
+            break;
+
+        case LongOpts::Offscreen:
+            gOpts.renderOffscreen = true;
+            break;
+
         case 'h':
             printHelp();
             exit(EXIT_SUCCESS);
@@ -238,12 +274,18 @@
     if (optind < argc) {
         do {
             const char* test = argv[optind++];
-            auto pos = TestScene::testMap().find(test);
-            if (pos == TestScene::testMap().end()) {
-                fprintf(stderr, "Unknown test '%s'\n", test);
-                exit(EXIT_FAILURE);
+            if (!strcmp(test, "all")) {
+                for (auto& iter : TestScene::testMap()) {
+                    gRunTests.push_back(iter.second);
+                }
             } else {
-                gRunTests.push_back(pos->second);
+                auto pos = TestScene::testMap().find(test);
+                if (pos == TestScene::testMap().end()) {
+                    fprintf(stderr, "Unknown test '%s'\n", test);
+                    exit(EXIT_FAILURE);
+                } else {
+                    gRunTests.push_back(pos->second);
+                }
             }
         } while (optind < argc);
     } else {
@@ -256,13 +298,36 @@
     gOpts.count = 150;
 
     parseOptions(argc, argv);
+    if (!gBenchmarkReporter && gOpts.renderOffscreen) {
+        gBenchmarkReporter.reset(new benchmark::ConsoleReporter());
+    }
+
+    if (gBenchmarkReporter) {
+        size_t name_field_width = 10;
+        for (auto&& test : gRunTests) {
+            name_field_width = std::max<size_t>(name_field_width, test.name.size());
+        }
+        // _50th, _90th, etc...
+        name_field_width += 5;
+
+        benchmark::BenchmarkReporter::Context context;
+        context.num_cpus = benchmark::NumCPUs();
+        context.mhz_per_cpu = benchmark::CyclesPerSecond() / 1000000.0f;
+        context.cpu_scaling_enabled = benchmark::CpuScalingEnabled();
+        context.name_field_width = name_field_width;
+        gBenchmarkReporter->ReportContext(context);
+    }
 
     for (int i = 0; i < gRepeatCount; i++) {
         for (auto&& test : gRunTests) {
-            run(test, gOpts);
+            run(test, gOpts, gBenchmarkReporter.get());
         }
     }
-    printf("Success!\n");
+
+    if (gBenchmarkReporter) {
+        gBenchmarkReporter->Finalize();
+    }
+
     LeakChecker::checkForLeaks();
     return 0;
 }