blob: 4f9fe56cf2e3c9921a0dd5e08f435de5d046ef02 [file] [log] [blame]
Primiano Tuccidf3ab202020-05-21 14:20:57 +01001/*
2 * Copyright (C) 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// See /docs/design-docs/protozero.md for rationale and results.
18
#include <stdint.h>
#include <string.h>
#include <unistd.h>

#include <memory>
#include <new>
#include <vector>

#include <benchmark/benchmark.h>
25
26#include "perfetto/base/compiler.h"
27#include "perfetto/protozero/static_buffer.h"
28
29// Autogenerated headers in out/*/gen/
30#include "src/protozero/test/example_proto/library.pbzero.h"
31#include "src/protozero/test/example_proto/test_messages.pb.h"
32#include "src/protozero/test/example_proto/test_messages.pbzero.h"
33
34// Generated by the protozero plugin.
35namespace pbzero = protozero::test::protos::pbzero;
36
37// Generated by the official protobuf compiler.
38namespace pblite = protozero::test::protos;
39
40namespace {
41
// Number of bytes reserved for each benchmark iteration. It has to be strictly
// larger than the biggest payload any single iteration writes.
constexpr size_t kBufPerIteration = 512;

// Total size of the output area (64 MB). Iterations write into it cyclically,
// which is meant to simulate a realistic tracing scenario.
constexpr size_t kTotalWorkingSetSize = size_t{64} << 20;
alignas(uint64_t) char g_out_buffer[kTotalWorkingSetSize];

// Write cursor: the position inside g_out_buffer that the current iteration
// writes to. It initially points at the very beginning of the buffer.
char* g_cur = &g_out_buffer[0];
51
// Fake payload used to populate the benchmark messages. Words 0-3 provide the
// integer field values; words 4-7 are read back as the bytes of a C string.
// The most significant byte of the last word is zero, which acts as the NUL
// terminator of that string on little-endian targets.
uint64_t g_fake_input_simple[] = {0x12345678,
                                  0x90ABCDEF,
                                  0x11111111,
                                  0xFFFFFFFF,
                                  0x6666666666666666ULL,
                                  0x6666666666666666ULL,
                                  0x6666666666666666ULL,
                                  0x0066666666666666ULL};

// Speed-of-light serializer. A very simple C++ class that just appends data
// into a linear buffer making all sorts of favourable assumptions. It does not
// use any binary-stable encoding, it does not perform bound checking, it
// assumes favourably aligned writes and it doesn't deal with any thread-safety.
// The speed-of-light serializer serves as a reference for how fast a serializer
// could be if argument marshalling and bound checking were zero cost.
struct SOLMsg {
  // Copies the raw bytes of |x| at the write cursor and advances the cursor by
  // sizeof(T). No bound check is performed against |storage_|.
  template <typename T>
  void Append(T x) {
    // The reinterpret_cast is to give favorable alignment guarantees.
    memcpy(reinterpret_cast<T*>(ptr_), &x, sizeof(x));
    ptr_ += sizeof(x);
  }

  void set_field_int32(int32_t x) { Append(x); }
  void set_field_uint32(uint32_t x) { Append(x); }
  void set_field_int64(int64_t x) { Append(x); }
  void set_field_uint64(uint64_t x) { Append(x); }

  // Copies |str|, including its NUL terminator, at the write cursor and moves
  // the cursor right past the terminator. Note that strcpy() returns its
  // destination argument, so assigning its result to |ptr_| would leave the
  // cursor at the start of the string and let a later append overwrite it;
  // hence the explicit length computation. No bound check is performed.
  void set_field_string(const char* str) {
    const size_t len_with_nul = strlen(str) + 1;
    memcpy(ptr_, str, len_with_nul);
    ptr_ += len_with_nul;
  }

  // Constructs a fresh message in the memory immediately following this object
  // and returns it. The caller is responsible for that memory being available.
  SOLMsg* add_field_nested() { return new (this + 1) SOLMsg(); }

  // Inline payload area, sized like the fake input table.
  char storage_[sizeof(g_fake_input_simple)];
  // Write cursor into |storage_|.
  char* ptr_ = &storage_[0];
};
86
87template <typename T>
88PERFETTO_ALWAYS_INLINE void FillMessage_Simple(T* msg) {
89 benchmark::DoNotOptimize(g_fake_input_simple);
90 msg->set_field_int32(static_cast<int32_t>(g_fake_input_simple[0]));
91 msg->set_field_uint32(static_cast<uint32_t>(g_fake_input_simple[1]));
92 msg->set_field_int64(static_cast<int64_t>(g_fake_input_simple[2]));
93 msg->set_field_uint64(static_cast<uint64_t>(g_fake_input_simple[3]));
94 msg->set_field_string(reinterpret_cast<const char*>(&g_fake_input_simple[4]));
95}
96
97template <typename T>
98PERFETTO_ALWAYS_INLINE void FillMessage_Nested(T* msg, int depth = 0) {
99 benchmark::DoNotOptimize(g_fake_input_simple);
100 FillMessage_Simple(msg);
101 if (depth < 3) {
102 auto* child = msg->add_field_nested();
103 FillMessage_Nested(child, depth + 1);
104 }
105}
106
107PERFETTO_ALWAYS_INLINE void Clobber(benchmark::State& state) {
108 uint64_t* buf = reinterpret_cast<uint64_t*>(g_cur);
109
110 // Read-back the data written to have a realistic evaluation of the
111 // speed-of-light scenario. This is to deal with architecture of modern CPUs.
112 // If we write a bunch of memory bytes, never read-back from them, and then
113 // just over-write them, the CPU can just throw away the whole stream of
114 // instructions that produced them, if that's still in flight and tracked in
115 // the out-of-order units.
116 // The buf[i-1] ^= buf forces the CPU to consume the result of the writes.
117 buf[0] = reinterpret_cast<uint64_t>(&state);
118 for (size_t i = 1; i < kBufPerIteration / sizeof(uint64_t); i++)
119 buf[i] ^= buf[i - 1];
Lalit Maganti15b3c022020-06-05 13:05:53 +0100120 if (buf[(kBufPerIteration / sizeof(uint64_t)) - 1] == 42)
Primiano Tuccidf3ab202020-05-21 14:20:57 +0100121 PERFETTO_CHECK(false);
122 benchmark::DoNotOptimize(buf);
123
124 constexpr size_t kWrap = kTotalWorkingSetSize / kBufPerIteration;
125 g_cur = &g_out_buffer[(state.iterations() % kWrap) * kBufPerIteration];
126 benchmark::ClobberMemory();
127}
128
129} // namespace
130
131static void BM_Protozero_Simple_Libprotobuf(benchmark::State& state) {
132 while (state.KeepRunning()) {
133 {
134 // The nested block is to account for RAII finalizers.
135 pblite::EveryField msg;
136 FillMessage_Simple(&msg);
137 msg.SerializeToArray(g_cur, kBufPerIteration);
138 }
139 Clobber(state);
140 }
141}
142
143static void BM_Protozero_Simple_Protozero(benchmark::State& state) {
144 while (state.KeepRunning()) {
145 {
146 protozero::StaticBuffered<pbzero::EveryField> msg(g_cur,
147 kBufPerIteration);
148 FillMessage_Simple(msg.get());
149 }
150 Clobber(state);
151 }
152}
153
154static void BM_Protozero_Simple_SpeedOfLight(benchmark::State& state) {
155 while (state.KeepRunning()) {
156 SOLMsg* msg = new (g_cur) SOLMsg();
157 FillMessage_Simple(msg);
158 Clobber(state);
159 }
160}
161
162static void BM_Protozero_Nested_Libprotobuf(benchmark::State& state) {
163 while (state.KeepRunning()) {
164 {
165 pblite::EveryField msg;
166 FillMessage_Nested(&msg);
167 msg.SerializeToArray(g_cur, kBufPerIteration);
168 }
169 Clobber(state);
170 }
171}
172
173static void BM_Protozero_Nested_Protozero(benchmark::State& state) {
174 while (state.KeepRunning()) {
175 {
176 protozero::StaticBuffered<pbzero::EveryField> msg(g_cur,
177 kBufPerIteration);
178 FillMessage_Nested(msg.get());
179 }
180 Clobber(state);
181 }
182}
183
184static void BM_Protozero_Nested_SpeedOfLight(benchmark::State& state) {
185 while (state.KeepRunning()) {
186 SOLMsg* msg = new (g_cur) SOLMsg();
187 FillMessage_Nested(msg);
188 Clobber(state);
189 }
190}
191
192BENCHMARK(BM_Protozero_Simple_Libprotobuf);
193BENCHMARK(BM_Protozero_Simple_Protozero);
194BENCHMARK(BM_Protozero_Simple_SpeedOfLight);
195
196BENCHMARK(BM_Protozero_Nested_Libprotobuf);
197BENCHMARK(BM_Protozero_Nested_Protozero);
198BENCHMARK(BM_Protozero_Nested_SpeedOfLight);