blob: c11fe9bbf23f71d5c536e060dec64534f620ffad [file] [log] [blame]
Jared Duke13689fe2019-04-16 16:22:07 -04001/* Copyright 2019 Google LLC. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040016#include "trace.h"
17
18#include <algorithm>
Alex Stark8221a672019-08-07 17:41:19 -040019#include <cerrno> // IWYU pragma: keep
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040020#include <cstdio>
Alex Stark8221a672019-08-07 17:41:19 -040021#include <cstdlib>
Benoit Jacob14d1d8a2019-06-06 16:41:42 -040022#include <string>
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040023#include <vector>
24
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040025#include "check_macros.h"
Benoit Jacob2756b862019-07-25 15:15:39 -040026#include "side_pair.h"
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040027#include "time.h"
28
29namespace ruy {
30
31#ifdef RUY_TRACE
32
Benoit Jacob2756b862019-07-25 15:15:39 -040033enum class TraceEvent : std::uint8_t {
34 kNone,
35 kThreadStart,
36 kThreadLoopStart,
37 kThreadEnd,
38 kBlockReserved,
39 kBlockPackedLhs,
40 kBlockPackedRhs,
41 kBlockFinished
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040042};
43
Benoit Jacob2756b862019-07-25 15:15:39 -040044struct TraceEntry {
45 TimePoint time_point;
46 TraceEvent event;
47 // ruy-internal thread id i.e. contiguous index into array of threads,
48 // with 0 designating the main thread.
49 std::uint16_t thread_id = 0;
50 // Additional parameters whose meaning depends on the 'event' type.
51 std::uint32_t params[1];
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040052};
53
54struct Trace {
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040055 BlockMap block_map;
Benoit Jacob2756b862019-07-25 15:15:39 -040056 // During recording, to avoid having to use locks or atomics, we let
57 // each thread append to its own specific vector.
58 std::vector<std::vector<TraceEntry>> thread_specific_entries;
59 // Global vector of entries into which we coalesce thread_specific_entries
60 // after recording is finished, when dumping a trace. See
61 // AggregateThreadSpecificEntries.
62 std::vector<TraceEntry> entries;
Benoit Jacob842bfaf2019-04-11 10:43:28 -040063 TimePoint time_start;
64 TimePoint time_execute;
65 TimePoint time_end;
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040066};
67
Benoit Jacob2756b862019-07-25 15:15:39 -040068namespace {
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040069
Benoit Jacob2756b862019-07-25 15:15:39 -040070// Coalesce Trace::thread_specific_entries into Trace::entries.
71void AggregateThreadSpecificEntries(Trace* trace) {
72 RUY_CHECK(trace->entries.empty());
73 for (auto& thread_specific_entries_vector : trace->thread_specific_entries) {
74 for (const TraceEntry& entry : thread_specific_entries_vector) {
75 trace->entries.push_back(entry);
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040076 }
Benoit Jacob2756b862019-07-25 15:15:39 -040077 thread_specific_entries_vector.clear();
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040078 }
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -040079}
80
Benoit Jacob2756b862019-07-25 15:15:39 -040081// Sort Trace::entries by ascending time. In case of equal timepoints,
82// sort by some semi-arbitrary ordering of event types.
83void Sort(Trace* trace) {
84 std::sort(std::begin(trace->entries), std::end(trace->entries),
85 [](const TraceEntry& a, const TraceEntry& b) -> bool {
86 return a.time_point < b.time_point ||
87 (a.time_point == b.time_point &&
88 static_cast<int>(a.event) < static_cast<int>(b.event));
89 });
90}
91
92// Dump a trace. Assumes that AggregateThreadSpecificEntries and Sort have
93// already been called on it.
Alex Stark8221a672019-08-07 17:41:19 -040094//
95// On some architectures long long ints are not same as std::int64_t, and
96// time is printed as %lld, so static_casts are necessary.
Benoit Jacob2756b862019-07-25 15:15:39 -040097void Dump(const Trace& trace) {
98 const char* trace_filename = getenv("RUY_TRACE_FILE");
99 FILE* trace_file = trace_filename ? fopen(trace_filename, "w") : stderr;
100 if (!trace_file) {
101 fprintf(stderr, "Failed to open %s for write, errno=%d\n", trace_filename,
102 errno);
103 RUY_CHECK(false);
104 }
Benoit Jacobf5851162019-09-19 16:16:33 -0400105 fprintf(trace_file, "thread_count:%d\n", trace.block_map.thread_count);
Benoit Jacob2756b862019-07-25 15:15:39 -0400106 fprintf(trace_file, "rows:%d\n", trace.block_map.dims[Side::kLhs]);
107 fprintf(trace_file, "cols:%d\n", trace.block_map.dims[Side::kRhs]);
Alex Stark8221a672019-08-07 17:41:19 -0400108 fprintf(trace_file, "Execute: %lld\n",
109 static_cast<long long int>(
110 ToInt64Nanoseconds(trace.time_execute - trace.time_start)));
Benoit Jacob2756b862019-07-25 15:15:39 -0400111 for (const TraceEntry& entry : trace.entries) {
Alex Stark8221a672019-08-07 17:41:19 -0400112 long long int time = static_cast<long long int>(
113 ToInt64Nanoseconds(entry.time_point - trace.time_start));
Benoit Jacob2756b862019-07-25 15:15:39 -0400114 switch (entry.event) {
115 case TraceEvent::kThreadStart:
Alex Stark8221a672019-08-07 17:41:19 -0400116 fprintf(trace_file, "ThreadStart: %lld, %d\n", time, entry.thread_id);
Benoit Jacob2756b862019-07-25 15:15:39 -0400117 break;
118 case TraceEvent::kThreadLoopStart:
Alex Stark8221a672019-08-07 17:41:19 -0400119 fprintf(trace_file, "ThreadLoopStart: %lld, %d\n", time,
Benoit Jacob2756b862019-07-25 15:15:39 -0400120 entry.thread_id);
121 break;
122 case TraceEvent::kThreadEnd:
Alex Stark8221a672019-08-07 17:41:19 -0400123 fprintf(trace_file, "ThreadEnd: %lld, %d\n", time, entry.thread_id);
Benoit Jacob2756b862019-07-25 15:15:39 -0400124 break;
125 case TraceEvent::kBlockReserved: {
126 std::uint32_t block_id = entry.params[0];
127 SidePair<int> block;
128 GetBlockByIndex(trace.block_map, block_id, &block);
129 SidePair<int> start, end;
130 GetBlockMatrixCoords(trace.block_map, block, &start, &end);
131 fprintf(trace_file,
Alex Stark8221a672019-08-07 17:41:19 -0400132 "BlockReserved: %lld, %d, %d, %d, %d, %d, %d, %d, %d\n", time,
Benoit Jacob2756b862019-07-25 15:15:39 -0400133 entry.thread_id, block_id, block[Side::kLhs], block[Side::kRhs],
134 start[Side::kLhs], start[Side::kRhs], end[Side::kLhs],
135 end[Side::kRhs]);
136 break;
137 }
138 case TraceEvent::kBlockPackedLhs: {
139 std::uint32_t block = entry.params[0];
140 int start, end;
141 GetBlockMatrixCoords(Side::kLhs, trace.block_map, block, &start, &end);
Alex Stark8221a672019-08-07 17:41:19 -0400142 fprintf(trace_file, "BlockPackedLhs: %lld, %d, %d, %d, %d\n", time,
Benoit Jacob2756b862019-07-25 15:15:39 -0400143 entry.thread_id, block, start, end);
144 break;
145 }
146 case TraceEvent::kBlockPackedRhs: {
147 std::uint32_t block = entry.params[0];
148 int start, end;
149 GetBlockMatrixCoords(Side::kRhs, trace.block_map, block, &start, &end);
Alex Stark8221a672019-08-07 17:41:19 -0400150 fprintf(trace_file, "BlockPackedRhs: %lld, %d, %d, %d, %d\n", time,
Benoit Jacob2756b862019-07-25 15:15:39 -0400151 entry.thread_id, block, start, end);
152 break;
153 }
154 case TraceEvent::kBlockFinished: {
155 std::uint32_t block_id = entry.params[0];
156 SidePair<int> block;
157 GetBlockByIndex(trace.block_map, block_id, &block);
Alex Stark8221a672019-08-07 17:41:19 -0400158 fprintf(trace_file, "BlockFinished: %lld, %d, %d, %d, %d\n", time,
Benoit Jacob2756b862019-07-25 15:15:39 -0400159 entry.thread_id, block_id, block[Side::kLhs],
160 block[Side::kRhs]);
161 break;
162 }
163 default:
164 RUY_CHECK(false);
165 }
166 }
Alex Stark8221a672019-08-07 17:41:19 -0400167 fprintf(trace_file, "End: %lld\n",
168 static_cast<long long int>(
169 ToInt64Nanoseconds(trace.time_end - trace.time_start)));
Benoit Jacob2756b862019-07-25 15:15:39 -0400170 if (trace_filename) {
171 fclose(trace_file);
172 }
173}
174
175} // anonymous namespace
176
177// Get a Trace object to record to, or null of tracing is not enabled.
Benoit Jacob842bfaf2019-04-11 10:43:28 -0400178Trace* NewTraceOrNull(TracingContext* tracing, int rows, int depth, int cols) {
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400179 if (!tracing->initialized) {
180 tracing->initialized = true;
181 tracing->enabled = getenv("RUY_TRACE");
182 if (!tracing->enabled) {
183 return nullptr;
184 }
185 if (getenv("RUY_TRACE_FILTER_ROWS")) {
186 tracing->filter_shape_rows = std::stoi(getenv("RUY_TRACE_FILTER_ROWS"));
187 }
188 if (getenv("RUY_TRACE_FILTER_DEPTH")) {
189 tracing->filter_shape_depth = std::stoi(getenv("RUY_TRACE_FILTER_DEPTH"));
190 }
191 if (getenv("RUY_TRACE_FILTER_COLS")) {
192 tracing->filter_shape_cols = std::stoi(getenv("RUY_TRACE_FILTER_COLS"));
193 }
194 }
195 if (!tracing->enabled) {
196 return nullptr;
197 }
198 if (tracing->filter_shape_rows && rows != tracing->filter_shape_rows) {
199 return nullptr;
200 }
201 if (tracing->filter_shape_depth && depth != tracing->filter_shape_depth) {
202 return nullptr;
203 }
204 if (tracing->filter_shape_cols && cols != tracing->filter_shape_cols) {
205 return nullptr;
206 }
Benoit Jacob842bfaf2019-04-11 10:43:28 -0400207 // Delete any existing trace.
208 delete tracing->trace;
209 // Create a new one.
210 tracing->trace = new Trace;
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400211 return tracing->trace;
212}
213
Benoit Jacob2756b862019-07-25 15:15:39 -0400214// The trace recorded on a context is finalized and dumped by
215// this TracingContext destructor.
216//
217// The idea of dumping on context destructor is that typically one wants to
218// run many matrix multiplications, e.g. to hit a steady state in terms of
219// performance characteristics, but only trace the last repetition of the
220// workload, when that steady state was attained.
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400221TracingContext::~TracingContext() {
222 if (trace) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400223 AggregateThreadSpecificEntries(trace);
224 Sort(trace);
225 Dump(*trace);
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400226 }
227 delete trace;
228}
229
Benoit Jacob2756b862019-07-25 15:15:39 -0400230void TraceRecordStart(Trace* trace) {
231 if (trace) {
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400232 trace->time_start = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400233 }
234}
235
Benoit Jacobf5851162019-09-19 16:16:33 -0400236void TraceRecordExecute(const BlockMap& block_map, Trace* trace) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400237 if (trace) {
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400238 trace->time_execute = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400239 trace->block_map = block_map;
Benoit Jacobf5851162019-09-19 16:16:33 -0400240 trace->thread_specific_entries.resize(block_map.thread_count);
241 for (int thread = 0; thread < block_map.thread_count; thread++) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400242 trace->thread_specific_entries[thread].clear();
243 // Reserve some large size to avoid frequent heap allocations
244 // affecting the recorded timings.
245 trace->thread_specific_entries[thread].reserve(16384);
246 }
247 }
248}
249
250void TraceRecordEnd(Trace* trace) {
251 if (trace) {
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400252 trace->time_end = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400253 }
254}
255
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400256void TraceRecordThreadStart(std::uint32_t thread_id, Trace* trace) {
257 if (trace) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400258 TraceEntry entry;
259 entry.event = TraceEvent::kThreadStart;
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400260 entry.time_point = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400261 entry.thread_id = thread_id;
262 trace->thread_specific_entries[thread_id].push_back(entry);
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400263 }
264}
265
266void TraceRecordThreadLoopStart(std::uint32_t thread_id, Trace* trace) {
267 if (trace) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400268 TraceEntry entry;
269 entry.event = TraceEvent::kThreadLoopStart;
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400270 entry.time_point = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400271 entry.thread_id = thread_id;
272 trace->thread_specific_entries[thread_id].push_back(entry);
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400273 }
274}
275
276void TraceRecordBlockReserved(std::uint32_t thread_id, std::uint32_t block_id,
277 Trace* trace) {
278 if (trace) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400279 TraceEntry entry;
280 entry.event = TraceEvent::kBlockReserved;
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400281 entry.time_point = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400282 entry.thread_id = thread_id;
283 entry.params[0] = block_id;
284 trace->thread_specific_entries[thread_id].push_back(entry);
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400285 }
286}
287
Benoit Jacob2756b862019-07-25 15:15:39 -0400288void TraceRecordBlockPacked(std::uint32_t thread_id, Side side, int block,
289 Trace* trace) {
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400290 if (trace) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400291 TraceEntry entry;
292 entry.event = side == Side::kLhs ? TraceEvent::kBlockPackedLhs
293 : TraceEvent::kBlockPackedRhs;
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400294 entry.time_point = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400295 entry.thread_id = thread_id;
296 entry.params[0] = block;
297 trace->thread_specific_entries[thread_id].push_back(entry);
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400298 }
299}
300
Benoit Jacob2756b862019-07-25 15:15:39 -0400301void TraceRecordBlockFinished(std::uint32_t thread_id, std::uint32_t block_id,
302 Trace* trace) {
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400303 if (trace) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400304 TraceEntry entry;
305 entry.event = TraceEvent::kBlockFinished;
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400306 entry.time_point = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400307 entry.thread_id = thread_id;
308 entry.params[0] = block_id;
309 trace->thread_specific_entries[thread_id].push_back(entry);
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400310 }
311}
312
313void TraceRecordThreadEnd(std::uint32_t thread_id, Trace* trace) {
314 if (trace) {
Benoit Jacob2756b862019-07-25 15:15:39 -0400315 TraceEntry entry;
316 entry.event = TraceEvent::kThreadEnd;
Benoit Jacob4ac87c72019-07-30 13:23:08 -0400317 entry.time_point = Now();
Benoit Jacob2756b862019-07-25 15:15:39 -0400318 entry.thread_id = thread_id;
319 trace->thread_specific_entries[thread_id].push_back(entry);
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -0400320 }
321}
322
323#endif
324
325} // namespace ruy