arm_compute v18.11
diff --git a/tests/framework/instruments/OpenCLTimer.cpp b/tests/framework/instruments/OpenCLTimer.cpp
index 4391c43..2fd5714 100644
--- a/tests/framework/instruments/OpenCLTimer.cpp
+++ b/tests/framework/instruments/OpenCLTimer.cpp
@@ -39,12 +39,21 @@
{
namespace framework
{
-std::string OpenCLTimer::id() const
+// Identifier of this instrument: "OpenCLTimestamps" for the timestamp-reporting
+// variant of the template, "OpenCLTimer" for the duration-reporting variant.
+template <bool output_timestamps>
+std::string OpenCLClock<output_timestamps>::id() const
{
- return "OpenCLTimer";
+    // output_timestamps is a template parameter, so the choice is fixed per instantiation.
+    return output_timestamps ? "OpenCLTimestamps" : "OpenCLTimer";
}
-OpenCLTimer::OpenCLTimer(ScaleFactor scale_factor)
+template <bool output_timestamps>
+OpenCLClock<output_timestamps>::OpenCLClock(ScaleFactor scale_factor)
: _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix(), _timer_enabled(false)
{
auto q = CLScheduler::get().queue();
@@ -77,7 +86,8 @@
}
}
-void OpenCLTimer::test_start()
+template <bool output_timestamps>
+void OpenCLClock<output_timestamps>::test_start()
{
// Start intercepting enqueues:
ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
@@ -100,9 +110,9 @@
ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
ARM_COMPUTE_UNUSED(event);
- OpenCLTimer::kernel_info info;
- cl::Kernel cpp_kernel(kernel, true);
- std::stringstream ss;
+ kernel_info info;
+ cl::Kernel cpp_kernel(kernel, true);
+ std::stringstream ss;
ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
if(gws != nullptr)
{
@@ -144,17 +154,20 @@
graph::TaskExecutor::get().execute_function = task_interceptor;
}
-void OpenCLTimer::start()
+// Begin a measurement run: drop the kernels recorded so far and set the timer-enabled flag.
+template <bool output_timestamps>
+void OpenCLClock<output_timestamps>::start()
{
_kernels.clear();
_timer_enabled = true;
}
-void OpenCLTimer::stop()
+// End a measurement run: clear the timer-enabled flag; the recorded kernels are left untouched.
+template <bool output_timestamps>
+void OpenCLClock<output_timestamps>::stop()
{
_timer_enabled = false;
}
-void OpenCLTimer::test_stop()
+template <bool output_timestamps>
+void OpenCLClock<output_timestamps>::test_stop()
{
// Restore real function
CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
@@ -163,20 +176,66 @@
_real_function = nullptr;
}
-Instrument::MeasurementsMap OpenCLTimer::measurements() const
+/** Build the measurements for every kernel recorded in _kernels.
+ *
+ * Each kernel is labelled "<kernel name> #<index>", where the index counts the
+ * recorded kernels in iteration order.
+ *
+ * - Timestamp variant (output_timestamps == true): four entries per kernel,
+ *   "[queued]", "[flushed]", "[start]" and "[end]" followed by the label, each
+ *   holding the raw profiling timestamp divided by the scale factor.
+ * - Timer variant (output_timestamps == false): one entry per kernel holding
+ *   (end - start) divided by the scale factor.
+ *
+ * @return Map from measurement name to measurement, expressed in _unit.
+ */
+template <bool output_timestamps>
+Instrument::MeasurementsMap OpenCLClock<output_timestamps>::measurements() const
{
MeasurementsMap measurements;
unsigned int kernel_number = 0;
- for(auto kernel : _kernels)
+    // Bind by const reference: the loop only reads the entry, so copying the
+    // event handle and the name string on every iteration is unnecessary.
+    for(const auto &kernel : _kernels)
{
- cl_ulong start = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
- cl_ulong end = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
+        cl_ulong queued, flushed, start, end;
+        kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
+        kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &flushed);
+        kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
+        kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
+        const std::string name = kernel.name + " #" + support::cpp11::to_string(kernel_number++);
- measurements.emplace(kernel.name + " #" + support::cpp11::to_string(kernel_number++), Measurement((end - start) / _scale_factor, _unit));
+        if(output_timestamps)
+        {
+            // The scale factor is cast to cl_ulong so the timestamps are scaled with integer division.
+            measurements.emplace("[start]" + name, Measurement(start / static_cast<cl_ulong>(_scale_factor), _unit));
+            measurements.emplace("[queued]" + name, Measurement(queued / static_cast<cl_ulong>(_scale_factor), _unit));
+            measurements.emplace("[flushed]" + name, Measurement(flushed / static_cast<cl_ulong>(_scale_factor), _unit));
+            measurements.emplace("[end]" + name, Measurement(end / static_cast<cl_ulong>(_scale_factor), _unit));
+        }
+        else
+        {
+            measurements.emplace(name, Measurement((end - start) / _scale_factor, _unit));
+        }
}
return measurements;
}
+
+/** Report one host-clock sample and one OpenCL-clock sample taken back to back.
+ *
+ * Only the timestamp variant (output_timestamps == true) produces entries:
+ * - "Now Wall clock": host time since the clock's epoch, in microseconds.
+ * - "Now OpenCL": queue time of a dummy marker, divided by the scale factor, in _unit.
+ *
+ * @return The two entries above, or an empty map for the timer variant.
+ */
+template <bool output_timestamps>
+Instrument::MeasurementsMap OpenCLClock<output_timestamps>::test_measurements() const
+{
+    MeasurementsMap measurements;
+
+    if(output_timestamps)
+    {
+        // The OpenCL clock and the wall clock are not in sync, so both are sampled
+        // here; the offset between the two clocks can be derived from this pair.
+        ::cl::Event event;
+        // Zero-initialised so the value is defined even if the profiling query reports an error.
+        cl_ulong now_gpu = 0;
+
+        // Retrieve the current CPU clock, then immediately enqueue a dummy marker
+        const std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
+        CLScheduler::get().queue().enqueueMarker(&event);
+
+        // Drain the queue before querying the marker's profiling information
+        CLScheduler::get().queue().finish();
+        // Access the time at which the marker was enqueued:
+        event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
+
+        // Convert explicitly to microseconds: the tick period of high_resolution_clock is
+        // implementation-defined, so dividing the raw tick count by 1000 is only correct
+        // when the clock happens to tick in nanoseconds.
+        const auto now_cpu_us = std::chrono::duration_cast<std::chrono::microseconds>(now_cpu.time_since_epoch()).count();
+
+        measurements.emplace("Now Wall clock", Measurement(now_cpu_us, "us"));
+        measurements.emplace("Now OpenCL", Measurement(now_gpu / static_cast<cl_ulong>(_scale_factor), _unit));
+    }
+
+    return measurements;
+}
+
} // namespace framework
} // namespace test
} // namespace arm_compute
+
+// Explicitly instantiate both variants: the member functions are defined in this
+// source file, so these instantiations are what emit their code for other
+// translation units to link against.
+template class arm_compute::test::framework::OpenCLClock<true>;
+template class arm_compute::test::framework::OpenCLClock<false>;