blob: ab69732559d7266a703a36f8bb58d4328f527a94 [file] [log] [blame]
Benoit Jacoba0ba3ac2019-04-08 12:00:37 -04001#include "blocking_counter.h"
2
3#include <chrono>
4#include <thread>
5
6#include "check_macros.h"
7#include "time.h"
8
9namespace ruy {
10
// Length, in seconds, of one busy-wait burst in BlockingCounter::Wait()
// before the waiting thread gives up the CPU by sleeping.
static constexpr float kBlockingCounterMaxBusyWaitSeconds = 0.002f;
12
13void BlockingCounter::Reset(std::size_t initial_count) {
14 std::size_t old_count_value = count_.load(std::memory_order_relaxed);
15 RUY_DCHECK_EQ(old_count_value, 0);
16 (void)old_count_value;
17 count_.store(initial_count, std::memory_order_release);
18}
19
20bool BlockingCounter::DecrementCount() {
21 std::size_t old_count_value = count_.fetch_sub(1, std::memory_order_acq_rel);
22 RUY_DCHECK_GT(old_count_value, 0);
23 std::size_t count_value = old_count_value - 1;
24 return count_value == 0;
25}
26
27void BlockingCounter::Wait() {
28 // Busy-wait until the count value is 0.
29 const std::int64_t wait_duration = static_cast<std::int64_t>(
30 TimeFrequency() * kBlockingCounterMaxBusyWaitSeconds);
31 std::int64_t wait_end = TimeNowRelaxed() + wait_duration;
32
33 while (count_.load(std::memory_order_acquire)) {
34 if (TimeNowRelaxed() > wait_end) {
35 // If we are unlucky, the blocking thread (that calls DecrementCount)
36 // and the blocked thread (here, calling Wait) may be scheduled on
37 // the same CPU, so the busy-waiting of the present thread may prevent
38 // the blocking thread from resuming and unblocking.
39 // If we are even unluckier, the priorities of the present thread
40 // might be higher than that of the blocking thread, so just yielding
41 // wouldn't allow the blocking thread to resume. So we sleep for
42 // a substantial amount of time in that case. Notice that we only
43 // do so after having busy-waited for kMaxBusyWaitNOPs, which is
44 // typically several milliseconds, so sleeping 1 more millisecond
45 // isn't terrible at that point.
46 //
47 // How this is mitigated in practice:
48 // In practice, it is well known that the application should be
49 // conservative in choosing how many threads to tell gemmlowp to use,
50 // as it's hard to know how many CPU cores it will get to run on,
51 // on typical mobile devices.
52 // It seems impossible for gemmlowp to make this choice automatically,
53 // which is why gemmlowp's default is to use only 1 thread, and
54 // applications may override that if they know that they can count on
55 // using more than that.
56 std::this_thread::sleep_for(std::chrono::milliseconds(1));
57 wait_end = TimeNowRelaxed() + wait_duration;
58 }
59 }
60}
61
62} // namespace ruy