Benoit Jacob | a0ba3ac | 2019-04-08 12:00:37 -0400 | [diff] [blame^] | 1 | #include "blocking_counter.h" |
| 2 | |
| 3 | #include <chrono> |
| 4 | #include <thread> |
| 5 | |
| 6 | #include "check_macros.h" |
| 7 | #include "time.h" |
| 8 | |
| 9 | namespace ruy { |
| 10 | |
// How long Wait() busy-spins before falling back to a 1 ms sleep, expressed
// in seconds (2e-3f = 2 milliseconds). Converted to timer ticks in Wait().
static constexpr float kBlockingCounterMaxBusyWaitSeconds = 2e-3f;
| 12 | |
| 13 | void BlockingCounter::Reset(std::size_t initial_count) { |
| 14 | std::size_t old_count_value = count_.load(std::memory_order_relaxed); |
| 15 | RUY_DCHECK_EQ(old_count_value, 0); |
| 16 | (void)old_count_value; |
| 17 | count_.store(initial_count, std::memory_order_release); |
| 18 | } |
| 19 | |
| 20 | bool BlockingCounter::DecrementCount() { |
| 21 | std::size_t old_count_value = count_.fetch_sub(1, std::memory_order_acq_rel); |
| 22 | RUY_DCHECK_GT(old_count_value, 0); |
| 23 | std::size_t count_value = old_count_value - 1; |
| 24 | return count_value == 0; |
| 25 | } |
| 26 | |
// Blocks until the count reaches 0. Spins (busy-waits) for up to
// kBlockingCounterMaxBusyWaitSeconds, then starts interleaving 1 ms sleeps
// so it stops hogging a CPU that the decrementing threads may need.
void BlockingCounter::Wait() {
  // Busy-wait until the count value is 0.
  // Convert the max busy-wait duration from seconds to timer ticks, using the
  // timer frequency reported by TimeFrequency().
  const std::int64_t wait_duration = static_cast<std::int64_t>(
      TimeFrequency() * kBlockingCounterMaxBusyWaitSeconds);
  std::int64_t wait_end = TimeNowRelaxed() + wait_duration;

  // The acquire load pairs with the acq_rel fetch_sub in DecrementCount(),
  // so memory effects from the decrementing threads are visible on exit.
  while (count_.load(std::memory_order_acquire)) {
    if (TimeNowRelaxed() > wait_end) {
      // If we are unlucky, the blocking thread (that calls DecrementCount)
      // and the blocked thread (here, calling Wait) may be scheduled on
      // the same CPU, so the busy-waiting of the present thread may prevent
      // the blocking thread from resuming and unblocking.
      // If we are even unluckier, the priorities of the present thread
      // might be higher than that of the blocking thread, so just yielding
      // wouldn't allow the blocking thread to resume. So we sleep for
      // a substantial amount of time in that case. Notice that we only
      // do so after having busy-waited for
      // kBlockingCounterMaxBusyWaitSeconds (2 ms), so sleeping 1 more
      // millisecond isn't terrible at that point.
      //
      // How this is mitigated in practice:
      // In practice, it is well known that the application should be
      // conservative in choosing how many threads to use for this kind of
      // work, as it's hard to know how many CPU cores it will get to run
      // on, on typical mobile devices.
      // NOTE(review): the remainder of this rationale was inherited from
      // gemmlowp (single-thread default unless the application overrides
      // it); confirm whether the same default applies here in ruy.
      std::this_thread::sleep_for(std::chrono::milliseconds(1));
      // Grant the spinner a fresh busy-wait budget after waking up.
      wait_end = TimeNowRelaxed() + wait_duration;
    }
  }
}
| 61 | |
| 62 | } // namespace ruy |