#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_CONTEXT_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_CONTEXT_H_

#include <cstddef>
#include <memory>
#include <vector>

#include "allocator.h"
#include "path.h"
#include "thread_pool.h"
#include "trace.h"
#include "tune.h"

namespace ruy {

// The state private to each Ruy thread.
struct PerThreadState {
  // Each thread may be running on a different microarchitecture. For example,
  // some threads may be on big cores, while others are on little cores. Thus,
  // it's best for the tuning to be per-thread.
  TuningResolver tuning_resolver;
  // Each thread has its own local allocator.
  Allocator allocator;
};

// A Context holds runtime information used by Ruy. It owns runtime resources
// such as the workers thread pool and the allocator (which holds buffers for
// temporary data), as well as runtime options controlling which Paths are
// enabled (typically based on which instruction sets are detected) and how
// many threads to use.
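//
// Example usage (an illustrative sketch, not part of this header; Mul, Matrix
// and friends are declared in other ruy headers):
//
//   ruy::Context context;
//   context.max_num_threads = 4;  // allow up to 4 worker threads
//   // ... then pass &context to a ruy entry point such as Mul().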
struct Context final {
  Path last_taken_path = Path::kNone;
  Tuning explicit_tuning = Tuning::kAuto;
  // Allocator for the main thread's work before invoking the thread pool.
  // Our simple Allocator does not allow reserving/allocating more blocks
  // while it's already in committed state, so the main thread needs both
  // this allocator and its per-thread allocator.
  std::unique_ptr<Allocator> main_allocator;
  ThreadPool workers_pool;
  int max_num_threads = 1;
  // State for each thread in the thread pool. Entry 0 is the main thread.
  std::vector<std::unique_ptr<PerThreadState>> per_thread_states;
  TracingContext tracing;

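  // Ensures per_thread_states has at least `thread_count` entries, creating
  // new PerThreadState objects as needed; existing entries, and thus their
  // per-thread tuning state, are preserved. Callers are expected to invoke
  // this before dispatching a multi-threaded workload onto workers_pool.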
  void EnsureNPerThreadStates(int thread_count) {
    while (per_thread_states.size() < static_cast<std::size_t>(thread_count)) {
      per_thread_states.emplace_back(new PerThreadState);
    }
  }

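  // SetRuntimeEnabledPaths overrides which Paths may be selected at runtime;
  // GetRuntimeEnabledPaths returns the currently enabled set. Their
  // definitions live outside this header; presumably, when no override has
  // been set (runtime_enabled_paths_ == Path::kNone), the enabled set is
  // derived from the instruction sets detected at runtime, per the class
  // comment above.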
  void SetRuntimeEnabledPaths(Path paths);
  Path GetRuntimeEnabledPaths();

 private:
  Path runtime_enabled_paths_ = Path::kNone;
};

}  // namespace ruy

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_RUY_CONTEXT_H_