blob: d1a9631301862981cf8175e7b359f2127d5f9a1e [file] [log] [blame]
Jim Cownie33f7b242014-04-09 15:40:23 +00001//===----------------------------------------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is dual licensed under the MIT and the University of Illinois Open
6// Source Licenses. See LICENSE.txt for details.
7//
8//===----------------------------------------------------------------------===//
9
10
11#ifndef OFFLOAD_ENGINE_H_INCLUDED
12#define OFFLOAD_ENGINE_H_INCLUDED
13
14#include <limits.h>
15
16#include <list>
17#include <set>
18#include <map>
19#include "offload_common.h"
20#include "coi/coi_client.h"
21
// Half-open address range [start(), end()) described by a base address and
// a byte length.
class MemRange {
public:
    // Empty range based at the null address.
    MemRange() : m_start(0), m_length(0) {}

    // Range of len bytes beginning at addr.
    MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {}

    // First address of the range.
    const void* start() const { return m_start; }

    // Address one past the last byte of the range (start() + length()).
    const void* end() const {
        const char *first_byte = static_cast<const char*>(m_start);
        return first_byte + m_length;
    }

    // Size of the range in bytes.
    uint64_t length() const { return m_length; }

    // Returns true when start() < o.end() and end() > o.start().
    bool overlaps(const MemRange &o) const {
        // The ranges are disjoint as soon as one of them begins at or after
        // the point where the other one ends.
        if (start() >= o.end()) {
            return false;
        }
        if (end() <= o.start()) {
            return false;
        }
        return true;
    }

    // Returns true when o lies entirely inside this range, i.e.
    // start() <= o.start() and o.end() <= end().
    bool contains(const MemRange &o) const {
        if (o.start() < start()) {
            return false;
        }
        return o.end() <= end();
    }

private:
    const void* m_start;   // base address
    uint64_t    m_length;  // length in bytes
};
56
57// Data associated with a pointer variable
58class PtrData {
59public:
60 PtrData(const void *addr, uint64_t len) :
61 cpu_addr(addr, len), cpu_buf(0),
62 mic_addr(0), alloc_disp(0), mic_buf(0), mic_offset(0),
63 ref_count(0), is_static(false)
64 {}
65
66 //
67 // Copy constructor
68 //
69 PtrData(const PtrData& ptr):
70 cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf),
71 mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp),
72 mic_buf(ptr.mic_buf), mic_offset(ptr.mic_offset),
73 ref_count(ptr.ref_count), is_static(ptr.is_static)
74 {}
75
76 bool operator<(const PtrData &o) const {
77 // Variables are sorted by the CPU start address.
78 // Overlapping memory ranges are considered equal.
79 return (cpu_addr.start() < o.cpu_addr.start()) &&
80 !cpu_addr.overlaps(o.cpu_addr);
81 }
82
83 long add_reference() {
84 if (is_static) {
85 return LONG_MAX;
86 }
87#ifndef TARGET_WINNT
88 return __sync_fetch_and_add(&ref_count, 1);
89#else // TARGET_WINNT
90 return _InterlockedIncrement(&ref_count) - 1;
91#endif // TARGET_WINNT
92 }
93
94 long remove_reference() {
95 if (is_static) {
96 return LONG_MAX;
97 }
98#ifndef TARGET_WINNT
99 return __sync_sub_and_fetch(&ref_count, 1);
100#else // TARGET_WINNT
101 return _InterlockedDecrement(&ref_count);
102#endif // TARGET_WINNT
103 }
104
105 long get_reference() const {
106 if (is_static) {
107 return LONG_MAX;
108 }
109 return ref_count;
110 }
111
112public:
113 // CPU address range
114 const MemRange cpu_addr;
115
116 // CPU and MIC buffers
117 COIBUFFER cpu_buf;
118 COIBUFFER mic_buf;
119
120 // placeholder for buffer address on mic
121 uint64_t mic_addr;
122
123 uint64_t alloc_disp;
124
125 // additional offset to pointer data on MIC for improving bandwidth for
126 // data which is not 4K aligned
127 uint32_t mic_offset;
128
129 // if true buffers are created from static memory
130 bool is_static;
131 mutex_t alloc_ptr_data_lock;
132
133private:
134 // reference count for the entry
135 long ref_count;
136};
137
138typedef std::list<PtrData*> PtrDataList;
139
140// Data associated with automatic variable
141class AutoData {
142public:
143 AutoData(const void *addr, uint64_t len) :
144 cpu_addr(addr, len), ref_count(0)
145 {}
146
147 bool operator<(const AutoData &o) const {
148 // Variables are sorted by the CPU start address.
149 // Overlapping memory ranges are considered equal.
150 return (cpu_addr.start() < o.cpu_addr.start()) &&
151 !cpu_addr.overlaps(o.cpu_addr);
152 }
153
154 long add_reference() {
155#ifndef TARGET_WINNT
156 return __sync_fetch_and_add(&ref_count, 1);
157#else // TARGET_WINNT
158 return _InterlockedIncrement(&ref_count) - 1;
159#endif // TARGET_WINNT
160 }
161
162 long remove_reference() {
163#ifndef TARGET_WINNT
164 return __sync_sub_and_fetch(&ref_count, 1);
165#else // TARGET_WINNT
166 return _InterlockedDecrement(&ref_count);
167#endif // TARGET_WINNT
168 }
169
170 long get_reference() const {
171 return ref_count;
172 }
173
174public:
175 // CPU address range
176 const MemRange cpu_addr;
177
178private:
179 // reference count for the entry
180 long ref_count;
181};
182
183// Set of autimatic variables
184typedef std::set<AutoData> AutoSet;
185
// Description of one target image. The struct only stores the pointers it
// is given; it does not copy or release what they point to.
struct TargetImage
{
    // library name
    const char* name;

    // contents and size
    const void* data;
    uint64_t    size;

    // file of origin and offset within that file
    const char* origin;
    uint64_t    offset;

    // Records the image name, its contents/size, and where it came from.
    TargetImage(const char *_name, const void *_data, uint64_t _size,
                const char *_origin, uint64_t _offset) :
        name(_name),
        data(_data),
        size(_size),
        origin(_origin),
        offset(_offset)
    {}
};
206
207typedef std::list<TargetImage> TargetImageList;
208
209// Data associated with persistent auto objects
210struct PersistData
211{
212 PersistData(const void *addr, uint64_t routine_num, uint64_t size) :
213 stack_cpu_addr(addr), routine_id(routine_num)
214 {
215 stack_ptr_data = new PtrData(0, size);
216 }
Alp Tokerc2d5e612014-06-01 18:28:36 +0000217 // 1-st key value - beginning of the stack at CPU
Jim Cownie33f7b242014-04-09 15:40:23 +0000218 const void * stack_cpu_addr;
219 // 2-nd key value - identifier of routine invocation at CPU
220 uint64_t routine_id;
221 // corresponded PtrData; only stack_ptr_data->mic_buf is used
222 PtrData * stack_ptr_data;
223 // used to get offset of the variable in stack buffer
224 char * cpu_stack_addr;
225};
226
227typedef std::list<PersistData> PersistDataList;
228
229// class representing a single engine
230struct Engine {
231 friend void __offload_init_library_once(void);
232 friend void __offload_fini_library(void);
233
234#define check_result(res, tag, ...) \
235 { \
236 if (res == COI_PROCESS_DIED) { \
237 fini_process(true); \
238 exit(1); \
239 } \
240 if (res != COI_SUCCESS) { \
241 __liboffload_error_support(tag, __VA_ARGS__); \
242 exit(1); \
243 } \
244 }
245
246 int get_logical_index() const {
247 return m_index;
248 }
249
250 int get_physical_index() const {
251 return m_physical_index;
252 }
253
254 const COIPROCESS& get_process() const {
255 return m_process;
256 }
257
258 // initialize device
259 void init(void);
260
261 // add new library
262 void add_lib(const TargetImage &lib)
263 {
264 m_lock.lock();
265 m_ready = false;
266 m_images.push_back(lib);
267 m_lock.unlock();
268 }
269
270 COIRESULT compute(
271 const std::list<COIBUFFER> &buffers,
272 const void* data,
273 uint16_t data_size,
274 void* ret,
275 uint16_t ret_size,
276 uint32_t num_deps,
277 const COIEVENT* deps,
278 COIEVENT* event
279 );
280
281#ifdef MYO_SUPPORT
282 // temporary workaround for blocking behavior for myoiLibInit/Fini calls
283 void init_myo(COIEVENT *event) {
284 COIRESULT res;
285 res = COI::PipelineRunFunction(get_pipeline(),
286 m_funcs[c_func_myo_init],
287 0, 0, 0, 0, 0, 0, 0, 0, 0,
288 event);
289 check_result(res, c_pipeline_run_func, m_index, res);
290 }
291
292 void fini_myo(COIEVENT *event) {
293 COIRESULT res;
294 res = COI::PipelineRunFunction(get_pipeline(),
295 m_funcs[c_func_myo_fini],
296 0, 0, 0, 0, 0, 0, 0, 0, 0,
297 event);
298 check_result(res, c_pipeline_run_func, m_index, res);
299 }
300#endif // MYO_SUPPORT
301
302 //
303 // Memory association table
304 //
305 PtrData* find_ptr_data(const void *ptr) {
306 m_ptr_lock.lock();
307 PtrSet::iterator res = m_ptr_set.find(PtrData(ptr, 0));
308 m_ptr_lock.unlock();
309 if (res == m_ptr_set.end()) {
310 return 0;
311 }
312 return const_cast<PtrData*>(res.operator->());
313 }
314
315 PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
316 m_ptr_lock.lock();
317 std::pair<PtrSet::iterator, bool> res =
318 m_ptr_set.insert(PtrData(ptr, len));
319 PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
320 m_ptr_lock.unlock();
321
322 is_new = res.second;
323 if (is_new) {
324 // It's necessary to lock as soon as possible.
325 // unlock must be done at call site of insert_ptr_data at
326 // branch for is_new
327 ptr_data->alloc_ptr_data_lock.lock();
328 }
329 return ptr_data;
330 }
331
332 void remove_ptr_data(const void *ptr) {
333 m_ptr_lock.lock();
334 m_ptr_set.erase(PtrData(ptr, 0));
335 m_ptr_lock.unlock();
336 }
337
338 //
339 // Automatic variables
340 //
341 AutoData* find_auto_data(const void *ptr) {
342 AutoSet &auto_vars = get_auto_vars();
343 AutoSet::iterator res = auto_vars.find(AutoData(ptr, 0));
344 if (res == auto_vars.end()) {
345 return 0;
346 }
347 return const_cast<AutoData*>(res.operator->());
348 }
349
350 AutoData* insert_auto_data(const void *ptr, uint64_t len) {
351 AutoSet &auto_vars = get_auto_vars();
352 std::pair<AutoSet::iterator, bool> res =
353 auto_vars.insert(AutoData(ptr, len));
354 return const_cast<AutoData*>(res.first.operator->());
355 }
356
357 void remove_auto_data(const void *ptr) {
358 get_auto_vars().erase(AutoData(ptr, 0));
359 }
360
361 //
362 // Signals
363 //
364 void add_signal(const void *signal, OffloadDescriptor *desc) {
365 m_signal_lock.lock();
366 m_signal_map[signal] = desc;
367 m_signal_lock.unlock();
368 }
369
370 OffloadDescriptor* find_signal(const void *signal, bool remove) {
371 OffloadDescriptor *desc = 0;
372
373 m_signal_lock.lock();
374 {
375 SignalMap::iterator it = m_signal_map.find(signal);
376 if (it != m_signal_map.end()) {
377 desc = it->second;
378 if (remove) {
379 m_signal_map.erase(it);
380 }
381 }
382 }
383 m_signal_lock.unlock();
384
385 return desc;
386 }
387
388 // stop device process
389 void fini_process(bool verbose);
390
391 // list of stacks active at the engine
392 PersistDataList m_persist_list;
393
394private:
395 Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false),
396 m_proc_number(0)
397 {}
398
399 ~Engine() {
400 if (m_process != 0) {
401 fini_process(false);
402 }
403 }
404
405 // set indexes
406 void set_indexes(int logical_index, int physical_index) {
407 m_index = logical_index;
408 m_physical_index = physical_index;
409 }
410
411 // start process on device
412 void init_process();
413
414 void load_libraries(void);
415 void init_ptr_data(void);
416
417 // performs library intialization on the device side
418 pid_t init_device(void);
419
420private:
421 // get pipeline associated with a calling thread
422 COIPIPELINE get_pipeline(void);
423
424 // get automatic vars set associated with the calling thread
425 AutoSet& get_auto_vars(void);
426
427 // destructor for thread data
428 static void destroy_thread_data(void *data);
429
430private:
431 typedef std::set<PtrData> PtrSet;
432 typedef std::map<const void*, OffloadDescriptor*> SignalMap;
433
434 // device indexes
435 int m_index;
436 int m_physical_index;
437
438 // number of COI pipes created for the engine
439 long m_proc_number;
440
441 // process handle
442 COIPROCESS m_process;
443
444 // If false, device either has not been initialized or new libraries
445 // have been added.
446 bool m_ready;
447 mutex_t m_lock;
448
449 // List of libraries to be loaded
450 TargetImageList m_images;
451
452 // var table
453 PtrSet m_ptr_set;
454 mutex_t m_ptr_lock;
455
456 // signals
457 SignalMap m_signal_map;
458 mutex_t m_signal_lock;
459
460 // constants for accessing device function handles
461 enum {
462 c_func_compute = 0,
463#ifdef MYO_SUPPORT
464 c_func_myo_init,
465 c_func_myo_fini,
466#endif // MYO_SUPPORT
467 c_func_init,
468 c_func_var_table_size,
469 c_func_var_table_copy,
470 c_funcs_total
471 };
472 static const char* m_func_names[c_funcs_total];
473
474 // device function handles
475 COIFUNCTION m_funcs[c_funcs_total];
476
477 // int -> name mapping for device signals
478 static const int c_signal_max = 32;
479 static const char* c_signal_names[c_signal_max];
480};
481
482#endif // OFFLOAD_ENGINE_H_INCLUDED