/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */
8
9//
10//
11//
12
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <assert.h>
17
18//
19//
20//
21
22#include "context.h"
23#include "block.h"
24#include "grid.h"
25#include "common/cl/assert_cl.h"
26#include "config_cl.h"
Allan MacKinnon4359d522018-06-19 13:57:04 -070027#include "runtime_cl_12.h"
28#include "export_cl_12.h"
29
30//
31//
32//
33
Allan MacKinnonc110e792018-06-21 09:09:56 -070034static
Allan MacKinnon4359d522018-06-19 13:57:04 -070035void
36skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq)
37{
38 // save size
39 runtime->block_pool.size = &runtime->config->block_pool;
40
41 // create block extent
42 skc_extent_pdrw_alloc(runtime,
43 &runtime->block_pool.blocks,
Allan MacKinnonc110e792018-06-21 09:09:56 -070044 runtime->block_pool.size->pool_size *
Allan MacKinnon4359d522018-06-19 13:57:04 -070045 runtime->config->block.bytes);
46
47 // allocate block pool ids
48 skc_extent_pdrw_alloc(runtime,
49 &runtime->block_pool.ids,
50 runtime->block_pool.size->ring_pow2 * sizeof(skc_uint));
51
52 // allocate block pool atomics
53 skc_extent_phr_pdrw_alloc(runtime,
54 &runtime->block_pool.atomics,
55 sizeof(union skc_block_pool_atomic));
56
57 // acquire pool id and atomic initialization kernels
58 cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS);
59 cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS);
60
61 // init ids
62 cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw));
63 cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));
64
65 // the kernel grid is shaped by the target device -- always 2 for atomics
66 skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS,
67 cq,k0,runtime->block_pool.size->pool_size,
68 0,NULL,NULL);
69
70 // init atomics
71 cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw));
72 cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));
73
74 // the kernel grid is shaped by the target device
75 skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS,
76 cq,k1,2,
77 0,NULL,NULL);
78
79 // kickstart kernel execution
80 cl(Flush(cq));
81
82 // release kernels
83 cl(ReleaseKernel(k0));
84 cl(ReleaseKernel(k1));
85}
86
Allan MacKinnonc110e792018-06-21 09:09:56 -070087static
Allan MacKinnon4359d522018-06-19 13:57:04 -070088void
89skc_block_pool_dispose(struct skc_runtime * const runtime)
90{
91 skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics);
92 skc_extent_pdrw_free (runtime,&runtime->block_pool.ids);
93 skc_extent_pdrw_free (runtime,&runtime->block_pool.blocks);
94}
95
96//
97//
98//
99
100static
101bool
102skc_runtime_yield(struct skc_runtime * const runtime)
103{
104 return skc_scheduler_yield(runtime->scheduler);
105}
106
107static
Allan MacKinnonc110e792018-06-21 09:09:56 -0700108void
Allan MacKinnon4359d522018-06-19 13:57:04 -0700109skc_runtime_wait(struct skc_runtime * const runtime)
110{
111 skc_scheduler_wait(runtime->scheduler);
112}
113
114//
115//
116//
117
118skc_err
119skc_runtime_cl_12_create(struct skc_context * const context,
Allan MacKinnonc110e792018-06-21 09:09:56 -0700120 cl_context context_cl,
121 cl_device_id device_id_cl)
Allan MacKinnon4359d522018-06-19 13:57:04 -0700122{
123 // allocate the runtime
124 struct skc_runtime * const runtime = malloc(sizeof(*runtime));
125
Allan MacKinnonc110e792018-06-21 09:09:56 -0700126 // save off CL objects
127 runtime->cl.context = context_cl;
128 runtime->cl.device_id = device_id_cl;
129
130 // query device alignment
131 cl_uint align_bits;
132
133 cl(GetDeviceInfo(device_id_cl,
134 CL_DEVICE_MEM_BASE_ADDR_ALIGN,
135 sizeof(align_bits),
136 &align_bits,
137 NULL));
138
139 runtime->cl.align_bytes = align_bits / 8;
Allan MacKinnon4359d522018-06-19 13:57:04 -0700140
141 // create device
142 skc_device_create(runtime);
143
144 // create the host and device allocators
145 skc_allocator_host_create(runtime);
146 skc_allocator_device_create(runtime);
147
148 // how many slots in the scheduler?
149 runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size);
150
151 // allocate deps structure
152 runtime->deps = skc_grid_deps_create(runtime,
153 runtime->scheduler,
154 runtime->config->block_pool.pool_size);
155
156 // initialize cq pool
157 skc_cq_pool_create(runtime,
158 &runtime->cq_pool,
Allan MacKinnonc110e792018-06-21 09:09:56 -0700159 runtime->config->cq_pool.cq_props,
Allan MacKinnon4359d522018-06-19 13:57:04 -0700160 runtime->config->cq_pool.size);
161
162 // acquire in-order cq
163 cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);
164
165 // initialize block pool
166 skc_block_pool_create(runtime,cq);
167
168 // intialize handle pool
169 skc_handle_pool_create(runtime,
170 &runtime->handle_pool,
171 runtime->config->handle_pool.size,
172 runtime->config->handle_pool.width,
173 runtime->config->handle_pool.recs);
174
175 //
176 // initialize pfns
177 //
178 // FIXME -- at this point we will have identified which device we've
179 // targeted and will load a DLL (or select from a built-in library)
180 // that contains all the pfns.
181 //
182 context->runtime = runtime;
183
184 context->yield = skc_runtime_yield;
185 context->wait = skc_runtime_wait;
Allan MacKinnonc110e792018-06-21 09:09:56 -0700186
Allan MacKinnon4359d522018-06-19 13:57:04 -0700187 context->path_builder = skc_path_builder_cl_12_create;
188 context->path_retain = skc_runtime_path_host_retain;
189 context->path_release = skc_runtime_path_host_release;
190 context->path_flush = skc_runtime_path_host_flush;
191
192 context->raster_builder = skc_raster_builder_cl_12_create;
193 context->raster_retain = skc_runtime_raster_host_retain;
194 context->raster_release = skc_runtime_raster_host_release;
195 context->raster_flush = skc_runtime_raster_host_flush;
196
197 context->composition = skc_composition_cl_12_create;
198 context->styling = skc_styling_cl_12_create;
Allan MacKinnonc110e792018-06-21 09:09:56 -0700199
Allan MacKinnon4359d522018-06-19 13:57:04 -0700200 context->surface = skc_surface_cl_12_create;
201
202 // block on pool creation
203 cl(Finish(cq));
204
205 // dispose of in-order cq
206 skc_runtime_release_cq_in_order(runtime,cq);
207
Allan MacKinnonc110e792018-06-21 09:09:56 -0700208 return SKC_ERR_SUCCESS;
Allan MacKinnon4359d522018-06-19 13:57:04 -0700209};
210
211//
212//
213//
214
215skc_err
216skc_runtime_cl_12_dispose(struct skc_context * const context)
217{
218 //
219 // FIXME -- incomplete
220 //
221 fprintf(stderr,"%s incomplete!\n",__func__);
222
223 struct skc_runtime * runtime = context->runtime;
224
225 skc_allocator_device_dispose(runtime);
226 skc_allocator_host_dispose(runtime);
227
228 skc_scheduler_dispose(context->runtime,context->runtime->scheduler);
229
230 skc_grid_deps_dispose(context->runtime->deps);
231
232 skc_cq_pool_dispose(runtime,&runtime->cq_pool);
233
234 skc_block_pool_dispose(context->runtime);
235
236 // skc_handle_pool_dispose(context->runtime);
Allan MacKinnonc110e792018-06-21 09:09:56 -0700237
Allan MacKinnon4359d522018-06-19 13:57:04 -0700238 return SKC_ERR_SUCCESS;
239}
240
241//
242// TEMPORARY BENCHMARK
243//
244
245#if 1
246
247#include <windows.h>
248
249#define SKC_FRAMES_MASK 0x7F
250#define SKC_FRAMES (SKC_FRAMES_MASK + 1)
251
252void
253skc_runtime_cl_12_debug(struct skc_context * const context)
254{
255#ifdef NDEBUG
256 static skc_uint frames=0;
257 static LARGE_INTEGER StartingTime={0}, EndingTime;
258
259 if ((frames++ & SKC_FRAMES_MASK) != SKC_FRAMES_MASK)
260 return;
261
262 QueryPerformanceCounter(&EndingTime);
Allan MacKinnonc110e792018-06-21 09:09:56 -0700263
Allan MacKinnon4359d522018-06-19 13:57:04 -0700264 LARGE_INTEGER ElapsedMicroseconds, Frequency;
265
266 ElapsedMicroseconds.QuadPart = EndingTime.QuadPart - StartingTime.QuadPart;
267
Allan MacKinnonc110e792018-06-21 09:09:56 -0700268 QueryPerformanceFrequency(&Frequency);
Allan MacKinnon4359d522018-06-19 13:57:04 -0700269
270 double const msecs_total = 1000.0 * ElapsedMicroseconds.QuadPart / Frequency.QuadPart;
271 double const msecs_frame = msecs_total / SKC_FRAMES;
272
273 printf("Frames / Total / Per : %u / %.3f / %.3f\n",
274 SKC_FRAMES,msecs_total,msecs_frame);
275#endif
276
277 struct skc_runtime * const runtime = context->runtime;
Allan MacKinnonc110e792018-06-21 09:09:56 -0700278
Allan MacKinnon4359d522018-06-19 13:57:04 -0700279 // acquire out-of-order cq
280 cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);
281
282 // copy atomics to host
283 skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL);
284
285 // block until complete
286 cl(Finish(cq));
287
288 // dispose of out-of-order cq
289 skc_runtime_release_cq_in_order(runtime,cq);
290
291 union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr;
292
293 skc_uint const available = bp_atomic->writes - bp_atomic->reads;
294 skc_uint const inuse = runtime->config->block_pool.pool_size - available;
295
296 fprintf(stderr,"w/r/f/a: %9u - %9u = %9u : %6.2f MB\n",
297 bp_atomic->writes,
298 bp_atomic->reads,
299 available,
300 (inuse * runtime->config->block.bytes) / (1024.0*1024.0));
301
302 if (available >= (1<<27))
303 {
304 fprintf(stderr,"block pool corrupted!\n");
305 exit(-1);
306 }
307
308 //
309 //
310 //
311#ifdef NDEBUG
312 QueryPerformanceCounter(&StartingTime);
313#endif
314}
315
316#endif
317
318//
319//
320//