Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2017 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can |
| 5 | * be found in the LICENSE file. |
| 6 | * |
| 7 | */ |
| 8 | |
| 9 | // |
| 10 | // |
| 11 | // |
| 12 | |
| 13 | #include <stdio.h> |
| 14 | #include <stdlib.h> |
| 15 | #include <string.h> |
| 16 | #include <assert.h> |
| 17 | |
| 18 | // |
| 19 | // |
| 20 | // |
| 21 | |
| 22 | #include "context.h" |
| 23 | #include "block.h" |
| 24 | #include "grid.h" |
| 25 | #include "common/cl/assert_cl.h" |
| 26 | #include "config_cl.h" |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 27 | #include "runtime_cl_12.h" |
| 28 | #include "export_cl_12.h" |
| 29 | |
| 30 | // |
| 31 | // |
| 32 | // |
| 33 | |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 34 | static |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 35 | void |
| 36 | skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq) |
| 37 | { |
| 38 | // save size |
| 39 | runtime->block_pool.size = &runtime->config->block_pool; |
| 40 | |
| 41 | // create block extent |
| 42 | skc_extent_pdrw_alloc(runtime, |
| 43 | &runtime->block_pool.blocks, |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 44 | runtime->block_pool.size->pool_size * |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 45 | runtime->config->block.bytes); |
| 46 | |
| 47 | // allocate block pool ids |
| 48 | skc_extent_pdrw_alloc(runtime, |
| 49 | &runtime->block_pool.ids, |
| 50 | runtime->block_pool.size->ring_pow2 * sizeof(skc_uint)); |
| 51 | |
| 52 | // allocate block pool atomics |
| 53 | skc_extent_phr_pdrw_alloc(runtime, |
| 54 | &runtime->block_pool.atomics, |
| 55 | sizeof(union skc_block_pool_atomic)); |
| 56 | |
| 57 | // acquire pool id and atomic initialization kernels |
| 58 | cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS); |
| 59 | cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS); |
| 60 | |
| 61 | // init ids |
| 62 | cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw)); |
| 63 | cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size))); |
| 64 | |
| 65 | // the kernel grid is shaped by the target device -- always 2 for atomics |
| 66 | skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS, |
| 67 | cq,k0,runtime->block_pool.size->pool_size, |
| 68 | 0,NULL,NULL); |
| 69 | |
| 70 | // init atomics |
| 71 | cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw)); |
| 72 | cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size))); |
| 73 | |
| 74 | // the kernel grid is shaped by the target device |
| 75 | skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS, |
| 76 | cq,k1,2, |
| 77 | 0,NULL,NULL); |
| 78 | |
| 79 | // kickstart kernel execution |
| 80 | cl(Flush(cq)); |
| 81 | |
| 82 | // release kernels |
| 83 | cl(ReleaseKernel(k0)); |
| 84 | cl(ReleaseKernel(k1)); |
| 85 | } |
| 86 | |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 87 | static |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 88 | void |
| 89 | skc_block_pool_dispose(struct skc_runtime * const runtime) |
| 90 | { |
| 91 | skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics); |
| 92 | skc_extent_pdrw_free (runtime,&runtime->block_pool.ids); |
| 93 | skc_extent_pdrw_free (runtime,&runtime->block_pool.blocks); |
| 94 | } |
| 95 | |
| 96 | // |
| 97 | // |
| 98 | // |
| 99 | |
| 100 | static |
| 101 | bool |
| 102 | skc_runtime_yield(struct skc_runtime * const runtime) |
| 103 | { |
| 104 | return skc_scheduler_yield(runtime->scheduler); |
| 105 | } |
| 106 | |
| 107 | static |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 108 | void |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 109 | skc_runtime_wait(struct skc_runtime * const runtime) |
| 110 | { |
| 111 | skc_scheduler_wait(runtime->scheduler); |
| 112 | } |
| 113 | |
| 114 | // |
| 115 | // |
| 116 | // |
| 117 | |
| 118 | skc_err |
| 119 | skc_runtime_cl_12_create(struct skc_context * const context, |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 120 | cl_context context_cl, |
| 121 | cl_device_id device_id_cl) |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 122 | { |
| 123 | // allocate the runtime |
| 124 | struct skc_runtime * const runtime = malloc(sizeof(*runtime)); |
| 125 | |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 126 | // save off CL objects |
| 127 | runtime->cl.context = context_cl; |
| 128 | runtime->cl.device_id = device_id_cl; |
| 129 | |
| 130 | // query device alignment |
| 131 | cl_uint align_bits; |
| 132 | |
| 133 | cl(GetDeviceInfo(device_id_cl, |
| 134 | CL_DEVICE_MEM_BASE_ADDR_ALIGN, |
| 135 | sizeof(align_bits), |
| 136 | &align_bits, |
| 137 | NULL)); |
| 138 | |
| 139 | runtime->cl.align_bytes = align_bits / 8; |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 140 | |
| 141 | // create device |
| 142 | skc_device_create(runtime); |
| 143 | |
| 144 | // create the host and device allocators |
| 145 | skc_allocator_host_create(runtime); |
| 146 | skc_allocator_device_create(runtime); |
| 147 | |
| 148 | // how many slots in the scheduler? |
| 149 | runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size); |
| 150 | |
| 151 | // allocate deps structure |
| 152 | runtime->deps = skc_grid_deps_create(runtime, |
| 153 | runtime->scheduler, |
| 154 | runtime->config->block_pool.pool_size); |
| 155 | |
| 156 | // initialize cq pool |
| 157 | skc_cq_pool_create(runtime, |
| 158 | &runtime->cq_pool, |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 159 | runtime->config->cq_pool.cq_props, |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 160 | runtime->config->cq_pool.size); |
| 161 | |
| 162 | // acquire in-order cq |
| 163 | cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime); |
| 164 | |
| 165 | // initialize block pool |
| 166 | skc_block_pool_create(runtime,cq); |
| 167 | |
| 168 | // intialize handle pool |
| 169 | skc_handle_pool_create(runtime, |
| 170 | &runtime->handle_pool, |
| 171 | runtime->config->handle_pool.size, |
| 172 | runtime->config->handle_pool.width, |
| 173 | runtime->config->handle_pool.recs); |
| 174 | |
| 175 | // |
| 176 | // initialize pfns |
| 177 | // |
| 178 | // FIXME -- at this point we will have identified which device we've |
| 179 | // targeted and will load a DLL (or select from a built-in library) |
| 180 | // that contains all the pfns. |
| 181 | // |
| 182 | context->runtime = runtime; |
| 183 | |
| 184 | context->yield = skc_runtime_yield; |
| 185 | context->wait = skc_runtime_wait; |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 186 | |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 187 | context->path_builder = skc_path_builder_cl_12_create; |
| 188 | context->path_retain = skc_runtime_path_host_retain; |
| 189 | context->path_release = skc_runtime_path_host_release; |
| 190 | context->path_flush = skc_runtime_path_host_flush; |
| 191 | |
| 192 | context->raster_builder = skc_raster_builder_cl_12_create; |
| 193 | context->raster_retain = skc_runtime_raster_host_retain; |
| 194 | context->raster_release = skc_runtime_raster_host_release; |
| 195 | context->raster_flush = skc_runtime_raster_host_flush; |
| 196 | |
| 197 | context->composition = skc_composition_cl_12_create; |
| 198 | context->styling = skc_styling_cl_12_create; |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 199 | |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 200 | context->surface = skc_surface_cl_12_create; |
| 201 | |
| 202 | // block on pool creation |
| 203 | cl(Finish(cq)); |
| 204 | |
| 205 | // dispose of in-order cq |
| 206 | skc_runtime_release_cq_in_order(runtime,cq); |
| 207 | |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 208 | return SKC_ERR_SUCCESS; |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 209 | }; |
| 210 | |
| 211 | // |
| 212 | // |
| 213 | // |
| 214 | |
| 215 | skc_err |
| 216 | skc_runtime_cl_12_dispose(struct skc_context * const context) |
| 217 | { |
| 218 | // |
| 219 | // FIXME -- incomplete |
| 220 | // |
| 221 | fprintf(stderr,"%s incomplete!\n",__func__); |
| 222 | |
| 223 | struct skc_runtime * runtime = context->runtime; |
| 224 | |
| 225 | skc_allocator_device_dispose(runtime); |
| 226 | skc_allocator_host_dispose(runtime); |
| 227 | |
| 228 | skc_scheduler_dispose(context->runtime,context->runtime->scheduler); |
| 229 | |
| 230 | skc_grid_deps_dispose(context->runtime->deps); |
| 231 | |
| 232 | skc_cq_pool_dispose(runtime,&runtime->cq_pool); |
| 233 | |
| 234 | skc_block_pool_dispose(context->runtime); |
| 235 | |
| 236 | // skc_handle_pool_dispose(context->runtime); |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 237 | |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 238 | return SKC_ERR_SUCCESS; |
| 239 | } |
| 240 | |
| 241 | // |
| 242 | // TEMPORARY BENCHMARK |
| 243 | // |
| 244 | |
| 245 | #if 1 |
| 246 | |
| 247 | #include <windows.h> |
| 248 | |
| 249 | #define SKC_FRAMES_MASK 0x7F |
| 250 | #define SKC_FRAMES (SKC_FRAMES_MASK + 1) |
| 251 | |
| 252 | void |
| 253 | skc_runtime_cl_12_debug(struct skc_context * const context) |
| 254 | { |
| 255 | #ifdef NDEBUG |
| 256 | static skc_uint frames=0; |
| 257 | static LARGE_INTEGER StartingTime={0}, EndingTime; |
| 258 | |
| 259 | if ((frames++ & SKC_FRAMES_MASK) != SKC_FRAMES_MASK) |
| 260 | return; |
| 261 | |
| 262 | QueryPerformanceCounter(&EndingTime); |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 263 | |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 264 | LARGE_INTEGER ElapsedMicroseconds, Frequency; |
| 265 | |
| 266 | ElapsedMicroseconds.QuadPart = EndingTime.QuadPart - StartingTime.QuadPart; |
| 267 | |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 268 | QueryPerformanceFrequency(&Frequency); |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 269 | |
| 270 | double const msecs_total = 1000.0 * ElapsedMicroseconds.QuadPart / Frequency.QuadPart; |
| 271 | double const msecs_frame = msecs_total / SKC_FRAMES; |
| 272 | |
| 273 | printf("Frames / Total / Per : %u / %.3f / %.3f\n", |
| 274 | SKC_FRAMES,msecs_total,msecs_frame); |
| 275 | #endif |
| 276 | |
| 277 | struct skc_runtime * const runtime = context->runtime; |
Allan MacKinnon | c110e79 | 2018-06-21 09:09:56 -0700 | [diff] [blame^] | 278 | |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 279 | // acquire out-of-order cq |
| 280 | cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime); |
| 281 | |
| 282 | // copy atomics to host |
| 283 | skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL); |
| 284 | |
| 285 | // block until complete |
| 286 | cl(Finish(cq)); |
| 287 | |
| 288 | // dispose of out-of-order cq |
| 289 | skc_runtime_release_cq_in_order(runtime,cq); |
| 290 | |
| 291 | union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr; |
| 292 | |
| 293 | skc_uint const available = bp_atomic->writes - bp_atomic->reads; |
| 294 | skc_uint const inuse = runtime->config->block_pool.pool_size - available; |
| 295 | |
| 296 | fprintf(stderr,"w/r/f/a: %9u - %9u = %9u : %6.2f MB\n", |
| 297 | bp_atomic->writes, |
| 298 | bp_atomic->reads, |
| 299 | available, |
| 300 | (inuse * runtime->config->block.bytes) / (1024.0*1024.0)); |
| 301 | |
| 302 | if (available >= (1<<27)) |
| 303 | { |
| 304 | fprintf(stderr,"block pool corrupted!\n"); |
| 305 | exit(-1); |
| 306 | } |
| 307 | |
| 308 | // |
| 309 | // |
| 310 | // |
| 311 | #ifdef NDEBUG |
| 312 | QueryPerformanceCounter(&StartingTime); |
| 313 | #endif |
| 314 | } |
| 315 | |
| 316 | #endif |
| 317 | |
| 318 | // |
| 319 | // |
| 320 | // |