| /* |
| * Copyright 2017 Google Inc. |
| * |
| * Use of this source code is governed by a BSD-style license that can |
| * be found in the LICENSE file. |
| * |
| */ |
| |
| // |
| // |
| // |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <assert.h> |
| |
| // |
| // |
| // |
| |
| #include "context.h" |
| #include "block.h" |
| #include "grid.h" |
| #include "common/cl/assert_cl.h" |
| #include "config_cl.h" |
| #include "runtime_cl_12.h" |
| #include "export_cl_12.h" |
| |
| // |
| // |
| // |
| |
| static |
| void |
| skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq) |
| { |
| // save size |
| runtime->block_pool.size = &runtime->config->block_pool; |
| |
| // create block extent |
| skc_extent_pdrw_alloc(runtime, |
| &runtime->block_pool.blocks, |
| runtime->block_pool.size->pool_size * |
| runtime->config->block.bytes); |
| |
| // allocate block pool ids |
| skc_extent_pdrw_alloc(runtime, |
| &runtime->block_pool.ids, |
| runtime->block_pool.size->ring_pow2 * sizeof(skc_uint)); |
| |
| // allocate block pool atomics |
| skc_extent_phr_pdrw_alloc(runtime, |
| &runtime->block_pool.atomics, |
| sizeof(union skc_block_pool_atomic)); |
| |
| // acquire pool id and atomic initialization kernels |
| cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS); |
| cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS); |
| |
| // init ids |
| cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw)); |
| cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size))); |
| |
| // the kernel grid is shaped by the target device -- always 2 for atomics |
| skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS, |
| cq,k0,runtime->block_pool.size->pool_size, |
| 0,NULL,NULL); |
| |
| // init atomics |
| cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw)); |
| cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size))); |
| |
| // the kernel grid is shaped by the target device |
| skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS, |
| cq,k1,2, |
| 0,NULL,NULL); |
| |
| // kickstart kernel execution |
| cl(Flush(cq)); |
| |
| // release kernels |
| cl(ReleaseKernel(k0)); |
| cl(ReleaseKernel(k1)); |
| } |
| |
| static |
| void |
| skc_block_pool_dispose(struct skc_runtime * const runtime) |
| { |
| skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics); |
| skc_extent_pdrw_free (runtime,&runtime->block_pool.ids); |
| skc_extent_pdrw_free (runtime,&runtime->block_pool.blocks); |
| } |
| |
| // |
| // |
| // |
| |
| static |
| bool |
| skc_runtime_yield(struct skc_runtime * const runtime) |
| { |
| return skc_scheduler_yield(runtime->scheduler); |
| } |
| |
| static |
| void |
| skc_runtime_wait(struct skc_runtime * const runtime) |
| { |
| skc_scheduler_wait(runtime->scheduler); |
| } |
| |
| // |
| // |
| // |
| |
| skc_err |
| skc_runtime_cl_12_create(struct skc_context * const context, |
| cl_context context_cl, |
| cl_device_id device_id_cl) |
| { |
| // allocate the runtime |
| struct skc_runtime * const runtime = malloc(sizeof(*runtime)); |
| |
| // save off CL objects |
| runtime->cl.context = context_cl; |
| runtime->cl.device_id = device_id_cl; |
| |
| // query device alignment |
| cl_uint align_bits; |
| |
| cl(GetDeviceInfo(device_id_cl, |
| CL_DEVICE_MEM_BASE_ADDR_ALIGN, |
| sizeof(align_bits), |
| &align_bits, |
| NULL)); |
| |
| runtime->cl.align_bytes = align_bits / 8; |
| |
| // create device |
| skc_device_create(runtime); |
| |
| // create the host and device allocators |
| skc_allocator_host_create(runtime); |
| skc_allocator_device_create(runtime); |
| |
| // how many slots in the scheduler? |
| runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size); |
| |
| // allocate deps structure |
| runtime->deps = skc_grid_deps_create(runtime, |
| runtime->scheduler, |
| runtime->config->block_pool.pool_size); |
| |
| // initialize cq pool |
| skc_cq_pool_create(runtime, |
| &runtime->cq_pool, |
| runtime->config->cq_pool.cq_props, |
| runtime->config->cq_pool.size); |
| |
| // acquire in-order cq |
| cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime); |
| |
| // initialize block pool |
| skc_block_pool_create(runtime,cq); |
| |
| // intialize handle pool |
| skc_handle_pool_create(runtime, |
| &runtime->handle_pool, |
| runtime->config->handle_pool.size, |
| runtime->config->handle_pool.width, |
| runtime->config->handle_pool.recs); |
| |
| // |
| // initialize pfns |
| // |
| // FIXME -- at this point we will have identified which device we've |
| // targeted and will load a DLL (or select from a built-in library) |
| // that contains all the pfns. |
| // |
| context->runtime = runtime; |
| |
| context->yield = skc_runtime_yield; |
| context->wait = skc_runtime_wait; |
| |
| context->path_builder = skc_path_builder_cl_12_create; |
| context->path_retain = skc_runtime_path_host_retain; |
| context->path_release = skc_runtime_path_host_release; |
| context->path_flush = skc_runtime_path_host_flush; |
| |
| context->raster_builder = skc_raster_builder_cl_12_create; |
| context->raster_retain = skc_runtime_raster_host_retain; |
| context->raster_release = skc_runtime_raster_host_release; |
| context->raster_flush = skc_runtime_raster_host_flush; |
| |
| context->composition = skc_composition_cl_12_create; |
| context->styling = skc_styling_cl_12_create; |
| |
| context->surface = skc_surface_cl_12_create; |
| |
| // block on pool creation |
| cl(Finish(cq)); |
| |
| // dispose of in-order cq |
| skc_runtime_release_cq_in_order(runtime,cq); |
| |
| return SKC_ERR_SUCCESS; |
| }; |
| |
| // |
| // |
| // |
| |
| skc_err |
| skc_runtime_cl_12_dispose(struct skc_context * const context) |
| { |
| // |
| // FIXME -- incomplete |
| // |
| fprintf(stderr,"%s incomplete!\n",__func__); |
| |
| struct skc_runtime * runtime = context->runtime; |
| |
| skc_allocator_device_dispose(runtime); |
| skc_allocator_host_dispose(runtime); |
| |
| skc_scheduler_dispose(context->runtime,context->runtime->scheduler); |
| |
| skc_grid_deps_dispose(context->runtime->deps); |
| |
| skc_cq_pool_dispose(runtime,&runtime->cq_pool); |
| |
| skc_block_pool_dispose(context->runtime); |
| |
| // skc_handle_pool_dispose(context->runtime); |
| |
| return SKC_ERR_SUCCESS; |
| } |
| |
| // |
| // REPORT BLOCK POOL ALLOCATION |
| // |
| |
| void |
| skc_runtime_cl_12_debug(struct skc_context * const context) |
| { |
| struct skc_runtime * const runtime = context->runtime; |
| |
| // acquire out-of-order cq |
| cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime); |
| |
| // copy atomics to host |
| skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL); |
| |
| // block until complete |
| cl(Finish(cq)); |
| |
| // dispose of out-of-order cq |
| skc_runtime_release_cq_in_order(runtime,cq); |
| |
| union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr; |
| |
| skc_uint const available = bp_atomic->writes - bp_atomic->reads; |
| skc_uint const inuse = runtime->config->block_pool.pool_size - available; |
| |
| fprintf(stderr, |
| "writes/reads/avail/alloc: %9u / %9u / %9u = %6.2f MB / %9u = %6.2f MB\n", |
| bp_atomic->writes, |
| bp_atomic->reads, |
| available, |
| (available * runtime->config->block.bytes) / (1024.0*1024.0), |
| inuse, |
| (inuse * runtime->config->block.bytes) / (1024.0*1024.0)); |
| } |
| |
| // |
| // |
| // |
| |