| /* |
| * Copyright 2013-2017 Advanced Micro Devices, Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| * |
| */ |
| |
| #include <libsync.h> |
| |
| #include "util/os_time.h" |
| #include "util/u_memory.h" |
| #include "util/u_queue.h" |
| |
| #include "si_pipe.h" |
| |
| struct si_multi_fence { |
| struct pipe_reference reference; |
| struct pipe_fence_handle *gfx; |
| struct pipe_fence_handle *sdma; |
| struct tc_unflushed_batch_token *tc_token; |
| struct util_queue_fence ready; |
| |
| /* If the context wasn't flushed at fence creation, this is non-NULL. */ |
| struct { |
| struct r600_common_context *ctx; |
| unsigned ib_index; |
| } gfx_unflushed; |
| }; |
| |
| static void si_add_fence_dependency(struct r600_common_context *rctx, |
| struct pipe_fence_handle *fence) |
| { |
| struct radeon_winsys *ws = rctx->ws; |
| |
| if (rctx->dma.cs) |
| ws->cs_add_fence_dependency(rctx->dma.cs, fence); |
| ws->cs_add_fence_dependency(rctx->gfx.cs, fence); |
| } |
| |
| static void si_fence_reference(struct pipe_screen *screen, |
| struct pipe_fence_handle **dst, |
| struct pipe_fence_handle *src) |
| { |
| struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; |
| struct si_multi_fence **rdst = (struct si_multi_fence **)dst; |
| struct si_multi_fence *rsrc = (struct si_multi_fence *)src; |
| |
| if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { |
| ws->fence_reference(&(*rdst)->gfx, NULL); |
| ws->fence_reference(&(*rdst)->sdma, NULL); |
| tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL); |
| FREE(*rdst); |
| } |
| *rdst = rsrc; |
| } |
| |
| static struct si_multi_fence *si_create_multi_fence() |
| { |
| struct si_multi_fence *fence = CALLOC_STRUCT(si_multi_fence); |
| if (!fence) |
| return NULL; |
| |
| pipe_reference_init(&fence->reference, 1); |
| util_queue_fence_init(&fence->ready); |
| |
| return fence; |
| } |
| |
| struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx, |
| struct tc_unflushed_batch_token *tc_token) |
| { |
| struct si_multi_fence *fence = si_create_multi_fence(); |
| if (!fence) |
| return NULL; |
| |
| util_queue_fence_reset(&fence->ready); |
| tc_unflushed_batch_token_reference(&fence->tc_token, tc_token); |
| |
| return (struct pipe_fence_handle *)fence; |
| } |
| |
| static void si_fence_server_sync(struct pipe_context *ctx, |
| struct pipe_fence_handle *fence) |
| { |
| struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
| struct si_multi_fence *rfence = (struct si_multi_fence *)fence; |
| |
| util_queue_fence_wait(&rfence->ready); |
| |
| /* Unflushed fences from the same context are no-ops. */ |
| if (rfence->gfx_unflushed.ctx && |
| rfence->gfx_unflushed.ctx == rctx) |
| return; |
| |
| /* All unflushed commands will not start execution before |
| * this fence dependency is signalled. |
| * |
| * Should we flush the context to allow more GPU parallelism? |
| */ |
| if (rfence->sdma) |
| si_add_fence_dependency(rctx, rfence->sdma); |
| if (rfence->gfx) |
| si_add_fence_dependency(rctx, rfence->gfx); |
| } |
| |
| static boolean si_fence_finish(struct pipe_screen *screen, |
| struct pipe_context *ctx, |
| struct pipe_fence_handle *fence, |
| uint64_t timeout) |
| { |
| struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; |
| struct si_multi_fence *rfence = (struct si_multi_fence *)fence; |
| struct r600_common_context *rctx; |
| int64_t abs_timeout = os_time_get_absolute_timeout(timeout); |
| |
| ctx = threaded_context_unwrap_sync(ctx); |
| rctx = ctx ? (struct r600_common_context*)ctx : NULL; |
| |
| if (!util_queue_fence_is_signalled(&rfence->ready)) { |
| if (!timeout) |
| return false; |
| |
| if (rfence->tc_token) { |
| /* Ensure that si_flush_from_st will be called for |
| * this fence, but only if we're in the API thread |
| * where the context is current. |
| * |
| * Note that the batch containing the flush may already |
| * be in flight in the driver thread, so the fence |
| * may not be ready yet when this call returns. |
| */ |
| threaded_context_flush(ctx, rfence->tc_token); |
| } |
| |
| if (timeout == PIPE_TIMEOUT_INFINITE) { |
| util_queue_fence_wait(&rfence->ready); |
| } else { |
| if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout)) |
| return false; |
| } |
| } |
| |
| if (rfence->sdma) { |
| if (!rws->fence_wait(rws, rfence->sdma, timeout)) |
| return false; |
| |
| /* Recompute the timeout after waiting. */ |
| if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { |
| int64_t time = os_time_get_nano(); |
| timeout = abs_timeout > time ? abs_timeout - time : 0; |
| } |
| } |
| |
| if (!rfence->gfx) |
| return true; |
| |
| /* Flush the gfx IB if it hasn't been flushed yet. */ |
| if (rctx && |
| rfence->gfx_unflushed.ctx == rctx && |
| rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { |
| rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); |
| rfence->gfx_unflushed.ctx = NULL; |
| |
| if (!timeout) |
| return false; |
| |
| /* Recompute the timeout after all that. */ |
| if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { |
| int64_t time = os_time_get_nano(); |
| timeout = abs_timeout > time ? abs_timeout - time : 0; |
| } |
| } |
| |
| return rws->fence_wait(rws, rfence->gfx, timeout); |
| } |
| |
| static void si_create_fence_fd(struct pipe_context *ctx, |
| struct pipe_fence_handle **pfence, int fd) |
| { |
| struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; |
| struct radeon_winsys *ws = rscreen->ws; |
| struct si_multi_fence *rfence; |
| |
| *pfence = NULL; |
| |
| if (!rscreen->info.has_sync_file) |
| return; |
| |
| rfence = si_create_multi_fence(); |
| if (!rfence) |
| return; |
| |
| rfence->gfx = ws->fence_import_sync_file(ws, fd); |
| if (!rfence->gfx) { |
| FREE(rfence); |
| return; |
| } |
| |
| *pfence = (struct pipe_fence_handle*)rfence; |
| } |
| |
| static int si_fence_get_fd(struct pipe_screen *screen, |
| struct pipe_fence_handle *fence) |
| { |
| struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; |
| struct radeon_winsys *ws = rscreen->ws; |
| struct si_multi_fence *rfence = (struct si_multi_fence *)fence; |
| int gfx_fd = -1, sdma_fd = -1; |
| |
| if (!rscreen->info.has_sync_file) |
| return -1; |
| |
| util_queue_fence_wait(&rfence->ready); |
| |
| /* Deferred fences aren't supported. */ |
| assert(!rfence->gfx_unflushed.ctx); |
| if (rfence->gfx_unflushed.ctx) |
| return -1; |
| |
| if (rfence->sdma) { |
| sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma); |
| if (sdma_fd == -1) |
| return -1; |
| } |
| if (rfence->gfx) { |
| gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx); |
| if (gfx_fd == -1) { |
| if (sdma_fd != -1) |
| close(sdma_fd); |
| return -1; |
| } |
| } |
| |
| /* If we don't have FDs at this point, it means we don't have fences |
| * either. */ |
| if (sdma_fd == -1) |
| return gfx_fd; |
| if (gfx_fd == -1) |
| return sdma_fd; |
| |
| /* Get a fence that will be a combination of both fences. */ |
| sync_accumulate("radeonsi", &gfx_fd, sdma_fd); |
| close(sdma_fd); |
| return gfx_fd; |
| } |
| |
| static void si_flush_from_st(struct pipe_context *ctx, |
| struct pipe_fence_handle **fence, |
| unsigned flags) |
| { |
| struct pipe_screen *screen = ctx->screen; |
| struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
| struct radeon_winsys *ws = rctx->ws; |
| struct pipe_fence_handle *gfx_fence = NULL; |
| struct pipe_fence_handle *sdma_fence = NULL; |
| bool deferred_fence = false; |
| unsigned rflags = RADEON_FLUSH_ASYNC; |
| |
| if (flags & PIPE_FLUSH_END_OF_FRAME) |
| rflags |= RADEON_FLUSH_END_OF_FRAME; |
| |
| /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ |
| if (rctx->dma.cs) |
| rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); |
| |
| if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { |
| if (fence) |
| ws->fence_reference(&gfx_fence, rctx->last_gfx_fence); |
| if (!(flags & PIPE_FLUSH_DEFERRED)) |
| ws->cs_sync_flush(rctx->gfx.cs); |
| } else { |
| /* Instead of flushing, create a deferred fence. Constraints: |
| * - The state tracker must allow a deferred flush. |
| * - The state tracker must request a fence. |
| * - fence_get_fd is not allowed. |
| * Thread safety in fence_finish must be ensured by the state tracker. |
| */ |
| if (flags & PIPE_FLUSH_DEFERRED && |
| !(flags & PIPE_FLUSH_FENCE_FD) && |
| fence) { |
| gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs); |
| deferred_fence = true; |
| } else { |
| rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); |
| } |
| } |
| |
| /* Both engines can signal out of order, so we need to keep both fences. */ |
| if (fence) { |
| struct si_multi_fence *multi_fence; |
| |
| if (flags & TC_FLUSH_ASYNC) { |
| multi_fence = (struct si_multi_fence *)*fence; |
| assert(multi_fence); |
| } else { |
| multi_fence = si_create_multi_fence(); |
| if (!multi_fence) { |
| ws->fence_reference(&sdma_fence, NULL); |
| ws->fence_reference(&gfx_fence, NULL); |
| goto finish; |
| } |
| |
| screen->fence_reference(screen, fence, NULL); |
| *fence = (struct pipe_fence_handle*)multi_fence; |
| } |
| |
| /* If both fences are NULL, fence_finish will always return true. */ |
| multi_fence->gfx = gfx_fence; |
| multi_fence->sdma = sdma_fence; |
| |
| if (deferred_fence) { |
| multi_fence->gfx_unflushed.ctx = rctx; |
| multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; |
| } |
| |
| if (flags & TC_FLUSH_ASYNC) { |
| util_queue_fence_signal(&multi_fence->ready); |
| tc_unflushed_batch_token_reference(&multi_fence->tc_token, NULL); |
| } |
| } |
| finish: |
| if (!(flags & PIPE_FLUSH_DEFERRED)) { |
| if (rctx->dma.cs) |
| ws->cs_sync_flush(rctx->dma.cs); |
| ws->cs_sync_flush(rctx->gfx.cs); |
| } |
| } |
| |
| void si_init_fence_functions(struct si_context *ctx) |
| { |
| ctx->b.b.flush = si_flush_from_st; |
| ctx->b.b.create_fence_fd = si_create_fence_fd; |
| ctx->b.b.fence_server_sync = si_fence_server_sync; |
| } |
| |
| void si_init_screen_fence_functions(struct si_screen *screen) |
| { |
| screen->b.b.fence_finish = si_fence_finish; |
| screen->b.b.fence_reference = si_fence_reference; |
| screen->b.b.fence_get_fd = si_fence_get_fd; |
| } |