/*
* XGL
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include "genhw/genhw.h"
#include "img.h"
#include "buf.h"
#include "cmd_priv.h"
enum {
MEM_CACHE = 1 << 0,
DATA_READ_CACHE = 1 << 1,
DATA_WRITE_CACHE = 1 << 2,
RENDER_CACHE = 1 << 3,
SAMPLER_CACHE = 1 << 4,
};
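
/* Map an image layout to the set of cache domains that may hold data for
 * an image in that layout.  Unknown layouts map to no caches, making
 * cache control for such a transition a no-op.
 */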
static uint32_t img_get_layout_caches(const struct intel_img *img,
XGL_IMAGE_LAYOUT layout)
{
uint32_t caches;
switch (layout) {
case XGL_IMAGE_LAYOUT_GENERAL:
        // General layout: the image can be used for any kind of access
caches = MEM_CACHE | DATA_READ_CACHE | DATA_WRITE_CACHE | RENDER_CACHE | SAMPLER_CACHE;
break;
case XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
        // Optimal layout when the image is used only for color attachment read/write
caches = DATA_WRITE_CACHE | RENDER_CACHE;
break;
case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
        // Optimal layout when the image is used only for depth/stencil attachment read/write
caches = DATA_WRITE_CACHE | RENDER_CACHE;
break;
case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
        // Optimal layout when the image is used as a read-only depth/stencil attachment and for shader access
caches = RENDER_CACHE;
break;
case XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
        // Optimal layout when the image is used for read-only shader access
caches = DATA_READ_CACHE | SAMPLER_CACHE;
break;
case XGL_IMAGE_LAYOUT_CLEAR_OPTIMAL:
        // Optimal layout when the image is used only for clear operations
caches = RENDER_CACHE;
break;
case XGL_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL:
        // Optimal layout when the image is used only as the source of transfer operations
caches = MEM_CACHE | DATA_READ_CACHE | RENDER_CACHE | SAMPLER_CACHE;
break;
case XGL_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL:
        // Optimal layout when the image is used only as the destination of transfer operations
caches = MEM_CACHE | DATA_WRITE_CACHE | RENDER_CACHE;
break;
default:
caches = 0;
break;
}
return caches;
}
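
/* Compute the PIPE_CONTROL flags needed to move data between cache
 * domains: flush write-back caches in old_caches that may hold dirty
 * data, and invalidate read caches in new_caches that may hold stale
 * data.  is_ds selects the depth cache flush over the render cache flush.
 */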
static uint32_t cmd_get_flush_flags(const struct intel_cmd *cmd,
uint32_t old_caches,
uint32_t new_caches,
bool is_ds)
{
uint32_t flags = 0;
    /* no writable domain in the old caches means nothing can be dirty */
if (!(old_caches & (MEM_CACHE | RENDER_CACHE | DATA_WRITE_CACHE)))
return 0;
if ((old_caches & RENDER_CACHE) && (new_caches & ~RENDER_CACHE)) {
if (is_ds)
flags |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
else
flags |= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH;
}
if ((old_caches & DATA_WRITE_CACHE) &&
(new_caches & ~(DATA_READ_CACHE | DATA_WRITE_CACHE))) {
if (cmd_gen(cmd) >= INTEL_GEN(7))
flags |= GEN7_PIPE_CONTROL_DC_FLUSH_ENABLE;
}
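    /* invalidate read caches that may hold stale copies of the data */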
if (new_caches & SAMPLER_CACHE)
flags |= GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
if ((new_caches & DATA_READ_CACHE) && old_caches != DATA_WRITE_CACHE)
flags |= GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE;
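    /* a CS stall orders the flush/invalidation behind all prior work */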
if (!flags)
return 0;
flags |= GEN6_PIPE_CONTROL_CS_STALL;
return flags;
}
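
/* Accumulate the output (write) and input (read) masks of an array of
 * memory barriers and emit a single batch flush that performs the union
 * of the required cache flushes and invalidations, in addition to any
 * caller-provided flush_flags.
 */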
static void cmd_memory_barriers(struct intel_cmd *cmd,
uint32_t flush_flags,
XGL_UINT memory_barrier_count,
const XGL_VOID* memory_barriers)
{
uint32_t i;
    const XGL_MEMORY_BARRIER *memory_barrier;
    const XGL_BUFFER_MEMORY_BARRIER *buffer_memory_barrier;
    const XGL_IMAGE_MEMORY_BARRIER *image_memory_barrier;
XGL_FLAGS input_mask = 0;
XGL_FLAGS output_mask = 0;
for (i = 0; i < memory_barrier_count; i++) {
memory_barrier = &((XGL_MEMORY_BARRIER *) memory_barriers)[i];
        switch (memory_barrier->sType) {
case XGL_STRUCTURE_TYPE_MEMORY_BARRIER:
output_mask |= memory_barrier->outputMask;
input_mask |= memory_barrier->inputMask;
break;
case XGL_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER:
            buffer_memory_barrier = (const XGL_BUFFER_MEMORY_BARRIER *) memory_barrier;
output_mask |= buffer_memory_barrier->outputMask;
input_mask |= buffer_memory_barrier->inputMask;
break;
case XGL_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER:
            image_memory_barrier = (const XGL_IMAGE_MEMORY_BARRIER *) memory_barrier;
output_mask |= image_memory_barrier->outputMask;
input_mask |= image_memory_barrier->inputMask;
{
struct intel_img *img = intel_img(image_memory_barrier->image);
flush_flags |= cmd_get_flush_flags(cmd,
img_get_layout_caches(img, image_memory_barrier->oldLayout),
img_get_layout_caches(img, image_memory_barrier->newLayout),
(img->layout.format.numericFormat == XGL_NUM_FMT_DS));
}
break;
default:
break;
}
}
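    /* translate the accumulated write domains into cache flushes */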
if (output_mask & XGL_MEMORY_OUTPUT_SHADER_WRITE_BIT) {
flush_flags |= GEN7_PIPE_CONTROL_DC_FLUSH_ENABLE;
}
if (output_mask & XGL_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT) {
flush_flags |= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH;
}
if (output_mask & XGL_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT) {
flush_flags |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
}
/* CPU write is cache coherent, so XGL_MEMORY_OUTPUT_CPU_WRITE_BIT needs no flush. */
/* Meta handles flushes, so XGL_MEMORY_OUTPUT_COPY_BIT needs no flush. */
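    /* translate the accumulated read domains into cache invalidations */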
if (input_mask & (XGL_MEMORY_INPUT_SHADER_READ_BIT | XGL_MEMORY_INPUT_UNIFORM_READ_BIT)) {
flush_flags |= GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
}
if (input_mask & XGL_MEMORY_INPUT_UNIFORM_READ_BIT) {
flush_flags |= GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE;
}
if (input_mask & XGL_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT) {
flush_flags |= GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE;
}
/* These bits have no corresponding cache invalidate operation.
* XGL_MEMORY_INPUT_CPU_READ_BIT
* XGL_MEMORY_INPUT_INDIRECT_COMMAND_BIT
* XGL_MEMORY_INPUT_INDEX_FETCH_BIT
* XGL_MEMORY_INPUT_COLOR_ATTACHMENT_BIT
* XGL_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT
* XGL_MEMORY_INPUT_COPY_BIT
*/
cmd_batch_flush(cmd, flush_flags);
}
ICD_EXPORT XGL_VOID XGLAPI xglCmdWaitEvents(
XGL_CMD_BUFFER cmdBuffer,
const XGL_EVENT_WAIT_INFO* pWaitInfo)
{
struct intel_cmd *cmd = intel_cmd(cmdBuffer);
/* This hardware will always wait at XGL_WAIT_EVENT_TOP_OF_PIPE.
* Passing a pWaitInfo->waitEvent of XGL_WAIT_EVENT_BEFORE_FRAGMENT_PROCESSING
* does not change that.
*/
    /* Because the command buffer is serialized, by the time execution
     * reaches a pipelined wait, all prior events have already completed,
     * so pWaitInfo->pEvents need not be examined.
     * xglCmdWaitEvents is therefore equivalent to the memory barrier part
     * of xglCmdPipelineBarrier: cmd_memory_barriers will stall via
     * GEN6_PIPE_CONTROL_CS_STALL and perform the appropriate cache control.
     */
cmd_memory_barriers(cmd,
GEN6_PIPE_CONTROL_CS_STALL,
pWaitInfo->memBarrierCount, pWaitInfo->pMemBarriers);
}
ICD_EXPORT XGL_VOID XGLAPI xglCmdPipelineBarrier(
XGL_CMD_BUFFER cmdBuffer,
const XGL_PIPELINE_BARRIER* pBarrier)
{
struct intel_cmd *cmd = intel_cmd(cmdBuffer);
uint32_t pipe_control_flags = 0;
uint32_t i;
/* This hardware will always wait at XGL_WAIT_EVENT_TOP_OF_PIPE.
* Passing a pBarrier->waitEvent of XGL_WAIT_EVENT_BEFORE_FRAGMENT_PROCESSING
* does not change that.
*/
    /* Cache control is done with PIPE_CONTROL flags.
     * Without GEN6_PIPE_CONTROL_CS_STALL set, a barrier behaves as
     * XGL_SET_EVENT_TOP_OF_PIPE; all other pEvents values behave as
     * XGL_SET_EVENT_GPU_COMMANDS_COMPLETE.
     */
for (i = 0; i < pBarrier->eventCount; i++) {
        switch (pBarrier->pEvents[i]) {
case XGL_SET_EVENT_TOP_OF_PIPE:
break;
case XGL_SET_EVENT_VERTEX_PROCESSING_COMPLETE:
case XGL_SET_EVENT_FRAGMENT_PROCESSING_COMPLETE:
case XGL_SET_EVENT_GRAPHICS_PIPELINE_COMPLETE:
case XGL_SET_EVENT_COMPUTE_PIPELINE_COMPLETE:
case XGL_SET_EVENT_TRANSFER_COMPLETE:
case XGL_SET_EVENT_GPU_COMMANDS_COMPLETE:
pipe_control_flags |= GEN6_PIPE_CONTROL_CS_STALL;
break;
        default:
            cmd->result = XGL_ERROR_UNKNOWN;
            return;
}
}
    /* cmd_memory_barriers will stall via GEN6_PIPE_CONTROL_CS_STALL when
     * requested and perform the appropriate cache control.
     */
cmd_memory_barriers(cmd,
pipe_control_flags,
pBarrier->memBarrierCount, pBarrier->pMemBarriers);
}
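
/* Usage sketch (illustrative only, not compiled as part of the driver):
 * how an application might record a barrier that makes color-attachment
 * writes visible to shader reads.  Only the fields read above (sType,
 * outputMask, inputMask, waitEvent, eventCount, pEvents, memBarrierCount,
 * pMemBarriers) are known from this file; XGL_STRUCTURE_TYPE_PIPELINE_BARRIER
 * and the exact struct layouts are assumptions about the XGL headers of
 * this era.
 *
 *     XGL_MEMORY_BARRIER mem_barrier = {
 *         .sType = XGL_STRUCTURE_TYPE_MEMORY_BARRIER,
 *         .outputMask = XGL_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT,
 *         .inputMask = XGL_MEMORY_INPUT_SHADER_READ_BIT,
 *     };
 *     XGL_PIPELINE_BARRIER barrier = {
 *         .sType = XGL_STRUCTURE_TYPE_PIPELINE_BARRIER,
 *         .waitEvent = XGL_WAIT_EVENT_TOP_OF_PIPE,
 *         .eventCount = 0,
 *         .memBarrierCount = 1,
 *         .pMemBarriers = &mem_barrier,
 *     };
 *     xglCmdPipelineBarrier(cmdBuffer, &barrier);
 *
 * With these masks, cmd_memory_barriers() ORs in
 * GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH and
 * GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE before calling
 * cmd_batch_flush().
 */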