/*
*
* Copyright (C) 2015 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Chia-I Wu <olvaffe@gmail.com>
* Author: Chia-I Wu <olv@lunarg.com>
* Author: Cody Northrop <cody@lunarg.com>
* Author: Courtney Goeltzenleuchter <courtney@LunarG.com>
*
*/
#include <math.h>
#include "genhw/genhw.h"
#include "buf.h"
#include "desc.h"
#include "img.h"
#include "mem.h"
#include "pipeline.h"
#include "sampler.h"
#include "shader.h"
#include "state.h"
#include "view.h"
#include "cmd_priv.h"
#include "fb.h"
static void gen6_3DPRIMITIVE(struct intel_cmd *cmd,
int prim_type, bool indexed,
uint32_t vertex_count,
uint32_t vertex_start,
uint32_t instance_count,
uint32_t instance_start,
uint32_t vertex_base)
{
const uint8_t cmd_len = 6;
uint32_t dw0, *dw;
CMD_ASSERT(cmd, 6, 6);
dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) |
prim_type << GEN6_3DPRIM_DW0_TYPE__SHIFT |
(cmd_len - 2);
if (indexed)
dw0 |= GEN6_3DPRIM_DW0_ACCESS_RANDOM;
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = vertex_count;
dw[2] = vertex_start;
dw[3] = instance_count;
dw[4] = instance_start;
dw[5] = vertex_base;
}
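/*
 * The Gen7 variant: topology and access type move out of DW0 into a
 * dedicated DW1, growing the packet to 7 dwords; the remaining dwords
 * keep the Gen6 ordering.
 */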
static void gen7_3DPRIMITIVE(struct intel_cmd *cmd,
int prim_type, bool indexed,
uint32_t vertex_count,
uint32_t vertex_start,
uint32_t instance_count,
uint32_t instance_start,
uint32_t vertex_base)
{
const uint8_t cmd_len = 7;
uint32_t dw0, dw1, *dw;
CMD_ASSERT(cmd, 7, 7.5);
dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2);
dw1 = prim_type << GEN7_3DPRIM_DW1_TYPE__SHIFT;
if (indexed)
dw1 |= GEN7_3DPRIM_DW1_ACCESS_RANDOM;
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = dw1;
dw[2] = vertex_count;
dw[3] = vertex_start;
dw[4] = instance_count;
dw[5] = instance_start;
dw[6] = vertex_base;
}
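/*
 * Emit PIPE_CONTROL with the given DW1 flags and an optional post-sync
 * write of imm to bo at bo_offset.  The asserts below encode the PRM
 * restrictions on which DW1 flags may be combined.
 */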
static void gen6_PIPE_CONTROL(struct intel_cmd *cmd, uint32_t dw1,
struct intel_bo *bo, uint32_t bo_offset,
uint64_t imm)
{
const uint8_t cmd_len = 5;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, PIPE_CONTROL) |
(cmd_len - 2);
uint32_t reloc_flags = INTEL_RELOC_WRITE;
uint32_t *dw;
uint32_t pos;
CMD_ASSERT(cmd, 6, 7.5);
assert(bo_offset % 8 == 0);
if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 73:
*
* "1 of the following must also be set (when CS stall is set):
*
* * Depth Cache Flush Enable ([0] of DW1)
* * Stall at Pixel Scoreboard ([1] of DW1)
* * Depth Stall ([13] of DW1)
* * Post-Sync Operation ([13] of DW1)
* * Render Target Cache Flush Enable ([12] of DW1)
* * Notify Enable ([8] of DW1)"
*
* From the Ivy Bridge PRM, volume 2 part 1, page 61:
*
* "One of the following must also be set (when CS stall is set):
*
* * Render Target Cache Flush Enable ([12] of DW1)
* * Depth Cache Flush Enable ([0] of DW1)
* * Stall at Pixel Scoreboard ([1] of DW1)
* * Depth Stall ([13] of DW1)
* * Post-Sync Operation ([13] of DW1)"
*/
uint32_t bit_test = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
GEN6_PIPE_CONTROL_DEPTH_STALL;
/* post-sync op */
bit_test |= GEN6_PIPE_CONTROL_WRITE_IMM |
GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT |
GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
if (cmd_gen(cmd) == INTEL_GEN(6))
bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE;
assert(dw1 & bit_test);
}
if (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 73:
*
* "Following bits must be clear (when Depth Stall is set):
*
* * Render Target Cache Flush Enable ([12] of DW1)
* * Depth Cache Flush Enable ([0] of DW1)"
*/
assert(!(dw1 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
}
/*
* From the Sandy Bridge PRM, volume 1 part 3, page 19:
*
* "[DevSNB] PPGTT memory writes by MI_* (such as MI_STORE_DATA_IMM)
* and PIPE_CONTROL are not supported."
*
* The kernel will add the mapping automatically (when write domain is
* INTEL_DOMAIN_INSTRUCTION).
*/
if (cmd_gen(cmd) == INTEL_GEN(6) && bo) {
bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT;
reloc_flags |= INTEL_RELOC_GGTT;
}
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = dw1;
dw[2] = 0;
dw[3] = (uint32_t) imm;
dw[4] = (uint32_t) (imm >> 32);
if (bo) {
cmd_reserve_reloc(cmd, 1);
cmd_batch_reloc(cmd, pos + 2, bo, bo_offset, reloc_flags);
}
}
static bool gen6_can_primitive_restart(const struct intel_cmd *cmd)
{
const struct intel_pipeline *p = cmd->bind.pipeline.graphics;
bool supported;
CMD_ASSERT(cmd, 6, 7.5);
if (cmd_gen(cmd) >= INTEL_GEN(7.5))
return (p->prim_type != GEN6_3DPRIM_RECTLIST);
switch (p->prim_type) {
case GEN6_3DPRIM_POINTLIST:
case GEN6_3DPRIM_LINELIST:
case GEN6_3DPRIM_LINESTRIP:
case GEN6_3DPRIM_TRILIST:
case GEN6_3DPRIM_TRISTRIP:
supported = true;
break;
default:
supported = false;
break;
}
if (!supported)
return false;
switch (cmd->bind.index.type) {
case VK_INDEX_TYPE_UINT16:
supported = (p->primitive_restart_index != 0xffffu);
break;
case VK_INDEX_TYPE_UINT32:
supported = (p->primitive_restart_index != 0xffffffffu);
break;
default:
supported = false;
break;
}
return supported;
}
static void gen6_3DSTATE_INDEX_BUFFER(struct intel_cmd *cmd,
const struct intel_buf *buf,
VkDeviceSize offset,
VkIndexType type,
bool enable_cut_index)
{
const uint8_t cmd_len = 3;
uint32_t dw0, end_offset, *dw;
unsigned offset_align;
uint32_t pos;
CMD_ASSERT(cmd, 6, 7.5);
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2);
/* the bit is moved to 3DSTATE_VF */
if (cmd_gen(cmd) >= INTEL_GEN(7.5))
assert(!enable_cut_index);
if (enable_cut_index)
dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
switch (type) {
case VK_INDEX_TYPE_UINT16:
dw0 |= GEN6_IB_DW0_FORMAT_WORD;
offset_align = 2;
break;
case VK_INDEX_TYPE_UINT32:
dw0 |= GEN6_IB_DW0_FORMAT_DWORD;
offset_align = 4;
break;
    default:
        assert(!"unsupported index type");
        offset_align = 2; /* fall back so end_offset below stays well-defined */
        break;
}
/* aligned and inclusive */
end_offset = buf->size - (buf->size % offset_align) - 1;
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
cmd_reserve_reloc(cmd, 2);
cmd_batch_reloc(cmd, pos + 1, buf->obj.mem->bo, offset, 0);
cmd_batch_reloc(cmd, pos + 2, buf->obj.mem->bo, end_offset, 0);
}
static void gen75_3DSTATE_VF(struct intel_cmd *cmd,
bool enable_cut_index,
uint32_t cut_index)
{
const uint8_t cmd_len = 2;
uint32_t dw0, *dw;
CMD_ASSERT(cmd, 7.5, 7.5);
dw0 = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2);
if (enable_cut_index)
dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE;
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = cut_index;
}
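/*
 * Patch a kernel's scratch-space pointer into the batch.  Per-thread
 * scratch size is encoded as a power-of-2 exponent relative to 1KB:
 * assuming u_ffs() behaves like ffs(3), u_ffs(size) - 11 equals
 * log2(size / 1024), e.g. 2048 bytes encodes as 1.  Since the scratch
 * offset is 1KB-aligned, its low bits are free to carry that exponent.
 */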
static void gen6_add_scratch_space(struct intel_cmd *cmd,
uint32_t batch_pos,
const struct intel_pipeline *pipeline,
const struct intel_pipeline_shader *sh)
{
int scratch_space;
CMD_ASSERT(cmd, 6, 7.5);
assert(sh->per_thread_scratch_size &&
sh->per_thread_scratch_size % 1024 == 0 &&
u_is_pow2(sh->per_thread_scratch_size) &&
sh->scratch_offset % 1024 == 0);
scratch_space = u_ffs(sh->per_thread_scratch_size) - 11;
cmd_reserve_reloc(cmd, 1);
cmd_batch_reloc(cmd, batch_pos, pipeline->obj.mem->bo,
sh->scratch_offset | scratch_space, INTEL_RELOC_WRITE);
}
static void gen6_3DSTATE_GS(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *gs = &pipeline->gs;
const uint8_t cmd_len = 7;
uint32_t dw0, dw2, dw4, dw5, dw6, *dw;
CMD_ASSERT(cmd, 6, 6);
int vue_read_len = 0;
int pos = 0;
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) {
// based on ilo_gpe_init_gs_cso_gen6
vue_read_len = (gs->in_count + 1) / 2;
if (!vue_read_len)
vue_read_len = 1;
dw2 = (gs->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
gs->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT |
GEN6_THREADDISP_SPF;
dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
gs->urb_grf_start << GEN6_GS_DW4_URB_GRF_START__SHIFT;
dw5 = (gs->max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
GEN6_GS_DW5_STATISTICS |
GEN6_GS_DW5_RENDER_ENABLE;
dw6 = GEN6_GS_DW6_GS_ENABLE;
if (gs->discard_adj)
dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
} else {
dw2 = 0;
dw4 = 0;
dw5 = GEN6_GS_DW5_STATISTICS;
dw6 = 0;
}
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = cmd->bind.pipeline.gs_offset;
dw[2] = dw2;
dw[3] = 0;
dw[4] = dw4;
dw[5] = dw5;
dw[6] = dw6;
if (gs->per_thread_scratch_size)
gen6_add_scratch_space(cmd, pos + 3, pipeline, gs);
}
static void gen7_3DSTATE_GS(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *gs = &pipeline->gs;
const uint8_t cmd_len = 7;
uint32_t dw0, dw2, dw4, dw5, dw6, *dw;
CMD_ASSERT(cmd, 7, 7.5);
int vue_read_len = 0;
int pos = 0;
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) {
// based on upload_gs_state
dw2 = (gs->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
gs->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
vue_read_len = (gs->in_count + 1) / 2;
if (!vue_read_len)
vue_read_len = 1;
dw4 = (gs->output_size_hwords * 2 - 1) << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
gs->output_topology << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
gs->urb_grf_start << GEN7_GS_DW4_URB_GRF_START__SHIFT;
dw5 = gs->control_data_header_size_hwords << GEN7_GS_DW5_CONTROL_DATA_HEADER_SIZE__SHIFT |
(gs->invocations - 1) << GEN7_GS_DW5_INSTANCE_CONTROL__SHIFT |
GEN7_GS_DW5_STATISTICS |
GEN7_GS_DW5_GS_ENABLE;
dw5 |= (gs->dual_instanced_dispatch) ? GEN7_GS_DW5_DISPATCH_MODE_DUAL_INSTANCE
: GEN7_GS_DW5_DISPATCH_MODE_DUAL_OBJECT;
if (gs->include_primitive_id)
dw5 |= GEN7_GS_DW5_INCLUDE_PRIMITIVE_ID;
if (cmd_gen(cmd) >= INTEL_GEN(7.5)) {
dw5 |= (gs->max_threads - 1) << GEN75_GS_DW5_MAX_THREADS__SHIFT;
dw5 |= GEN75_GS_DW5_REORDER_TRAILING;
dw6 = gs->control_data_format << GEN75_GS_DW6_GSCTRL__SHIFT;
} else {
dw5 |= (gs->max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT;
dw5 |= gs->control_data_format << GEN7_GS_DW5_GSCTRL__SHIFT;
dw6 = 0;
}
} else {
dw2 = 0;
dw4 = 0;
dw5 = GEN7_GS_DW5_STATISTICS;
dw6 = 0;
}
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = cmd->bind.pipeline.gs_offset;
dw[2] = dw2;
dw[3] = 0;
dw[4] = dw4;
dw[5] = dw5;
dw[6] = dw6;
if (gs->per_thread_scratch_size)
gen6_add_scratch_space(cmd, pos + 3, pipeline, gs);
}
static void gen6_3DSTATE_DRAWING_RECTANGLE(struct intel_cmd *cmd,
uint32_t width, uint32_t height)
{
const uint8_t cmd_len = 4;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) |
(cmd_len - 2);
uint32_t *dw;
CMD_ASSERT(cmd, 6, 7.5);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
if (width && height) {
dw[1] = 0;
dw[2] = (height - 1) << 16 |
(width - 1);
} else {
dw[1] = 1;
dw[2] = 0;
}
dw[3] = 0;
}
static void gen7_fill_3DSTATE_SF_body(const struct intel_cmd *cmd,
uint32_t body[6])
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_render_pass *rp = cmd->bind.render_pass;
const struct intel_render_pass_subpass *subpass =
cmd->bind.render_pass_subpass;
const struct intel_dynamic_line_width *line_width = &cmd->bind.state.line_width;
const struct intel_dynamic_depth_bias *depth_bias = &cmd->bind.state.depth_bias;
uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
CMD_ASSERT(cmd, 6, 7.5);
dw1 = GEN7_SF_DW1_STATISTICS |
GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
GEN7_SF_DW1_DEPTH_OFFSET_POINT |
GEN7_SF_DW1_VIEWPORT_ENABLE |
pipeline->cmd_sf_fill;
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
int format = GEN6_ZFORMAT_D32_FLOAT;
if (subpass->ds_index < rp->attachment_count) {
switch (rp->attachments[subpass->ds_index].format) {
case VK_FORMAT_D16_UNORM:
format = GEN6_ZFORMAT_D16_UNORM;
break;
case VK_FORMAT_D32_SFLOAT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
format = GEN6_ZFORMAT_D32_FLOAT;
break;
default:
assert(!"unsupported depth/stencil format");
break;
}
}
dw1 |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
}
dw2 = pipeline->cmd_sf_cull;
/* Scissor is always enabled */
dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
// TODO: line width support
(void) line_width;
if (pipeline->sample_count > 1) {
dw2 |= 128 << GEN7_SF_DW2_LINE_WIDTH__SHIFT |
GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;
} else {
dw2 |= 0 << GEN7_SF_DW2_LINE_WIDTH__SHIFT |
GEN7_SF_DW2_MSRASTMODE_OFF_PIXEL;
}
dw3 = 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT |
GEN7_SF_DW3_SUBPIXEL_8BITS;
if (pipeline->depthBiasEnable) {
dw4 = u_fui((float) depth_bias->depth_bias * 2.0f);
dw5 = u_fui(depth_bias->slope_scaled_depth_bias);
dw6 = u_fui(depth_bias->depth_bias_clamp);
} else {
dw4 = 0;
dw5 = 0;
dw6 = 0;
}
body[0] = dw1;
body[1] = dw2;
body[2] = dw3;
body[3] = dw4;
body[4] = dw5;
body[5] = dw6;
}
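/*
 * The six body dwords above are shared by both generations: Gen6 embeds
 * them in its 20-dword 3DSTATE_SF (together with the SBE fields), while
 * Gen7 emits them as the body of a standalone 7-dword 3DSTATE_SF.
 */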
static void gen6_3DSTATE_SF(struct intel_cmd *cmd)
{
const uint8_t cmd_len = 20;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) |
(cmd_len - 2);
const uint32_t *sbe = cmd->bind.pipeline.graphics->cmd_3dstate_sbe;
uint32_t sf[6];
uint32_t *dw;
CMD_ASSERT(cmd, 6, 6);
gen7_fill_3DSTATE_SF_body(cmd, sf);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = sbe[1];
memcpy(&dw[2], sf, sizeof(sf));
    memcpy(&dw[8], &sbe[2], sizeof(uint32_t) * 12);
}
static void gen7_3DSTATE_SF(struct intel_cmd *cmd)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
CMD_ASSERT(cmd, 7, 7.5);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) |
(cmd_len - 2);
gen7_fill_3DSTATE_SF_body(cmd, &dw[1]);
}
static void gen6_3DSTATE_CLIP(struct intel_cmd *cmd)
{
const uint8_t cmd_len = 4;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) |
(cmd_len - 2);
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *vs = &pipeline->vs;
const struct intel_pipeline_shader *fs = &pipeline->fs;
const struct intel_dynamic_viewport *viewport = &cmd->bind.state.viewport;
uint32_t dw1, dw2, dw3, *dw;
CMD_ASSERT(cmd, 6, 7.5);
dw1 = GEN6_CLIP_DW1_STATISTICS;
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
dw1 |= GEN7_CLIP_DW1_SUBPIXEL_8BITS |
GEN7_CLIP_DW1_EARLY_CULL_ENABLE |
pipeline->cmd_clip_cull;
}
dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
GEN6_CLIP_DW2_APIMODE_D3D | /* depth range [0, 1] */
GEN6_CLIP_DW2_XY_TEST_ENABLE |
(vs->enable_user_clip ? 1 : 0) << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
if (pipeline->rasterizerDiscardEnable)
dw2 |= GEN6_CLIP_DW2_CLIPMODE_REJECT_ALL;
else
dw2 |= GEN6_CLIP_DW2_CLIPMODE_NORMAL;
if (pipeline->depthClipEnable)
dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
if (fs->barycentric_interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
GEN6_INTERP_NONPERSPECTIVE_CENTROID |
GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT |
(viewport->viewport_count - 1);
/* TODO: framebuffer requests layer_count > 1 */
if (cmd->bind.fb->array_size == 1) {
dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO;
}
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = dw1;
dw[2] = dw2;
dw[3] = dw3;
}
static void gen6_3DSTATE_WM(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *fs = &pipeline->fs;
const uint8_t cmd_len = 9;
uint32_t pos;
uint32_t dw0, dw2, dw4, dw5, dw6, dw8, *dw;
CMD_ASSERT(cmd, 6, 6);
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
dw2 = (fs->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
fs->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
dw4 = GEN6_WM_DW4_STATISTICS |
fs->urb_grf_start << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
fs->urb_grf_start_16 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
dw5 = (fs->max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
GEN6_WM_DW5_PS_DISPATCH_ENABLE |
GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
if (fs->offset_16)
dw5 |= GEN6_PS_DISPATCH_16 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
if (fs->uses & INTEL_SHADER_USE_KILL ||
pipeline->alphaToCoverageEnable)
dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
if (fs->computed_depth_mode)
dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
if (fs->uses & INTEL_SHADER_USE_DEPTH)
dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
if (fs->uses & INTEL_SHADER_USE_W)
dw5 |= GEN6_WM_DW5_PS_USE_W;
if (pipeline->dual_source_blend_enable)
dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
dw6 = fs->in_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
GEN6_WM_DW6_PS_POSOFFSET_NONE |
GEN6_WM_DW6_ZW_INTERP_PIXEL |
fs->barycentric_interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT |
GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
if (pipeline->sample_count > 1) {
dw6 |= GEN6_WM_DW6_MSRASTMODE_ON_PATTERN |
GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
} else {
dw6 |= GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL |
GEN6_WM_DW6_MSDISPMODE_PERSAMPLE;
}
dw8 = (fs->offset_16) ? cmd->bind.pipeline.fs_offset + fs->offset_16 : 0;
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = cmd->bind.pipeline.fs_offset;
dw[2] = dw2;
dw[3] = 0; /* scratch */
dw[4] = dw4;
dw[5] = dw5;
dw[6] = dw6;
dw[7] = 0; /* kernel 1 */
dw[8] = dw8; /* kernel 2 */
if (fs->per_thread_scratch_size)
gen6_add_scratch_space(cmd, pos + 3, pipeline, fs);
}
static void gen7_3DSTATE_WM(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *fs = &pipeline->fs;
const uint8_t cmd_len = 3;
uint32_t dw0, dw1, dw2, *dw;
CMD_ASSERT(cmd, 7, 7.5);
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
dw1 = GEN7_WM_DW1_STATISTICS |
GEN7_WM_DW1_PS_DISPATCH_ENABLE |
GEN7_WM_DW1_ZW_INTERP_PIXEL |
fs->barycentric_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT |
GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
if (fs->uses & INTEL_SHADER_USE_KILL ||
pipeline->alphaToCoverageEnable)
dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL;
dw1 |= fs->computed_depth_mode << GEN7_WM_DW1_PSCDEPTH__SHIFT;
if (fs->uses & INTEL_SHADER_USE_DEPTH)
dw1 |= GEN7_WM_DW1_PS_USE_DEPTH;
if (fs->uses & INTEL_SHADER_USE_W)
dw1 |= GEN7_WM_DW1_PS_USE_W;
dw2 = 0;
if (pipeline->sample_count > 1) {
dw1 |= GEN7_WM_DW1_MSRASTMODE_ON_PATTERN;
dw2 |= GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
} else {
dw1 |= GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL;
dw2 |= GEN7_WM_DW2_MSDISPMODE_PERSAMPLE;
}
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = dw1;
dw[2] = dw2;
}
static void gen7_3DSTATE_PS(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *fs = &pipeline->fs;
const uint8_t cmd_len = 8;
uint32_t dw0, dw2, dw4, dw5, dw7, *dw;
uint32_t pos;
CMD_ASSERT(cmd, 7, 7.5);
dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw2 = (fs->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
fs->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
dw4 = GEN7_PS_DW4_POSOFFSET_NONE |
GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
if (fs->offset_16)
dw4 |= GEN6_PS_DISPATCH_16 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
if (cmd_gen(cmd) >= INTEL_GEN(7.5)) {
dw4 |= (fs->max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
dw4 |= pipeline->cmd_sample_mask << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
} else {
dw4 |= (fs->max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
}
if (fs->in_count)
dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
if (pipeline->dual_source_blend_enable)
dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
dw5 = fs->urb_grf_start << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
fs->urb_grf_start_16 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
dw7 = (fs->offset_16) ? cmd->bind.pipeline.fs_offset + fs->offset_16 : 0;
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = cmd->bind.pipeline.fs_offset;
dw[2] = dw2;
dw[3] = 0; /* scratch */
dw[4] = dw4;
dw[5] = dw5;
dw[6] = 0; /* kernel 1 */
dw[7] = dw7; /* kernel 2 */
if (fs->per_thread_scratch_size)
gen6_add_scratch_space(cmd, pos + 3, pipeline, fs);
}
static void gen6_3DSTATE_MULTISAMPLE(struct intel_cmd *cmd,
uint32_t sample_count)
{
const uint8_t cmd_len = (cmd_gen(cmd) >= INTEL_GEN(7)) ? 4 : 3;
uint32_t dw1, dw2, dw3, *dw;
CMD_ASSERT(cmd, 6, 7.5);
switch (sample_count) {
case 4:
dw1 = GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
dw2 = cmd->dev->sample_pattern_4x;
dw3 = 0;
break;
case 8:
assert(cmd_gen(cmd) >= INTEL_GEN(7));
dw1 = GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
dw2 = cmd->dev->sample_pattern_8x[0];
dw3 = cmd->dev->sample_pattern_8x[1];
break;
default:
assert(sample_count <= 1);
dw1 = GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
dw2 = 0;
dw3 = 0;
break;
}
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2);
dw[1] = dw1;
dw[2] = dw2;
if (cmd_gen(cmd) >= INTEL_GEN(7))
dw[3] = dw3;
}
static void gen6_3DSTATE_DEPTH_BUFFER(struct intel_cmd *cmd,
const struct intel_att_view *view,
bool optimal_ds)
{
const uint8_t cmd_len = 7;
uint32_t dw0, *dw;
uint32_t pos;
CMD_ASSERT(cmd, 6, 7.5);
dw0 = (cmd_gen(cmd) >= INTEL_GEN(7)) ?
GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) :
GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER);
dw0 |= (cmd_len - 2);
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = view->att_cmd[0];
/* note that we only enable HiZ on Gen7+ */
if (!optimal_ds)
dw[1] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
dw[2] = 0;
dw[3] = view->att_cmd[2];
dw[4] = view->att_cmd[3];
dw[5] = view->att_cmd[4];
dw[6] = view->att_cmd[5];
if (view->img) {
cmd_reserve_reloc(cmd, 1);
cmd_batch_reloc(cmd, pos + 2, view->img->obj.mem->bo,
view->att_cmd[1], INTEL_RELOC_WRITE);
}
}
static void gen6_3DSTATE_STENCIL_BUFFER(struct intel_cmd *cmd,
const struct intel_att_view *view,
bool optimal_ds)
{
const uint8_t cmd_len = 3;
uint32_t dw0, *dw;
uint32_t pos;
CMD_ASSERT(cmd, 6, 7.5);
dw0 = (cmd_gen(cmd) >= INTEL_GEN(7)) ?
GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) :
GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER);
dw0 |= (cmd_len - 2);
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
if (view->has_stencil) {
dw[1] = view->att_cmd[6];
cmd_reserve_reloc(cmd, 1);
cmd_batch_reloc(cmd, pos + 2, view->img->obj.mem->bo,
view->att_cmd[7], INTEL_RELOC_WRITE);
} else {
dw[1] = 0;
dw[2] = 0;
}
}
static void gen6_3DSTATE_HIER_DEPTH_BUFFER(struct intel_cmd *cmd,
const struct intel_att_view *view,
bool optimal_ds)
{
const uint8_t cmd_len = 3;
uint32_t dw0, *dw;
uint32_t pos;
CMD_ASSERT(cmd, 6, 7.5);
dw0 = (cmd_gen(cmd) >= INTEL_GEN(7)) ?
GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) :
GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER);
dw0 |= (cmd_len - 2);
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
if (view->has_hiz && optimal_ds) {
dw[1] = view->att_cmd[8];
cmd_reserve_reloc(cmd, 1);
cmd_batch_reloc(cmd, pos + 2, view->img->obj.mem->bo,
view->att_cmd[9], INTEL_RELOC_WRITE);
} else {
dw[1] = 0;
dw[2] = 0;
}
}
static void gen6_3DSTATE_CLEAR_PARAMS(struct intel_cmd *cmd,
uint32_t clear_val)
{
const uint8_t cmd_len = 2;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) |
GEN6_CLEAR_PARAMS_DW0_VALID |
(cmd_len - 2);
uint32_t *dw;
CMD_ASSERT(cmd, 6, 6);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = clear_val;
}
static void gen7_3DSTATE_CLEAR_PARAMS(struct intel_cmd *cmd,
uint32_t clear_val)
{
const uint8_t cmd_len = 3;
const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) |
(cmd_len - 2);
uint32_t *dw;
CMD_ASSERT(cmd, 7, 7.5);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = clear_val;
dw[2] = 1;
}
static void gen6_3DSTATE_CC_STATE_POINTERS(struct intel_cmd *cmd,
uint32_t blend_offset,
uint32_t ds_offset,
uint32_t cc_offset)
{
const uint8_t cmd_len = 4;
uint32_t dw0, *dw;
CMD_ASSERT(cmd, 6, 6);
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) |
(cmd_len - 2);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = blend_offset | 1;
dw[2] = ds_offset | 1;
dw[3] = cc_offset | 1;
}
static void gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct intel_cmd *cmd,
uint32_t clip_offset,
uint32_t sf_offset,
uint32_t cc_offset)
{
const uint8_t cmd_len = 4;
uint32_t dw0, *dw;
CMD_ASSERT(cmd, 6, 6);
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) |
GEN6_VP_PTR_DW0_CLIP_CHANGED |
GEN6_VP_PTR_DW0_SF_CHANGED |
GEN6_VP_PTR_DW0_CC_CHANGED |
(cmd_len - 2);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = clip_offset;
dw[2] = sf_offset;
dw[3] = cc_offset;
}
static void gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct intel_cmd *cmd,
uint32_t scissor_offset)
{
const uint8_t cmd_len = 2;
uint32_t dw0, *dw;
CMD_ASSERT(cmd, 6, 6);
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) |
(cmd_len - 2);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = scissor_offset;
}
static void gen6_3DSTATE_BINDING_TABLE_POINTERS(struct intel_cmd *cmd,
uint32_t vs_offset,
uint32_t gs_offset,
uint32_t ps_offset)
{
const uint8_t cmd_len = 4;
uint32_t dw0, *dw;
CMD_ASSERT(cmd, 6, 6);
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) |
GEN6_BINDING_TABLE_PTR_DW0_VS_CHANGED |
GEN6_BINDING_TABLE_PTR_DW0_GS_CHANGED |
GEN6_BINDING_TABLE_PTR_DW0_PS_CHANGED |
(cmd_len - 2);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = vs_offset;
dw[2] = gs_offset;
dw[3] = ps_offset;
}
static void gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct intel_cmd *cmd,
uint32_t vs_offset,
uint32_t gs_offset,
uint32_t ps_offset)
{
const uint8_t cmd_len = 4;
uint32_t dw0, *dw;
CMD_ASSERT(cmd, 6, 6);
dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) |
GEN6_SAMPLER_PTR_DW0_VS_CHANGED |
GEN6_SAMPLER_PTR_DW0_GS_CHANGED |
GEN6_SAMPLER_PTR_DW0_PS_CHANGED |
(cmd_len - 2);
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = vs_offset;
dw[2] = gs_offset;
dw[3] = ps_offset;
}
static void gen7_3dstate_pointer(struct intel_cmd *cmd,
int subop, uint32_t offset)
{
const uint8_t cmd_len = 2;
const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER |
GEN6_RENDER_SUBTYPE_3D |
subop | (cmd_len - 2);
uint32_t *dw;
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = offset;
}
static uint32_t gen6_BLEND_STATE(struct intel_cmd *cmd)
{
const uint8_t cmd_align = GEN6_ALIGNMENT_BLEND_STATE;
const uint8_t cmd_len = INTEL_MAX_RENDER_TARGETS * 2;
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
CMD_ASSERT(cmd, 6, 7.5);
STATIC_ASSERT(ARRAY_SIZE(pipeline->cmd_cb) >= INTEL_MAX_RENDER_TARGETS);
return cmd_state_write(cmd, INTEL_CMD_ITEM_BLEND, cmd_align, cmd_len, pipeline->cmd_cb);
}
static uint32_t gen6_DEPTH_STENCIL_STATE(struct intel_cmd *cmd,
const struct intel_dynamic_stencil *stencil_state)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const uint8_t cmd_align = GEN6_ALIGNMENT_DEPTH_STENCIL_STATE;
const uint8_t cmd_len = 3;
uint32_t dw[3];
dw[0] = pipeline->cmd_depth_stencil;
/* TODO: enable back facing stencil state */
/* same read and write masks for both front and back faces */
dw[1] = (stencil_state->front.stencil_compare_mask & 0xff) << 24 |
(stencil_state->front.stencil_write_mask & 0xff) << 16 |
(stencil_state->front.stencil_compare_mask & 0xff) << 8 |
(stencil_state->front.stencil_write_mask & 0xff);
dw[2] = pipeline->cmd_depth_test;
CMD_ASSERT(cmd, 6, 7.5);
    if (stencil_state->front.stencil_write_mask && pipeline->stencilTestEnable)
        dw[0] |= 1 << 18; /* stencil buffer write enable */
return cmd_state_write(cmd, INTEL_CMD_ITEM_DEPTH_STENCIL,
cmd_align, cmd_len, dw);
}
static uint32_t gen6_COLOR_CALC_STATE(struct intel_cmd *cmd,
uint32_t stencil_ref,
const uint32_t blend_color[4])
{
const uint8_t cmd_align = GEN6_ALIGNMENT_COLOR_CALC_STATE;
const uint8_t cmd_len = 6;
uint32_t offset, *dw;
CMD_ASSERT(cmd, 6, 7.5);
offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_COLOR_CALC,
cmd_align, cmd_len, &dw);
dw[0] = stencil_ref;
dw[1] = 0;
dw[2] = blend_color[0];
dw[3] = blend_color[1];
dw[4] = blend_color[2];
dw[5] = blend_color[3];
return offset;
}
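/*
 * The cmd_wa_*() helpers below emit the PIPE_CONTROL sequences that the
 * PRMs require around certain state changes.  Most of them return early
 * while draw_count is zero, since a fresh batch has no outstanding work
 * to stall against.
 */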
static void cmd_wa_gen6_pre_depth_stall_write(struct intel_cmd *cmd)
{
CMD_ASSERT(cmd, 6, 7.5);
if (!cmd->bind.draw_count)
return;
if (cmd->bind.wa_flags & INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE)
return;
cmd->bind.wa_flags |= INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 60:
*
* "Pipe-control with CS-stall bit set must be sent BEFORE the
* pipe-control with a post-sync op and no write-cache flushes."
*
* The workaround below necessitates this workaround.
*/
gen6_PIPE_CONTROL(cmd,
GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
NULL, 0, 0);
gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_WRITE_IMM,
cmd->scratch_bo, 0, 0);
}
static void cmd_wa_gen6_pre_command_scoreboard_stall(struct intel_cmd *cmd)
{
CMD_ASSERT(cmd, 6, 7.5);
if (!cmd->bind.draw_count)
return;
gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
NULL, 0, 0);
}
static void cmd_wa_gen7_pre_vs_depth_stall_write(struct intel_cmd *cmd)
{
CMD_ASSERT(cmd, 7, 7.5);
if (!cmd->bind.draw_count)
return;
cmd_wa_gen6_pre_depth_stall_write(cmd);
gen6_PIPE_CONTROL(cmd,
GEN6_PIPE_CONTROL_DEPTH_STALL | GEN6_PIPE_CONTROL_WRITE_IMM,
cmd->scratch_bo, 0, 0);
}
static void cmd_wa_gen7_post_command_cs_stall(struct intel_cmd *cmd)
{
CMD_ASSERT(cmd, 7, 7.5);
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 61:
*
* "One of the following must also be set (when CS stall is set):
*
* * Render Target Cache Flush Enable ([12] of DW1)
* * Depth Cache Flush Enable ([0] of DW1)
* * Stall at Pixel Scoreboard ([1] of DW1)
* * Depth Stall ([13] of DW1)
* * Post-Sync Operation ([13] of DW1)"
*/
gen6_PIPE_CONTROL(cmd,
GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
NULL, 0, 0);
}
static void cmd_wa_gen7_post_command_depth_stall(struct intel_cmd *cmd)
{
CMD_ASSERT(cmd, 7, 7.5);
cmd_wa_gen6_pre_depth_stall_write(cmd);
gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_DEPTH_STALL, NULL, 0, 0);
}
static void cmd_wa_gen6_pre_multisample_depth_flush(struct intel_cmd *cmd)
{
CMD_ASSERT(cmd, 6, 7.5);
if (!cmd->bind.draw_count)
return;
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 305:
     *
     *     "Driver must guarantee that all the caches in the depth pipe are
     *      flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
     *      requires driver to send a PIPE_CONTROL with a CS stall along with
     *      a Depth Flush prior to this command."
     *
     * From the Ivy Bridge PRM, volume 2 part 1, page 304:
     *
     *     "Driver must guarantee that all the caches in the depth pipe are
     *      flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
     *      requires driver to send a PIPE_CONTROL with a CS stall along with
     *      a Depth Flush prior to this command."
     */
gen6_PIPE_CONTROL(cmd,
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_CS_STALL,
NULL, 0, 0);
}
static void cmd_wa_gen6_pre_ds_flush(struct intel_cmd *cmd)
{
CMD_ASSERT(cmd, 6, 7.5);
if (!cmd->bind.draw_count)
return;
    /*
     * From the Ivy Bridge PRM, volume 2 part 1, page 315:
     *
     *     "Driver must send at least one PIPE_CONTROL command with CS Stall
     *      and a post sync operation prior to the group of depth
     *      commands (3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
     *      3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
     *
     * This workaround satisfies all the conditions.
     */
cmd_wa_gen6_pre_depth_stall_write(cmd);
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
* "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
* any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
* 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
* issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
* set), followed by a pipelined depth cache flush (PIPE_CONTROL with
* Depth Flush Bit set, followed by another pipelined depth stall
* (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
* guarantee that the pipeline from WM onwards is already flushed
* (e.g., via a preceding MI_FLUSH)."
*/
gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_DEPTH_STALL, NULL, 0, 0);
gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH, NULL, 0, 0);
gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_DEPTH_STALL, NULL, 0, 0);
}
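/*
 * Program STATE_BASE_ADDRESS.  Bit 0 of each address/bound dword is its
 * "modify enable" bit, which is why every value below is OR'ed with 1;
 * the surface-state, dynamic-state, and instruction bases are then
 * relocated to the corresponding writers' buffers.
 */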
void cmd_batch_state_base_address(struct intel_cmd *cmd)
{
const uint8_t cmd_len = 10;
const uint32_t dw0 = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) |
(cmd_len - 2);
const uint32_t mocs = (cmd_gen(cmd) >= INTEL_GEN(7)) ?
(GEN7_MOCS_L3_WB << 8 | GEN7_MOCS_L3_WB << 4) : 0;
uint32_t pos;
uint32_t *dw;
CMD_ASSERT(cmd, 6, 7.5);
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
/* start offsets */
dw[1] = mocs | 1;
dw[2] = 1;
dw[3] = 1;
dw[4] = 1;
dw[5] = 1;
/* end offsets */
dw[6] = 1;
dw[7] = 1 + 0xfffff000;
dw[8] = 1 + 0xfffff000;
dw[9] = 1;
cmd_reserve_reloc(cmd, 3);
cmd_batch_reloc_writer(cmd, pos + 2, INTEL_CMD_WRITER_SURFACE,
cmd->writers[INTEL_CMD_WRITER_SURFACE].sba_offset + 1);
cmd_batch_reloc_writer(cmd, pos + 3, INTEL_CMD_WRITER_STATE,
cmd->writers[INTEL_CMD_WRITER_STATE].sba_offset + 1);
cmd_batch_reloc_writer(cmd, pos + 5, INTEL_CMD_WRITER_INSTRUCTION,
cmd->writers[INTEL_CMD_WRITER_INSTRUCTION].sba_offset + 1);
}
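/*
 * Partition the push-constant space among the shader stages: VS and PS
 * each get a non-zero allocation (twice as large on GT3), while HS, DS,
 * and GS are programmed with zero-sized ranges since no push constants
 * are handed to those stages here.
 */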
void cmd_batch_push_const_alloc(struct intel_cmd *cmd)
{
const uint32_t size = (cmd->dev->gpu->gt == 3) ? 16 : 8;
const uint8_t cmd_len = 2;
uint32_t offset = 0;
uint32_t *dw;
if (cmd_gen(cmd) <= INTEL_GEN(6))
return;
CMD_ASSERT(cmd, 7, 7.5);
/* 3DSTATE_PUSH_CONSTANT_ALLOC_x */
cmd_batch_pointer(cmd, cmd_len * 5, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) | (cmd_len - 2);
dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
size << GEN7_PCB_ALLOC_DW1_SIZE__SHIFT;
offset += size;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) | (cmd_len - 2);
dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
size << GEN7_PCB_ALLOC_DW1_SIZE__SHIFT;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) | (cmd_len - 2);
dw[1] = 0 << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
0 << GEN7_PCB_ALLOC_DW1_SIZE__SHIFT;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) | (cmd_len - 2);
dw[1] = 0 << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
0 << GEN7_PCB_ALLOC_DW1_SIZE__SHIFT;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) | (cmd_len - 2);
dw[1] = 0 << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
0 << GEN7_PCB_ALLOC_DW1_SIZE__SHIFT;
    /*
     * From the Ivy Bridge PRM, volume 2 part 1, page 292:
     *
     *     "A PIPE_CONTROL command with the CS Stall bit set must be
     *      programmed in the ring after this instruction
     *      (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
     */
cmd_wa_gen7_post_command_cs_stall(cmd);
}
void cmd_batch_flush(struct intel_cmd *cmd, uint32_t pipe_control_dw0)
{
if (pipe_control_dw0 == 0)
return;
if (!cmd->bind.draw_count)
return;
assert(!(pipe_control_dw0 & GEN6_PIPE_CONTROL_WRITE__MASK));
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 60:
*
* "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
* PIPE_CONTROL with any non-zero post-sync-op is required."
*/
if (pipe_control_dw0 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH)
cmd_wa_gen6_pre_depth_stall_write(cmd);
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 61:
*
* "One of the following must also be set (when CS stall is set):
*
* * Render Target Cache Flush Enable ([12] of DW1)
* * Depth Cache Flush Enable ([0] of DW1)
* * Stall at Pixel Scoreboard ([1] of DW1)
* * Depth Stall ([13] of DW1)
* * Post-Sync Operation ([13] of DW1)"
*/
if ((pipe_control_dw0 & GEN6_PIPE_CONTROL_CS_STALL) &&
!(pipe_control_dw0 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
GEN6_PIPE_CONTROL_DEPTH_STALL)))
pipe_control_dw0 |= GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
gen6_PIPE_CONTROL(cmd, pipe_control_dw0, NULL, 0, 0);
}
void cmd_batch_flush_all(struct intel_cmd *cmd)
{
cmd_batch_flush(cmd, GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_CS_STALL);
}
void cmd_batch_depth_count(struct intel_cmd *cmd,
struct intel_bo *bo,
VkDeviceSize offset)
{
cmd_wa_gen6_pre_depth_stall_write(cmd);
gen6_PIPE_CONTROL(cmd,
GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT,
bo, offset, 0);
}
void cmd_batch_timestamp(struct intel_cmd *cmd,
struct intel_bo *bo,
VkDeviceSize offset)
{
/* need any WA or stall? */
gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_WRITE_TIMESTAMP, bo, offset, 0);
}
void cmd_batch_immediate(struct intel_cmd *cmd,
uint32_t pipe_control_flags,
struct intel_bo *bo,
VkDeviceSize offset,
uint64_t val)
{
/* need any WA or stall? */
gen6_PIPE_CONTROL(cmd,
GEN6_PIPE_CONTROL_WRITE_IMM | pipe_control_flags,
bo, offset, val);
}
static void gen6_cc_states(struct intel_cmd *cmd)
{
const struct intel_dynamic_blend *blend = &cmd->bind.state.blend;
const struct intel_dynamic_stencil *ss = &cmd->bind.state.stencil;
uint32_t blend_offset, ds_offset, cc_offset;
uint32_t stencil_ref;
uint32_t blend_color[4];
CMD_ASSERT(cmd, 6, 6);
blend_offset = gen6_BLEND_STATE(cmd);
if (blend)
memcpy(blend_color, blend->blend_const, sizeof(blend_color));
else
memset(blend_color, 0, sizeof(blend_color));
if (ss) {
ds_offset = gen6_DEPTH_STENCIL_STATE(cmd, ss);
/* TODO: enable back facing stencil state */
/* same reference for both front and back faces */
stencil_ref = (ss->front.stencil_reference & 0xff) << 24 |
(ss->front.stencil_reference & 0xff) << 16;
} else {
ds_offset = 0;
stencil_ref = 0;
}
cc_offset = gen6_COLOR_CALC_STATE(cmd, stencil_ref, blend_color);
gen6_3DSTATE_CC_STATE_POINTERS(cmd, blend_offset, ds_offset, cc_offset);
}
static void gen6_viewport_states(struct intel_cmd *cmd)
{
const struct intel_dynamic_viewport *viewport = &cmd->bind.state.viewport;
uint32_t sf_offset, clip_offset, cc_offset, scissor_offset;
if (!viewport)
return;
    assert(viewport->cmd_len ==
           (8 /* SF_VIEWPORT */ + 4 /* CLIP_VIEWPORT */ + 2 /* CC_VIEWPORT */ +
            2 /* SCISSOR_RECT */) * viewport->viewport_count);
sf_offset = cmd_state_write(cmd, INTEL_CMD_ITEM_SF_VIEWPORT,
GEN6_ALIGNMENT_SF_VIEWPORT, 8 * viewport->viewport_count,
viewport->cmd);
clip_offset = cmd_state_write(cmd, INTEL_CMD_ITEM_CLIP_VIEWPORT,
GEN6_ALIGNMENT_CLIP_VIEWPORT, 4 * viewport->viewport_count,
&viewport->cmd[viewport->cmd_clip_pos]);
    cc_offset = cmd_state_write(cmd, INTEL_CMD_ITEM_CC_VIEWPORT,
            GEN6_ALIGNMENT_CC_VIEWPORT, 2 * viewport->viewport_count,
            &viewport->cmd[viewport->cmd_cc_pos]);
scissor_offset = cmd_state_write(cmd, INTEL_CMD_ITEM_SCISSOR_RECT,
GEN6_ALIGNMENT_SCISSOR_RECT, 2 * viewport->viewport_count,
&viewport->cmd[viewport->cmd_scissor_rect_pos]);
gen6_3DSTATE_VIEWPORT_STATE_POINTERS(cmd,
clip_offset, sf_offset, cc_offset);
gen6_3DSTATE_SCISSOR_STATE_POINTERS(cmd, scissor_offset);
}
static void gen7_cc_states(struct intel_cmd *cmd)
{
const struct intel_dynamic_blend *blend = &cmd->bind.state.blend;
const struct intel_dynamic_depth_bounds *ds = &cmd->bind.state.depth_bounds;
const struct intel_dynamic_stencil *ss = &cmd->bind.state.stencil;
uint32_t stencil_ref;
uint32_t blend_color[4];
uint32_t offset;
CMD_ASSERT(cmd, 7, 7.5);
if (!blend && !ds)
return;
offset = gen6_BLEND_STATE(cmd);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS, offset);
if (blend)
memcpy(blend_color, blend->blend_const, sizeof(blend_color));
else
memset(blend_color, 0, sizeof(blend_color));
if (ss) {
offset = gen6_DEPTH_STENCIL_STATE(cmd, ss);
/* TODO: enable back facing stencil state */
/* same reference for both front and back faces */
stencil_ref = (ss->front.stencil_reference & 0xff) << 24 |
(ss->front.stencil_reference & 0xff) << 16;
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS,
offset);
} else {
stencil_ref = 0;
}
offset = gen6_COLOR_CALC_STATE(cmd, stencil_ref, blend_color);
gen7_3dstate_pointer(cmd,
GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, offset);
}
static void gen7_viewport_states(struct intel_cmd *cmd)
{
const struct intel_dynamic_viewport *viewport = &cmd->bind.state.viewport;
uint32_t offset;
if (!viewport)
return;
assert(viewport->cmd_len == (16 + 2 + 2) * viewport->viewport_count);
offset = cmd_state_write(cmd, INTEL_CMD_ITEM_SF_VIEWPORT,
GEN7_ALIGNMENT_SF_CLIP_VIEWPORT, 16 * viewport->viewport_count,
viewport->cmd);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
offset);
offset = cmd_state_write(cmd, INTEL_CMD_ITEM_CC_VIEWPORT,
GEN6_ALIGNMENT_CC_VIEWPORT, 2 * viewport->viewport_count,
&viewport->cmd[viewport->cmd_cc_pos]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
offset);
offset = cmd_state_write(cmd, INTEL_CMD_ITEM_SCISSOR_RECT,
GEN6_ALIGNMENT_SCISSOR_RECT, 2 * viewport->viewport_count,
&viewport->cmd[viewport->cmd_scissor_rect_pos]);
gen7_3dstate_pointer(cmd,
GEN6_RENDER_OPCODE_3DSTATE_SCISSOR_STATE_POINTERS,
offset);
}
static void gen6_pcb(struct intel_cmd *cmd, int subop,
const struct intel_pipeline_shader *sh)
{
const uint8_t cmd_len = 5;
uint32_t *dw;
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = GEN6_RENDER_TYPE_RENDER |
GEN6_RENDER_SUBTYPE_3D |
subop | (cmd_len - 2);
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
}
static void gen7_pcb(struct intel_cmd *cmd, int subop,
const struct intel_pipeline_shader *sh)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = GEN6_RENDER_TYPE_RENDER |
GEN6_RENDER_SUBTYPE_3D |
subop | (cmd_len - 2);
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
dw[5] = 0;
dw[6] = 0;
}
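/*
 * Write SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE for every sampler
 * slot in the rmap, returning the offset of the sampler-state array.
 * Sampler slots follow the surface slots in rmap->slots[], hence the
 * surface_count offset into the array below.
 */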
static uint32_t emit_samplers(struct intel_cmd *cmd,
const struct intel_pipeline_rmap *rmap)
{
const struct intel_desc_region *region = cmd->dev->desc_region;
const struct intel_cmd_dset_data *data = &cmd->bind.dset.graphics_data;
const uint32_t border_len = (cmd_gen(cmd) >= INTEL_GEN(7)) ? 4 : 12;
const uint32_t border_stride =
u_align(border_len, GEN6_ALIGNMENT_SAMPLER_BORDER_COLOR_STATE / 4);
uint32_t border_offset, *border_dw, sampler_offset, *sampler_dw;
uint32_t surface_count;
uint32_t i;
CMD_ASSERT(cmd, 6, 7.5);
if (!rmap || !rmap->sampler_count)
return 0;
surface_count = rmap->rt_count + rmap->texture_resource_count + rmap->resource_count + rmap->uav_count;
/*
* note that we cannot call cmd_state_pointer() here as the following
* cmd_state_pointer() would invalidate the pointer
*/
border_offset = cmd_state_reserve(cmd, INTEL_CMD_ITEM_BLOB,
GEN6_ALIGNMENT_SAMPLER_BORDER_COLOR_STATE,
border_stride * rmap->sampler_count);
sampler_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_SAMPLER,
GEN6_ALIGNMENT_SAMPLER_STATE,
4 * rmap->sampler_count, &sampler_dw);
cmd_state_update(cmd, border_offset,
border_stride * rmap->sampler_count, &border_dw);
for (i = 0; i < rmap->sampler_count; i++) {
const struct intel_pipeline_rmap_slot *slot =
&rmap->slots[surface_count + i];
struct intel_desc_offset desc_offset;
const struct intel_sampler *sampler;
switch (slot->type) {
case INTEL_PIPELINE_RMAP_SAMPLER:
intel_desc_offset_add(&desc_offset, &slot->u.sampler,
&data->set_offsets[slot->index]);
intel_desc_region_read_sampler(region, &desc_offset, &sampler);
break;
case INTEL_PIPELINE_RMAP_UNUSED:
sampler = NULL;
break;
default:
assert(!"unexpected rmap type");
sampler = NULL;
break;
}
if (sampler) {
memcpy(border_dw, &sampler->cmd[3], border_len * 4);
sampler_dw[0] = sampler->cmd[0];
sampler_dw[1] = sampler->cmd[1];
sampler_dw[2] = border_offset;
sampler_dw[3] = sampler->cmd[2];
} else {
sampler_dw[0] = GEN6_SAMPLER_DW0_DISABLE;
sampler_dw[1] = 0;
sampler_dw[2] = 0;
sampler_dw[3] = 0;
}
border_offset += border_stride * 4;
border_dw += border_stride;
sampler_dw += 4;
}
return sampler_offset;
}
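/*
 * Build the binding table for one shader stage: one SURFACE_STATE per
 * rmap slot (a render target, a buffer/image surface, or a null view
 * for unused slots), followed by the table of their offsets.  All
 * offsets are relative to the surface writer's state base address.
 */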
static uint32_t emit_binding_table(struct intel_cmd *cmd,
const struct intel_pipeline_rmap *rmap,
const VkShaderStageFlagBits stage)
{
const struct intel_desc_region *region = cmd->dev->desc_region;
const struct intel_cmd_dset_data *data = &cmd->bind.dset.graphics_data;
const uint32_t sba_offset =
cmd->writers[INTEL_CMD_WRITER_SURFACE].sba_offset;
uint32_t binding_table[256], offset;
uint32_t surface_count, i;
CMD_ASSERT(cmd, 6, 7.5);
surface_count = (rmap) ?
rmap->rt_count + rmap->texture_resource_count + rmap->resource_count + rmap->uav_count : 0;
if (!surface_count)
return 0;
assert(surface_count <= ARRAY_SIZE(binding_table));
for (i = 0; i < surface_count; i++) {
const struct intel_pipeline_rmap_slot *slot = &rmap->slots[i];
struct intel_null_view null_view;
bool need_null_view = false;
switch (slot->type) {
case INTEL_PIPELINE_RMAP_RT:
{
const struct intel_render_pass_subpass *subpass =
cmd->bind.render_pass_subpass;
const struct intel_fb *fb = cmd->bind.fb;
const struct intel_att_view *view =
(slot->index < subpass->color_count &&
subpass->color_indices[slot->index] < fb->view_count) ?
fb->views[subpass->color_indices[slot->index]] : NULL;
if (view) {
offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_SURFACE,
GEN6_ALIGNMENT_SURFACE_STATE,
view->cmd_len, view->att_cmd);
cmd_reserve_reloc(cmd, 1);
cmd_surface_reloc(cmd, offset, 1, view->img->obj.mem->bo,
view->att_cmd[1], INTEL_RELOC_WRITE);
} else {
need_null_view = true;
}
}
break;
case INTEL_PIPELINE_RMAP_SURFACE:
{
const struct intel_pipeline_layout U_ASSERT_ONLY *pipeline_layout =
cmd->bind.pipeline.graphics->pipeline_layout;
const int32_t dyn_idx = slot->u.surface.dynamic_offset_index;
struct intel_desc_offset desc_offset;
const struct intel_mem *mem;
bool read_only;
const uint32_t *cmd_data;
uint32_t cmd_len;
assert(dyn_idx < 0 ||
dyn_idx < pipeline_layout->total_dynamic_desc_count);
intel_desc_offset_add(&desc_offset, &slot->u.surface.offset,
&data->set_offsets[slot->index]);
intel_desc_region_read_surface(region, &desc_offset, stage,
&mem, &read_only, &cmd_data, &cmd_len);
if (mem) {
const uint32_t dynamic_offset = (dyn_idx >= 0) ?
data->dynamic_offsets[dyn_idx] : 0;
const uint32_t reloc_flags =
(read_only) ? 0 : INTEL_RELOC_WRITE;
offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_SURFACE,
GEN6_ALIGNMENT_SURFACE_STATE,
cmd_len, cmd_data);
cmd_reserve_reloc(cmd, 1);
cmd_surface_reloc(cmd, offset, 1, mem->bo,
cmd_data[1] + dynamic_offset, reloc_flags);
} else {
need_null_view = true;
}
}
break;
case INTEL_PIPELINE_RMAP_UNUSED:
need_null_view = true;
break;
default:
assert(!"unexpected rmap type");
need_null_view = true;
break;
}
if (need_null_view) {
intel_null_view_init(&null_view, cmd->dev);
offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_SURFACE,
GEN6_ALIGNMENT_SURFACE_STATE,
null_view.cmd_len, null_view.cmd);
}
binding_table[i] = offset - sba_offset;
}
offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_BINDING_TABLE,
GEN6_ALIGNMENT_BINDING_TABLE_STATE,
surface_count, binding_table) - sba_offset;
    /* there is a 64KB limit on BINDING_TABLE_STATEs */
assert(offset + sizeof(uint32_t) * surface_count <= 64 * 1024);
return offset;
}
static void gen6_3DSTATE_VERTEX_BUFFERS(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const uint8_t cmd_len = 1 + 4 * pipeline->vb_count;
uint32_t *dw;
uint32_t pos, i;
CMD_ASSERT(cmd, 6, 7.5);
if (!pipeline->vb_count)
return;
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
dw++;
pos++;
for (i = 0; i < pipeline->vb_count; i++) {
assert(pipeline->vb[i].stride <= 2048);
dw[0] = i << GEN6_VB_DW0_INDEX__SHIFT |
pipeline->vb[i].stride;
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
dw[0] |= GEN7_MOCS_L3_WB << GEN6_VB_DW0_MOCS__SHIFT |
GEN7_VB_DW0_ADDR_MODIFIED;
}
switch (pipeline->vb[i].inputRate) {
case VK_VERTEX_INPUT_RATE_VERTEX:
dw[0] |= GEN6_VB_DW0_ACCESS_VERTEXDATA;
dw[3] = 0;
break;
case VK_VERTEX_INPUT_RATE_INSTANCE:
dw[0] |= GEN6_VB_DW0_ACCESS_INSTANCEDATA;
dw[3] = 1;
break;
default:
assert(!"unknown step rate");
dw[0] |= GEN6_VB_DW0_ACCESS_VERTEXDATA;
dw[3] = 0;
break;
}
if (cmd->bind.vertex.buf[i]) {
const struct intel_buf *buf = cmd->bind.vertex.buf[i];
const VkDeviceSize offset = cmd->bind.vertex.offset[i];
cmd_reserve_reloc(cmd, 2);
cmd_batch_reloc(cmd, pos + 1, buf->obj.mem->bo, offset, 0);
cmd_batch_reloc(cmd, pos + 2, buf->obj.mem->bo, buf->size - 1, 0);
} else {
dw[0] |= GEN6_VB_DW0_IS_NULL;
dw[1] = 0;
dw[2] = 0;
}
dw += 4;
pos += 4;
}
}
static void gen6_3DSTATE_VS(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
const struct intel_pipeline_shader *vs = &pipeline->vs;
const uint8_t cmd_len = 6;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
uint32_t dw2, dw4, dw5, *dw;
uint32_t pos;
int vue_read_len;
CMD_ASSERT(cmd, 6, 7.5);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 135:
*
* "(Vertex URB Entry Read Length) Specifies the number of pairs of
* 128-bit vertex elements to be passed into the payload for each
* vertex."
*
* "It is UNDEFINED to set this field to 0 indicating no Vertex URB
* data to be read and passed to the thread."
*/
vue_read_len = (vs->in_count + 1) / 2;
if (!vue_read_len)
vue_read_len = 1;
dw2 = (vs->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
vs->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
dw4 = vs->urb_grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
dw5 = GEN6_VS_DW5_STATISTICS |
GEN6_VS_DW5_VS_ENABLE;
if (cmd_gen(cmd) >= INTEL_GEN(7.5))
dw5 |= (vs->max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
else
dw5 |= (vs->max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
if (pipeline->disable_vs_cache)
dw5 |= GEN6_VS_DW5_CACHE_DISABLE;
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = dw0;
dw[1] = cmd->bind.pipeline.vs_offset;
dw[2] = dw2;
dw[3] = 0; /* scratch */
dw[4] = dw4;
dw[5] = dw5;
if (vs->per_thread_scratch_size)
gen6_add_scratch_space(cmd, pos + 3, pipeline, vs);
}
static void emit_shader_resources(struct intel_cmd *cmd)
{
/* five HW shader stages */
uint32_t binding_tables[5], samplers[5];
binding_tables[0] = emit_binding_table(cmd,
cmd->bind.pipeline.graphics->vs.rmap,
VK_SHADER_STAGE_VERTEX_BIT);
binding_tables[1] = emit_binding_table(cmd,
cmd->bind.pipeline.graphics->tcs.rmap,
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
binding_tables[2] = emit_binding_table(cmd,
cmd->bind.pipeline.graphics->tes.rmap,
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
binding_tables[3] = emit_binding_table(cmd,
cmd->bind.pipeline.graphics->gs.rmap,
VK_SHADER_STAGE_GEOMETRY_BIT);
binding_tables[4] = emit_binding_table(cmd,
cmd->bind.pipeline.graphics->fs.rmap,
VK_SHADER_STAGE_FRAGMENT_BIT);
samplers[0] = emit_samplers(cmd, cmd->bind.pipeline.graphics->vs.rmap);
samplers[1] = emit_samplers(cmd, cmd->bind.pipeline.graphics->tcs.rmap);
samplers[2] = emit_samplers(cmd, cmd->bind.pipeline.graphics->tes.rmap);
samplers[3] = emit_samplers(cmd, cmd->bind.pipeline.graphics->gs.rmap);
samplers[4] = emit_samplers(cmd, cmd->bind.pipeline.graphics->fs.rmap);
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS,
binding_tables[0]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_HS,
binding_tables[1]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_DS,
binding_tables[2]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_GS,
binding_tables[3]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS,
binding_tables[4]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_VS,
samplers[0]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_HS,
samplers[1]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_DS,
samplers[2]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_GS,
samplers[3]);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS,
samplers[4]);
} else {
assert(!binding_tables[1] && !binding_tables[2]);
gen6_3DSTATE_BINDING_TABLE_POINTERS(cmd,
binding_tables[0], binding_tables[3], binding_tables[4]);
assert(!samplers[1] && !samplers[2]);
gen6_3DSTATE_SAMPLER_STATE_POINTERS(cmd,
samplers[0], samplers[3], samplers[4]);
}
}
static void emit_msaa(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
if (!cmd->bind.render_pass_changed)
return;
cmd_wa_gen6_pre_multisample_depth_flush(cmd);
gen6_3DSTATE_MULTISAMPLE(cmd, pipeline->sample_count);
}
static void emit_rt(struct intel_cmd *cmd)
{
const struct intel_fb *fb = cmd->bind.fb;
if (!cmd->bind.render_pass_changed)
return;
cmd_wa_gen6_pre_depth_stall_write(cmd);
gen6_3DSTATE_DRAWING_RECTANGLE(cmd, fb->width,
fb->height);
}
static void emit_ds(struct intel_cmd *cmd)
{
const struct intel_render_pass *rp = cmd->bind.render_pass;
const struct intel_render_pass_subpass *subpass =
cmd->bind.render_pass_subpass;
const struct intel_fb *fb = cmd->bind.fb;
const struct intel_att_view *view =
(subpass->ds_index < rp->attachment_count) ?
fb->views[subpass->ds_index] : NULL;
if (!cmd->bind.render_pass_changed)
return;
if (!view) {
/* all zeros */
static const struct intel_att_view null_view;
view = &null_view;
}
cmd_wa_gen6_pre_ds_flush(cmd);
gen6_3DSTATE_DEPTH_BUFFER(cmd, view, subpass->ds_optimal);
gen6_3DSTATE_STENCIL_BUFFER(cmd, view, subpass->ds_optimal);
gen6_3DSTATE_HIER_DEPTH_BUFFER(cmd, view, subpass->ds_optimal);
if (cmd_gen(cmd) >= INTEL_GEN(7))
gen7_3DSTATE_CLEAR_PARAMS(cmd, 0);
else
gen6_3DSTATE_CLEAR_PARAMS(cmd, 0);
}
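/*
 * Upload a shader kernel into the instruction buffer, with a small
 * per-command-buffer cache so that re-binding a pipeline does not
 * upload the same kernel twice.
 */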
static uint32_t emit_shader(struct intel_cmd *cmd,
const struct intel_pipeline_shader *shader)
{
struct intel_cmd_shader_cache *cache = &cmd->bind.shader_cache;
uint32_t offset;
uint32_t i;
/* see if the shader is already in the cache */
for (i = 0; i < cache->used; i++) {
if (cache->entries[i].shader == (const void *) shader)
return cache->entries[i].kernel_offset;
}
offset = cmd_instruction_write(cmd, shader->codeSize, shader->pCode);
/* grow the cache if full */
if (cache->used >= cache->count) {
const uint32_t count = cache->count + 16;
void *entries;
entries = intel_alloc(cmd, sizeof(cache->entries[0]) * count, 0,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (entries) {
if (cache->entries) {
memcpy(entries, cache->entries,
sizeof(cache->entries[0]) * cache->used);
intel_free(cmd, cache->entries);
}
cache->entries = entries;
cache->count = count;
}
}
/* add the shader to the cache */
if (cache->used < cache->count) {
cache->entries[cache->used].shader = (const void *) shader;
cache->entries[cache->used].kernel_offset = offset;
cache->used++;
}
return offset;
}
static void emit_graphics_pipeline(struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
if (pipeline->wa_flags & INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE)
cmd_wa_gen6_pre_depth_stall_write(cmd);
if (pipeline->wa_flags & INTEL_CMD_WA_GEN6_PRE_COMMAND_SCOREBOARD_STALL)
cmd_wa_gen6_pre_command_scoreboard_stall(cmd);
if (pipeline->wa_flags & INTEL_CMD_WA_GEN7_PRE_VS_DEPTH_STALL_WRITE)
cmd_wa_gen7_pre_vs_depth_stall_write(cmd);
    /* 3DSTATE_URB_VS, etc. */
assert(pipeline->cmd_len);
cmd_batch_write(cmd, pipeline->cmd_len, pipeline->cmds);
if (pipeline->active_shaders & SHADER_VERTEX_FLAG) {
cmd->bind.pipeline.vs_offset = emit_shader(cmd, &pipeline->vs);
}
if (pipeline->active_shaders & SHADER_TESS_CONTROL_FLAG) {
cmd->bind.pipeline.tcs_offset = emit_shader(cmd, &pipeline->tcs);
}
if (pipeline->active_shaders & SHADER_TESS_EVAL_FLAG) {
cmd->bind.pipeline.tes_offset = emit_shader(cmd, &pipeline->tes);
}
if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) {
cmd->bind.pipeline.gs_offset = emit_shader(cmd, &pipeline->gs);
}
if (pipeline->active_shaders & SHADER_FRAGMENT_FLAG) {
cmd->bind.pipeline.fs_offset = emit_shader(cmd, &pipeline->fs);
}
if (pipeline->wa_flags & INTEL_CMD_WA_GEN7_POST_COMMAND_CS_STALL)
cmd_wa_gen7_post_command_cs_stall(cmd);
if (pipeline->wa_flags & INTEL_CMD_WA_GEN7_POST_COMMAND_DEPTH_STALL)
cmd_wa_gen7_post_command_depth_stall(cmd);
}
static void
viewport_get_guardband(const struct intel_gpu *gpu,
int center_x, int center_y,
int *min_gbx, int *max_gbx,
int *min_gby, int *max_gby)
{
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 234:
*
* "Per-Device Guardband Extents
*
* - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
* - Maximum Post-Clamp Delta (X or Y): 16K"
*
* "In addition, in order to be correctly rendered, objects must have a
* screenspace bounding box not exceeding 8K in the X or Y direction.
* This additional restriction must also be comprehended by software,
* i.e., enforced by use of clipping."
*
* From the Ivy Bridge PRM, volume 2 part 1, page 248:
*
* "Per-Device Guardband Extents
*
* - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
* - Maximum Post-Clamp Delta (X or Y): N/A"
*
* "In addition, in order to be correctly rendered, objects must have a
* screenspace bounding box not exceeding 8K in the X or Y direction.
* This additional restriction must also be comprehended by software,
* i.e., enforced by use of clipping."
*
* Combined, the bounding box of any object cannot exceed 8K in either
* width or height.
*
* Below we set the guardband to a square with sides of length 8K,
* centered at the viewport. This ensures that all objects passing the
* GB test are valid to the renderer, and that those failing the XY
* clipping have a better chance of passing the GB test.
*/
const int max_extent = (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 32768 : 16384;
const int half_len = 8192 / 2;
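/*
 * Illustrative numbers (not exercised at runtime): on gen7+, max_extent
 * is 32768 and half_len is 4096, so a viewport centered at (0, 0) gets
 * a guardband of [-4096, 4096] in both X and Y, while a center_x of
 * 30000 is first clamped to 32768 - 4096 = 28672.
 */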
/* make sure the guardband is within the valid range */
if (center_x - half_len < -max_extent)
center_x = -max_extent + half_len;
else if (center_x + half_len > max_extent - 1)
center_x = max_extent - half_len;
if (center_y - half_len < -max_extent)
center_y = -max_extent + half_len;
else if (center_y + half_len > max_extent - 1)
center_y = max_extent - half_len;
*min_gbx = center_x - half_len;
*max_gbx = center_x + half_len;
*min_gby = center_y - half_len;
*max_gby = center_y + half_len;
}
static void
viewport_state_cmd(struct intel_dynamic_viewport *state,
const struct intel_gpu *gpu,
uint32_t count)
{
INTEL_GPU_ASSERT(gpu, 6, 7.5);
state->viewport_count = count;
assert(count <= INTEL_MAX_VIEWPORTS);
if (intel_gpu_gen(gpu) >= INTEL_GEN(7)) {
state->cmd_len = 16 * count;
state->cmd_clip_pos = 8;
} else {
state->cmd_len = 8 * count;
state->cmd_clip_pos = state->cmd_len;
state->cmd_len += 4 * count;
}
state->cmd_cc_pos = state->cmd_len;
state->cmd_len += 2 * count;
state->cmd_scissor_rect_pos = state->cmd_len;
state->cmd_len += 2 * count;
assert(sizeof(uint32_t) * state->cmd_len <= sizeof(state->cmd));
}
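/*
 * The resulting dword layout of state->cmd, following the math above:
 *
 *   gen7+: SF_CLIP_VIEWPORT (16 dw each, CLIP part at dw 8) x count,
 *          then CC_VIEWPORT (2 dw) x count, SCISSOR_RECT (2 dw) x count
 *   gen6:  SF_VIEWPORT (8 dw) x count, CLIP_VIEWPORT (4 dw) x count,
 *          then CC_VIEWPORT (2 dw) x count, SCISSOR_RECT (2 dw) x count
 */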
static void
set_viewport_state(
struct intel_cmd* cmd)
{
const struct intel_gpu *gpu = cmd->dev->gpu;
struct intel_dynamic_viewport *state = &cmd->bind.state.viewport;
const uint32_t sf_stride = (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 16 : 8;
const uint32_t clip_stride = (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 16 : 4;
uint32_t *sf_viewport, *clip_viewport, *cc_viewport, *scissor_rect;
uint32_t i;
INTEL_GPU_ASSERT(gpu, 6, 7.5);
viewport_state_cmd(state, gpu, cmd->bind.state.viewport.viewport_count);
sf_viewport = state->cmd;
clip_viewport = state->cmd + state->cmd_clip_pos;
cc_viewport = state->cmd + state->cmd_cc_pos;
scissor_rect = state->cmd + state->cmd_scissor_rect_pos;
for (i = 0; i < cmd->bind.state.viewport.viewport_count; i++) {
const VkViewport *viewport = &cmd->bind.state.viewport.viewports[i];
uint32_t *dw = NULL;
float translate[3], scale[3];
int min_gbx, max_gbx, min_gby, max_gby;
scale[0] = viewport->width / 2.0f;
scale[1] = viewport->height / 2.0f;
scale[2] = viewport->maxDepth - viewport->minDepth;
translate[0] = viewport->x + scale[0];
translate[1] = viewport->y + scale[1];
translate[2] = viewport->minDepth;
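/*
 * This is the usual viewport transform, screen = translate + ndc * scale:
 * ndc_x in [-1, 1] maps to [x, x + width], and ndc_z in [0, 1] maps to
 * [minDepth, maxDepth].
 */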
viewport_get_guardband(gpu, (int) translate[0], (int) translate[1],
&min_gbx, &max_gbx, &min_gby, &max_gby);
/* SF_VIEWPORT */
dw = sf_viewport;
dw[0] = u_fui(scale[0]);
dw[1] = u_fui(scale[1]);
dw[2] = u_fui(scale[2]);
dw[3] = u_fui(translate[0]);
dw[4] = u_fui(translate[1]);
dw[5] = u_fui(translate[2]);
dw[6] = 0;
dw[7] = 0;
sf_viewport += sf_stride;
/* CLIP_VIEWPORT */
dw = clip_viewport;
dw[0] = u_fui(((float) min_gbx - translate[0]) / fabsf(scale[0]));
dw[1] = u_fui(((float) max_gbx - translate[0]) / fabsf(scale[0]));
dw[2] = u_fui(((float) min_gby - translate[1]) / fabsf(scale[1]));
dw[3] = u_fui(((float) max_gby - translate[1]) / fabsf(scale[1]));
clip_viewport += clip_stride;
/* CC_VIEWPORT */
dw = cc_viewport;
dw[0] = u_fui(viewport->minDepth);
dw[1] = u_fui(viewport->maxDepth);
cc_viewport += 2;
}
for (i = 0; i < cmd->bind.state.viewport.viewport_count; i++) {
const VkRect2D *scissor = &cmd->bind.state.viewport.scissors[i];
/* SCISSOR_RECT */
int16_t max_x, max_y;
uint32_t *dw = NULL;
max_x = (scissor->offset.x + scissor->extent.width - 1) & 0xffff;
max_y = (scissor->offset.y + scissor->extent.height - 1) & 0xffff;
dw = scissor_rect;
if (scissor->extent.width && scissor->extent.height) {
dw[0] = (scissor->offset.y & 0xffff) << 16 |
(scissor->offset.x & 0xffff);
dw[1] = max_y << 16 | max_x;
} else {
dw[0] = 1 << 16 | 1;
dw[1] = 0;
}
scissor_rect += 2;
}
}
static void emit_bounded_states(struct intel_cmd *cmd)
{
set_viewport_state(cmd);
emit_msaa(cmd);
emit_graphics_pipeline(cmd);
emit_rt(cmd);
emit_ds(cmd);
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
gen7_cc_states(cmd);
gen7_viewport_states(cmd);
gen7_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS,
&cmd->bind.pipeline.graphics->vs);
gen7_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
&cmd->bind.pipeline.graphics->gs);
gen7_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS,
&cmd->bind.pipeline.graphics->fs);
gen7_3DSTATE_GS(cmd);
gen6_3DSTATE_CLIP(cmd);
gen7_3DSTATE_SF(cmd);
gen7_3DSTATE_WM(cmd);
gen7_3DSTATE_PS(cmd);
} else {
gen6_cc_states(cmd);
gen6_viewport_states(cmd);
gen6_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS,
&cmd->bind.pipeline.graphics->vs);
gen6_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
&cmd->bind.pipeline.graphics->gs);
gen6_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS,
&cmd->bind.pipeline.graphics->fs);
gen6_3DSTATE_GS(cmd);
gen6_3DSTATE_CLIP(cmd);
gen6_3DSTATE_SF(cmd);
gen6_3DSTATE_WM(cmd);
}
emit_shader_resources(cmd);
cmd_wa_gen6_pre_depth_stall_write(cmd);
gen6_3DSTATE_VERTEX_BUFFERS(cmd);
gen6_3DSTATE_VS(cmd);
}
static uint32_t gen6_meta_DEPTH_STENCIL_STATE(struct intel_cmd *cmd,
const struct intel_cmd_meta *meta)
{
const uint8_t cmd_align = GEN6_ALIGNMENT_DEPTH_STENCIL_STATE;
const uint8_t cmd_len = 3;
uint32_t dw[3];
CMD_ASSERT(cmd, 6, 7.5);
/* TODO: aspect is now a mask, can you do both? */
if (meta->ds.aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
dw[0] = 0;
dw[1] = 0;
if (meta->ds.op == INTEL_CMD_META_DS_RESOLVE) {
dw[2] = GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
GEN6_COMPAREFUNCTION_NEVER << 27 |
GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
} else {
dw[2] = GEN6_COMPAREFUNCTION_ALWAYS << 27 |
GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
}
} else if (meta->ds.aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
dw[0] = GEN6_ZS_DW0_STENCIL_TEST_ENABLE |
(GEN6_COMPAREFUNCTION_ALWAYS) << 28 |
(GEN6_STENCILOP_KEEP) << 25 |
(GEN6_STENCILOP_KEEP) << 22 |
(GEN6_STENCILOP_REPLACE) << 19 |
GEN6_ZS_DW0_STENCIL_WRITE_ENABLE |
GEN6_ZS_DW0_STENCIL1_ENABLE |
(GEN6_COMPAREFUNCTION_ALWAYS) << 12 |
(GEN6_STENCILOP_KEEP) << 9 |
(GEN6_STENCILOP_KEEP) << 6 |
(GEN6_STENCILOP_REPLACE) << 3;
dw[1] = 0xff << GEN6_ZS_DW1_STENCIL0_VALUEMASK__SHIFT |
0xff << GEN6_ZS_DW1_STENCIL0_WRITEMASK__SHIFT |
0xff << GEN6_ZS_DW1_STENCIL1_VALUEMASK__SHIFT |
0xff << GEN6_ZS_DW1_STENCIL1_WRITEMASK__SHIFT;
dw[2] = 0;
}
return cmd_state_write(cmd, INTEL_CMD_ITEM_DEPTH_STENCIL,
cmd_align, cmd_len, dw);
}
static void gen6_meta_dynamic_states(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
uint32_t blend_offset, ds_offset, cc_offset, cc_vp_offset, *dw;
CMD_ASSERT(cmd, 6, 7.5);
blend_offset = 0;
ds_offset = 0;
cc_offset = 0;
cc_vp_offset = 0;
if (meta->mode == INTEL_CMD_META_FS_RECT) {
/* BLEND_STATE */
blend_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_BLEND,
GEN6_ALIGNMENT_BLEND_STATE, 2, &dw);
dw[0] = 0;
dw[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT | 0x3;
}
if (meta->mode != INTEL_CMD_META_VS_POINTS) {
if (meta->ds.aspect == VK_IMAGE_ASPECT_DEPTH_BIT ||
meta->ds.aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
const uint32_t blend_color[4] = { 0, 0, 0, 0 };
uint32_t stencil_ref = (meta->ds.stencil_ref & 0xff) << 24 |
(meta->ds.stencil_ref & 0xff) << 16;
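/* dw0 of COLOR_CALC_STATE holds both front (31:24) and back (23:16) refs */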
/* DEPTH_STENCIL_STATE */
ds_offset = gen6_meta_DEPTH_STENCIL_STATE(cmd, meta);
/* COLOR_CALC_STATE */
cc_offset = gen6_COLOR_CALC_STATE(cmd,
stencil_ref, blend_color);
/* CC_VIEWPORT */
cc_vp_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_CC_VIEWPORT,
GEN6_ALIGNMENT_CC_VIEWPORT, 2, &dw);
dw[0] = u_fui(0.0f);
dw[1] = u_fui(1.0f);
} else {
/* DEPTH_STENCIL_STATE */
ds_offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_DEPTH_STENCIL,
GEN6_ALIGNMENT_DEPTH_STENCIL_STATE,
GEN6_DEPTH_STENCIL_STATE__SIZE, &dw);
memset(dw, 0, sizeof(*dw) * GEN6_DEPTH_STENCIL_STATE__SIZE);
}
}
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS,
blend_offset);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS,
ds_offset);
gen7_3dstate_pointer(cmd,
GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, cc_offset);
gen7_3dstate_pointer(cmd,
GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
cc_vp_offset);
} else {
/* 3DSTATE_CC_STATE_POINTERS */
gen6_3DSTATE_CC_STATE_POINTERS(cmd, blend_offset, ds_offset, cc_offset);
/* 3DSTATE_VIEWPORT_STATE_POINTERS */
cmd_batch_pointer(cmd, 4, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) | (4 - 2) |
GEN6_VP_PTR_DW0_CC_CHANGED;
dw[1] = 0;
dw[2] = 0;
dw[3] = cc_vp_offset;
}
}
static void gen6_meta_surface_states(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
uint32_t binding_table[2] = { 0, 0 };
uint32_t offset;
const uint32_t sba_offset =
cmd->writers[INTEL_CMD_WRITER_SURFACE].sba_offset;
CMD_ASSERT(cmd, 6, 7.5);
if (meta->mode == INTEL_CMD_META_DEPTH_STENCIL_RECT)
return;
/* SURFACE_STATEs */
if (meta->src.valid) {
offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_SURFACE,
GEN6_ALIGNMENT_SURFACE_STATE,
meta->src.surface_len, meta->src.surface);
cmd_reserve_reloc(cmd, 1);
if (meta->src.reloc_flags & INTEL_CMD_RELOC_TARGET_IS_WRITER) {
cmd_surface_reloc_writer(cmd, offset, 1,
meta->src.reloc_target, meta->src.reloc_offset);
} else {
cmd_surface_reloc(cmd, offset, 1,
(struct intel_bo *) meta->src.reloc_target,
meta->src.reloc_offset, meta->src.reloc_flags);
}
binding_table[0] = offset - sba_offset;
}
if (meta->dst.valid) {
offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_SURFACE,
GEN6_ALIGNMENT_SURFACE_STATE,
meta->dst.surface_len, meta->dst.surface);
cmd_reserve_reloc(cmd, 1);
cmd_surface_reloc(cmd, offset, 1,
(struct intel_bo *) meta->dst.reloc_target,
meta->dst.reloc_offset, meta->dst.reloc_flags);
binding_table[1] = offset - sba_offset;
}
/* BINDING_TABLE */
offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_BINDING_TABLE,
GEN6_ALIGNMENT_BINDING_TABLE_STATE,
2, binding_table);
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
const int subop = (meta->mode == INTEL_CMD_META_VS_POINTS) ?
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS :
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS;
gen7_3dstate_pointer(cmd, subop, offset - sba_offset);
} else {
/* 3DSTATE_BINDING_TABLE_POINTERS */
if (meta->mode == INTEL_CMD_META_VS_POINTS)
gen6_3DSTATE_BINDING_TABLE_POINTERS(cmd, offset - sba_offset, 0, 0);
else
gen6_3DSTATE_BINDING_TABLE_POINTERS(cmd, 0, 0, offset - sba_offset);
}
}
static void gen6_meta_urb(struct intel_cmd *cmd)
{
const int vs_entry_count = (cmd->dev->gpu->gt == 2) ? 256 : 128;
uint32_t *dw;
CMD_ASSERT(cmd, 6, 6);
/* 3DSTATE_URB */
cmd_batch_pointer(cmd, 3, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (3 - 2);
dw[1] = vs_entry_count << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
dw[2] = 0;
}
static void gen7_meta_urb(struct intel_cmd *cmd)
{
const int pcb_alloc = (cmd->dev->gpu->gt == 3) ? 16 : 8;
const int urb_offset = pcb_alloc / 8;
int vs_entry_count;
uint32_t *dw;
CMD_ASSERT(cmd, 7, 7.5);
cmd_wa_gen7_pre_vs_depth_stall_write(cmd);
switch (cmd_gen(cmd)) {
case INTEL_GEN(7.5):
vs_entry_count = (cmd->dev->gpu->gt >= 2) ? 1664 : 640;
break;
case INTEL_GEN(7):
default:
vs_entry_count = (cmd->dev->gpu->gt == 2) ? 704 : 512;
break;
}
/* 3DSTATE_URB_x */
cmd_batch_pointer(cmd, 8, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (2 - 2);
dw[1] = urb_offset << GEN7_URB_DW1_OFFSET__SHIFT |
vs_entry_count;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (2 - 2);
dw[1] = urb_offset << GEN7_URB_DW1_OFFSET__SHIFT;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (2 - 2);
dw[1] = urb_offset << GEN7_URB_DW1_OFFSET__SHIFT;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (2 - 2);
dw[1] = urb_offset << GEN7_URB_DW1_OFFSET__SHIFT;
}
static void gen6_meta_vf(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
uint32_t vb_start, vb_end, vb_stride;
int ve_format, ve_z_source;
uint32_t *dw;
uint32_t pos;
CMD_ASSERT(cmd, 6, 7.5);
switch (meta->mode) {
case INTEL_CMD_META_VS_POINTS:
cmd_batch_pointer(cmd, 3, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (3 - 2);
dw[1] = GEN6_VE_DW0_VALID;
dw[2] = GEN6_VFCOMP_STORE_VID << GEN6_VE_DW1_COMP0__SHIFT |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
return;
case INTEL_CMD_META_FS_RECT:
{
uint32_t vertices[3][2];
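/* a RECTLIST takes three corners; the hardware derives the fourth */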
vertices[0][0] = meta->dst.x + meta->width;
vertices[0][1] = meta->dst.y + meta->height;
vertices[1][0] = meta->dst.x;
vertices[1][1] = meta->dst.y + meta->height;
vertices[2][0] = meta->dst.x;
vertices[2][1] = meta->dst.y;
vb_start = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
sizeof(vertices) / 4, (const uint32_t *) vertices);
vb_end = vb_start + sizeof(vertices) - 1;
vb_stride = sizeof(vertices[0]);
ve_z_source = GEN6_VFCOMP_STORE_0;
ve_format = GEN6_FORMAT_R32G32_USCALED;
}
break;
case INTEL_CMD_META_DEPTH_STENCIL_RECT:
{
float vertices[3][3];
vertices[0][0] = (float) (meta->dst.x + meta->width);
vertices[0][1] = (float) (meta->dst.y + meta->height);
vertices[0][2] = u_uif(meta->clear_val[0]);
vertices[1][0] = (float) meta->dst.x;
vertices[1][1] = (float) (meta->dst.y + meta->height);
vertices[1][2] = u_uif(meta->clear_val[0]);
vertices[2][0] = (float) meta->dst.x;
vertices[2][1] = (float) meta->dst.y;
vertices[2][2] = u_uif(meta->clear_val[0]);
vb_start = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
sizeof(vertices) / 4, (const uint32_t *) vertices);
vb_end = vb_start + sizeof(vertices) - 1;
vb_stride = sizeof(vertices[0]);
ve_z_source = GEN6_VFCOMP_STORE_SRC;
ve_format = GEN6_FORMAT_R32G32B32_FLOAT;
}
break;
default:
assert(!"unknown meta mode");
return;
}
/* 3DSTATE_VERTEX_BUFFERS */
pos = cmd_batch_pointer(cmd, 5, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (5 - 2);
dw[1] = vb_stride;
if (cmd_gen(cmd) >= INTEL_GEN(7))
dw[1] |= GEN7_VB_DW0_ADDR_MODIFIED;
cmd_reserve_reloc(cmd, 2);
cmd_batch_reloc_writer(cmd, pos + 2, INTEL_CMD_WRITER_STATE, vb_start);
cmd_batch_reloc_writer(cmd, pos + 3, INTEL_CMD_WRITER_STATE, vb_end);
dw[4] = 0;
/* 3DSTATE_VERTEX_ELEMENTS */
cmd_batch_pointer(cmd, 5, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (5 - 2);
dw[1] = GEN6_VE_DW0_VALID;
dw[2] = GEN6_VFCOMP_STORE_0 << GEN6_VE_DW1_COMP0__SHIFT | /* Reserved */
GEN6_VFCOMP_STORE_0 << GEN6_VE_DW1_COMP1__SHIFT | /* Render Target Array Index */
GEN6_VFCOMP_STORE_0 << GEN6_VE_DW1_COMP2__SHIFT | /* Viewport Index */
GEN6_VFCOMP_STORE_0 << GEN6_VE_DW1_COMP3__SHIFT; /* Point Width */
dw[3] = GEN6_VE_DW0_VALID |
ve_format << GEN6_VE_DW0_FORMAT__SHIFT;
dw[4] = GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP1__SHIFT |
ve_z_source << GEN6_VE_DW1_COMP2__SHIFT |
GEN6_VFCOMP_STORE_1_FP << GEN6_VE_DW1_COMP3__SHIFT;
}
static uint32_t gen6_meta_vs_constants(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
/* one GPR */
uint32_t consts[8];
uint32_t const_count;
CMD_ASSERT(cmd, 6, 7.5);
switch (meta->shader_id) {
case INTEL_DEV_META_VS_FILL_MEM:
consts[0] = meta->dst.x;
consts[1] = meta->clear_val[0];
const_count = 2;
break;
case INTEL_DEV_META_VS_COPY_MEM:
case INTEL_DEV_META_VS_COPY_MEM_UNALIGNED:
consts[0] = meta->dst.x;
consts[1] = meta->src.x;
const_count = 2;
break;
case INTEL_DEV_META_VS_COPY_R8_TO_MEM:
case INTEL_DEV_META_VS_COPY_R16_TO_MEM:
case INTEL_DEV_META_VS_COPY_R32_TO_MEM:
case INTEL_DEV_META_VS_COPY_R32G32_TO_MEM:
case INTEL_DEV_META_VS_COPY_R32G32B32A32_TO_MEM:
consts[0] = meta->src.x;
consts[1] = meta->src.y;
consts[2] = meta->width;
consts[3] = meta->dst.x;
const_count = 4;
break;
default:
assert(!"unknown meta shader id");
const_count = 0;
break;
}
/* this can be skipped but it makes state dumping prettier */
memset(&consts[const_count], 0, sizeof(consts[0]) * (8 - const_count));
return cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32, 8, consts);
}
static void gen6_meta_vs(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
const struct intel_pipeline_shader *sh =
intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
uint32_t offset, *dw;
CMD_ASSERT(cmd, 6, 7.5);
if (meta->mode != INTEL_CMD_META_VS_POINTS) {
uint32_t cmd_len;
/* 3DSTATE_CONSTANT_VS */
cmd_len = (cmd_gen(cmd) >= INTEL_GEN(7)) ? 7 : 5;
cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | (cmd_len - 2);
memset(&dw[1], 0, sizeof(*dw) * (cmd_len - 1));
/* 3DSTATE_VS */
cmd_batch_pointer(cmd, 6, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (6 - 2);
memset(&dw[1], 0, sizeof(*dw) * (6 - 1));
return;
}
assert(meta->dst.valid && sh->uses == INTEL_SHADER_USE_VID);
/* 3DSTATE_CONSTANT_VS */
offset = gen6_meta_vs_constants(cmd);
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | (7 - 2);
dw[1] = 1 << GEN7_CONSTANT_DW1_BUFFER0_READ_LEN__SHIFT;
dw[2] = 0;
dw[3] = offset | GEN7_MOCS_L3_WB;
dw[4] = 0;
dw[5] = 0;
dw[6] = 0;
} else {
cmd_batch_pointer(cmd, 5, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | (5 - 2) |
1 << GEN6_CONSTANT_DW0_BUFFER_ENABLES__SHIFT;
dw[1] = offset;
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
}
/* 3DSTATE_VS */
offset = emit_shader(cmd, sh);
cmd_batch_pointer(cmd, 6, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (6 - 2);
dw[1] = offset;
dw[2] = GEN6_THREADDISP_SPF |
(sh->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
sh->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
dw[3] = 0; /* scratch */
dw[4] = sh->urb_grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
1 << GEN6_VS_DW4_URB_READ_LEN__SHIFT;
dw[5] = GEN6_VS_DW5_CACHE_DISABLE |
GEN6_VS_DW5_VS_ENABLE;
if (cmd_gen(cmd) >= INTEL_GEN(7.5))
dw[5] |= (sh->max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
else
dw[5] |= (sh->max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
assert(!sh->per_thread_scratch_size);
}
static void gen6_meta_disabled(struct intel_cmd *cmd)
{
uint32_t *dw;
CMD_ASSERT(cmd, 6, 6);
/* 3DSTATE_CONSTANT_GS */
cmd_batch_pointer(cmd, 5, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) | (5 - 2);
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
/* 3DSTATE_GS */
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (7 - 2);
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
dw[4] = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
dw[5] = GEN6_GS_DW5_STATISTICS;
dw[6] = 0;
/* 3DSTATE_SF */
cmd_batch_pointer(cmd, 20, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (20 - 2);
dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
memset(&dw[2], 0, 18 * sizeof(*dw));
}
static void gen7_meta_disabled(struct intel_cmd *cmd)
{
uint32_t *dw;
CMD_ASSERT(cmd, 7, 7.5);
/* 3DSTATE_CONSTANT_HS */
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_CONSTANT_HS) | (7 - 2);
memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
/* 3DSTATE_HS */
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (7 - 2);
memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
/* 3DSTATE_TE */
cmd_batch_pointer(cmd, 4, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (4 - 2);
memset(&dw[1], 0, sizeof(*dw) * (4 - 1));
/* 3DSTATE_CONSTANT_DS */
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_CONSTANT_DS) | (7 - 2);
memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
/* 3DSTATE_DS */
cmd_batch_pointer(cmd, 6, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (6 - 2);
memset(&dw[1], 0, sizeof(*dw) * (6 - 1));
/* 3DSTATE_CONSTANT_GS */
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) | (7 - 2);
memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
/* 3DSTATE_GS */
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (7 - 2);
memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
/* 3DSTATE_STREAMOUT */
cmd_batch_pointer(cmd, 3, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (3 - 2);
memset(&dw[1], 0, sizeof(*dw) * (3 - 1));
/* 3DSTATE_SF */
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (7 - 2);
memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
/* 3DSTATE_SBE */
cmd_batch_pointer(cmd, 14, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (14 - 2);
dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
memset(&dw[2], 0, sizeof(*dw) * (14 - 2));
}
static void gen6_meta_clip(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
uint32_t *dw;
/* 3DSTATE_CLIP */
cmd_batch_pointer(cmd, 4, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (4 - 2);
dw[1] = 0;
if (meta->mode == INTEL_CMD_META_VS_POINTS) {
dw[2] = GEN6_CLIP_DW2_CLIP_ENABLE |
GEN6_CLIP_DW2_CLIPMODE_REJECT_ALL;
} else {
dw[2] = 0;
}
dw[3] = 0;
}
static void gen6_meta_wm(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
uint32_t *dw;
CMD_ASSERT(cmd, 6, 7.5);
cmd_wa_gen6_pre_multisample_depth_flush(cmd);
/* 3DSTATE_MULTISAMPLE */
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
cmd_batch_pointer(cmd, 4, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (4 - 2);
dw[1] =
(meta->sample_count <= 1) ? GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1 :
(meta->sample_count <= 4) ? GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4 :
GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
dw[2] = 0;
dw[3] = 0;
} else {
cmd_batch_pointer(cmd, 3, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (3 - 2);
dw[1] = (meta->sample_count <= 1) ? GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1 :
GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
dw[2] = 0;
}
/* 3DSTATE_SAMPLE_MASK */
cmd_batch_pointer(cmd, 2, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (2 - 2);
dw[1] = (1 << meta->sample_count) - 1;
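/* e.g. sample_count == 4 gives a sample mask of 0xf */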
/* 3DSTATE_DRAWING_RECTANGLE */
cmd_batch_pointer(cmd, 4, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | (4 - 2);
if (meta->mode == INTEL_CMD_META_VS_POINTS) {
/* unused */
dw[1] = 0;
dw[2] = 0;
} else {
dw[1] = meta->dst.y << 16 | meta->dst.x;
dw[2] = (meta->dst.y + meta->height - 1) << 16 |
(meta->dst.x + meta->width - 1);
}
dw[3] = 0;
}
static uint32_t gen6_meta_ps_constants(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
uint32_t offset_x, offset_y;
/* one GPR */
uint32_t consts[8];
uint32_t const_count;
CMD_ASSERT(cmd, 6, 7.5);
/* underflow is fine here */
offset_x = meta->src.x - meta->dst.x;
offset_y = meta->src.y - meta->dst.y;
switch (meta->shader_id) {
case INTEL_DEV_META_FS_COPY_MEM:
case INTEL_DEV_META_FS_COPY_1D:
case INTEL_DEV_META_FS_COPY_1D_ARRAY:
case INTEL_DEV_META_FS_COPY_2D:
case INTEL_DEV_META_FS_COPY_2D_ARRAY:
case INTEL_DEV_META_FS_COPY_2D_MS:
consts[0] = offset_x;
consts[1] = offset_y;
consts[2] = meta->src.layer;
consts[3] = meta->src.lod;
const_count = 4;
break;
case INTEL_DEV_META_FS_COPY_1D_TO_MEM:
case INTEL_DEV_META_FS_COPY_1D_ARRAY_TO_MEM:
case INTEL_DEV_META_FS_COPY_2D_TO_MEM:
case INTEL_DEV_META_FS_COPY_2D_ARRAY_TO_MEM:
case INTEL_DEV_META_FS_COPY_2D_MS_TO_MEM:
consts[0] = offset_x;
consts[1] = offset_y;
consts[2] = meta->src.layer;
consts[3] = meta->src.lod;
consts[4] = meta->src.x;
consts[5] = meta->width;
const_count = 6;
break;
case INTEL_DEV_META_FS_COPY_MEM_TO_IMG:
consts[0] = offset_x;
consts[1] = offset_y;
consts[2] = meta->width;
const_count = 3;
break;
case INTEL_DEV_META_FS_CLEAR_COLOR:
consts[0] = meta->clear_val[0];
consts[1] = meta->clear_val[1];
consts[2] = meta->clear_val[2];
consts[3] = meta->clear_val[3];
const_count = 4;
break;
case INTEL_DEV_META_FS_CLEAR_DEPTH:
consts[0] = meta->clear_val[0];
consts[1] = meta->clear_val[1];
const_count = 2;
break;
case INTEL_DEV_META_FS_RESOLVE_2X:
case INTEL_DEV_META_FS_RESOLVE_4X:
case INTEL_DEV_META_FS_RESOLVE_8X:
case INTEL_DEV_META_FS_RESOLVE_16X:
consts[0] = offset_x;
consts[1] = offset_y;
const_count = 2;
break;
default:
assert(!"unknown meta shader id");
const_count = 0;
break;
}
/* this can be skipped but it makes state dumping prettier */
memset(&consts[const_count], 0, sizeof(consts[0]) * (8 - const_count));
return cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32, 8, consts);
}
static void gen6_meta_ps(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
const struct intel_pipeline_shader *sh =
intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
uint32_t offset, *dw;
CMD_ASSERT(cmd, 6, 6);
if (meta->mode != INTEL_CMD_META_FS_RECT) {
/* 3DSTATE_CONSTANT_PS */
cmd_batch_pointer(cmd, 5, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (5 - 2);
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
/* 3DSTATE_WM */
cmd_batch_pointer(cmd, 9, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (9 - 2);
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
switch (meta->ds.op) {
case INTEL_CMD_META_DS_HIZ_CLEAR:
dw[4] = GEN6_WM_DW4_DEPTH_CLEAR;
break;
case INTEL_CMD_META_DS_HIZ_RESOLVE:
dw[4] = GEN6_WM_DW4_HIZ_RESOLVE;
break;
case INTEL_CMD_META_DS_RESOLVE:
dw[4] = GEN6_WM_DW4_DEPTH_RESOLVE;
break;
default:
dw[4] = 0;
break;
}
dw[5] = (sh->max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
dw[6] = 0;
dw[7] = 0;
dw[8] = 0;
return;
}
/* a normal color write */
assert(meta->dst.valid && !sh->uses);
/* 3DSTATE_CONSTANT_PS */
offset = gen6_meta_ps_constants(cmd);
cmd_batch_pointer(cmd, 5, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (5 - 2) |
1 << GEN6_CONSTANT_DW0_BUFFER_ENABLES__SHIFT;
dw[1] = offset;
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
/* 3DSTATE_WM */
offset = emit_shader(cmd, sh);
cmd_batch_pointer(cmd, 9, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (9 - 2);
dw[1] = offset;
dw[2] = (sh->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
sh->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
dw[3] = 0; /* scratch */
dw[4] = sh->urb_grf_start << GEN6_WM_DW4_URB_GRF_START0__SHIFT;
dw[5] = (sh->max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT |
GEN6_WM_DW5_PS_DISPATCH_ENABLE |
GEN6_PS_DISPATCH_16 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
dw[6] = sh->in_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
GEN6_WM_DW6_PS_POSOFFSET_NONE |
GEN6_WM_DW6_ZW_INTERP_PIXEL |
sh->barycentric_interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT |
GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
if (meta->sample_count > 1) {
dw[6] |= GEN6_WM_DW6_MSRASTMODE_ON_PATTERN |
GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
} else {
dw[6] |= GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL |
GEN6_WM_DW6_MSDISPMODE_PERSAMPLE;
}
dw[7] = 0;
dw[8] = 0;
assert(!sh->per_thread_scratch_size);
}
static void gen7_meta_ps(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
const struct intel_pipeline_shader *sh =
intel_dev_get_meta_shader(cmd->dev, meta->shader_id);
uint32_t offset, *dw;
CMD_ASSERT(cmd, 7, 7.5);
if (meta->mode != INTEL_CMD_META_FS_RECT) {
/* 3DSTATE_WM */
cmd_batch_pointer(cmd, 3, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (3 - 2);
switch (meta->ds.op) {
case INTEL_CMD_META_DS_HIZ_CLEAR:
dw[1] = GEN7_WM_DW1_DEPTH_CLEAR;
break;
case INTEL_CMD_META_DS_HIZ_RESOLVE:
dw[1] = GEN7_WM_DW1_HIZ_RESOLVE;
break;
case INTEL_CMD_META_DS_RESOLVE:
dw[1] = GEN7_WM_DW1_DEPTH_RESOLVE;
break;
default:
dw[1] = 0;
break;
}
dw[2] = 0;
/* 3DSTATE_CONSTANT_PS */
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (7 - 2);
memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
/* 3DSTATE_PS */
cmd_batch_pointer(cmd, 8, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (8 - 2);
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
/* required to avoid hangs */
dw[4] = GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT |
(sh->max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
dw[5] = 0;
dw[6] = 0;
dw[7] = 0;
return;
}
/* a normal color write */
assert(meta->dst.valid && !sh->uses);
/* 3DSTATE_WM */
cmd_batch_pointer(cmd, 3, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (3 - 2);
dw[1] = GEN7_WM_DW1_PS_DISPATCH_ENABLE |
GEN7_WM_DW1_ZW_INTERP_PIXEL |
sh->barycentric_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT |
GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
dw[2] = 0;
/* 3DSTATE_CONSTANT_PS */
offset = gen6_meta_ps_constants(cmd);
cmd_batch_pointer(cmd, 7, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (7 - 2);
dw[1] = 1 << GEN7_CONSTANT_DW1_BUFFER0_READ_LEN__SHIFT;
dw[2] = 0;
dw[3] = offset | GEN7_MOCS_L3_WB;
dw[4] = 0;
dw[5] = 0;
dw[6] = 0;
/* 3DSTATE_PS */
offset = emit_shader(cmd, sh);
cmd_batch_pointer(cmd, 8, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (8 - 2);
dw[1] = offset;
dw[2] = (sh->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
sh->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
dw[3] = 0; /* scratch */
dw[4] = GEN7_PS_DW4_PUSH_CONSTANT_ENABLE |
GEN7_PS_DW4_POSOFFSET_NONE |
GEN6_PS_DISPATCH_16 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
if (cmd_gen(cmd) >= INTEL_GEN(7.5)) {
dw[4] |= (sh->max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
dw[4] |= ((1 << meta->sample_count) - 1) <<
GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
} else {
dw[4] |= (sh->max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
}
dw[5] = sh->urb_grf_start << GEN7_PS_DW5_URB_GRF_START0__SHIFT;
dw[6] = 0;
dw[7] = 0;
assert(!sh->per_thread_scratch_size);
}
static void gen6_meta_depth_buffer(struct intel_cmd *cmd)
{
const struct intel_cmd_meta *meta = cmd->bind.meta;
const struct intel_att_view *view = &meta->ds.view;
CMD_ASSERT(cmd, 6, 7.5);
cmd_wa_gen6_pre_ds_flush(cmd);
gen6_3DSTATE_DEPTH_BUFFER(cmd, view, meta->ds.optimal);
gen6_3DSTATE_STENCIL_BUFFER(cmd, view, meta->ds.optimal);
gen6_3DSTATE_HIER_DEPTH_BUFFER(cmd, view, meta->ds.optimal);
if (cmd_gen(cmd) >= INTEL_GEN(7))
gen7_3DSTATE_CLEAR_PARAMS(cmd, 0);
else
gen6_3DSTATE_CLEAR_PARAMS(cmd, 0);
}
static bool cmd_alloc_dset_data(struct intel_cmd *cmd,
struct intel_cmd_dset_data *data,
const struct intel_pipeline_layout *pipeline_layout)
{
if (data->set_offset_count < pipeline_layout->layout_count) {
if (data->set_offsets)
intel_free(cmd, data->set_offsets);
data->set_offsets = intel_alloc(cmd,
sizeof(data->set_offsets[0]) * pipeline_layout->layout_count,
sizeof(data->set_offsets[0]), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!data->set_offsets) {
cmd_fail(cmd, VK_ERROR_OUT_OF_HOST_MEMORY);
data->set_offset_count = 0;
return false;
}
data->set_offset_count = pipeline_layout->layout_count;
}
if (data->dynamic_offset_count < pipeline_layout->total_dynamic_desc_count) {
if (data->dynamic_offsets)
intel_free(cmd, data->dynamic_offsets);
data->dynamic_offsets = intel_alloc(cmd,
sizeof(data->dynamic_offsets[0]) * pipeline_layout->total_dynamic_desc_count,
sizeof(data->dynamic_offsets[0]), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!data->dynamic_offsets) {
cmd_fail(cmd, VK_ERROR_OUT_OF_HOST_MEMORY);
data->dynamic_offset_count = 0;
return false;
}
data->dynamic_offset_count = pipeline_layout->total_dynamic_desc_count;
}
return true;
}
static void cmd_bind_dynamic_state(struct intel_cmd *cmd,
const struct intel_pipeline *pipeline)
{
VkFlags use_flags = pipeline->state.use_pipeline_dynamic_state;
if (!use_flags) {
return;
}
cmd->bind.state.use_pipeline_dynamic_state = use_flags;
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_VIEWPORT) {
const struct intel_dynamic_viewport *viewport = &pipeline->state.viewport;
intel_set_viewport(cmd, viewport->first_viewport, viewport->viewport_count, viewport->viewports);
}
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_SCISSOR) {
const struct intel_dynamic_viewport *viewport = &pipeline->state.viewport;
intel_set_scissor(cmd, viewport->first_scissor, viewport->scissor_count, viewport->scissors);
}
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_LINE_WIDTH) {
intel_set_line_width(cmd, pipeline->state.line_width.line_width);
}
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_DEPTH_BIAS) {
const struct intel_dynamic_depth_bias *s = &pipeline->state.depth_bias;
intel_set_depth_bias(cmd, s->depth_bias, s->depth_bias_clamp, s->slope_scaled_depth_bias);
}
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_BLEND_CONSTANTS) {
const struct intel_dynamic_blend *s = &pipeline->state.blend;
intel_set_blend_constants(cmd, s->blend_const);
}
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_DEPTH_BOUNDS) {
const struct intel_dynamic_depth_bounds *s = &pipeline->state.depth_bounds;
intel_set_depth_bounds(cmd, s->min_depth_bounds, s->max_depth_bounds);
}
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_STENCIL_COMPARE_MASK) {
const struct intel_dynamic_stencil *s = &pipeline->state.stencil;
intel_set_stencil_compare_mask(cmd, VK_STENCIL_FACE_FRONT_BIT, s->front.stencil_compare_mask);
intel_set_stencil_compare_mask(cmd, VK_STENCIL_FACE_BACK_BIT, s->back.stencil_compare_mask);
}
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_STENCIL_WRITE_MASK) {
const struct intel_dynamic_stencil *s = &pipeline->state.stencil;
intel_set_stencil_write_mask(cmd, VK_STENCIL_FACE_FRONT_BIT, s->front.stencil_write_mask);
intel_set_stencil_write_mask(cmd, VK_STENCIL_FACE_BACK_BIT, s->back.stencil_write_mask);
}
if (use_flags & INTEL_USE_PIPELINE_DYNAMIC_STENCIL_REFERENCE) {
const struct intel_dynamic_stencil *s = &pipeline->state.stencil;
intel_set_stencil_reference(cmd, VK_STENCIL_FACE_FRONT_BIT, s->front.stencil_reference);
intel_set_stencil_reference(cmd, VK_STENCIL_FACE_BACK_BIT, s->back.stencil_reference);
}
}
static void cmd_bind_graphics_pipeline(struct intel_cmd *cmd,
const struct intel_pipeline *pipeline)
{
cmd->bind.pipeline.graphics = pipeline;
cmd_bind_dynamic_state(cmd, pipeline);
cmd_alloc_dset_data(cmd, &cmd->bind.dset.graphics_data,
pipeline->pipeline_layout);
}
static void cmd_bind_compute_pipeline(struct intel_cmd *cmd,
const struct intel_pipeline *pipeline)
{
cmd->bind.pipeline.compute = pipeline;
cmd_alloc_dset_data(cmd, &cmd->bind.dset.compute_data,
pipeline->pipeline_layout);
}
static void cmd_copy_dset_data(struct intel_cmd *cmd,
struct intel_cmd_dset_data *data,
const struct intel_pipeline_layout *pipeline_layout,
uint32_t index,
const struct intel_desc_set *set,
const uint32_t *dynamic_offsets)
{
const struct intel_desc_layout *layout = pipeline_layout->layouts[index];
assert(index < data->set_offset_count);
data->set_offsets[index] = set->region_begin;
if (layout->dynamic_desc_count) {
assert(pipeline_layout->dynamic_desc_indices[index] +
layout->dynamic_desc_count - 1 < data->dynamic_offset_count);
memcpy(&data->dynamic_offsets[pipeline_layout->dynamic_desc_indices[index]],
dynamic_offsets,
sizeof(dynamic_offsets[0]) * layout->dynamic_desc_count);
}
}
static void cmd_bind_vertex_data(struct intel_cmd *cmd,
const struct intel_buf *buf,
VkDeviceSize offset, uint32_t binding)
{
/* TODOVV: verify */
assert(!(binding >= ARRAY_SIZE(cmd->bind.vertex.buf)) && "binding exceeds buf size");
cmd->bind.vertex.buf[binding] = buf;
cmd->bind.vertex.offset[binding] = offset;
}
static void cmd_bind_index_data(struct intel_cmd *cmd,
const struct intel_buf *buf,
VkDeviceSize offset, VkIndexType type)
{
cmd->bind.index.buf = buf;
cmd->bind.index.offset = offset;
cmd->bind.index.type = type;
}
static uint32_t cmd_get_max_surface_write(const struct intel_cmd *cmd)
{
const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics;
struct intel_pipeline_rmap *rmaps[5] = {
pipeline->vs.rmap,
pipeline->tcs.rmap,
pipeline->tes.rmap,
pipeline->gs.rmap,
pipeline->fs.rmap,
};
uint32_t max_write;
int i;
STATIC_ASSERT(GEN6_ALIGNMENT_SURFACE_STATE >= GEN6_SURFACE_STATE__SIZE);
STATIC_ASSERT(GEN6_ALIGNMENT_SURFACE_STATE >=
GEN6_ALIGNMENT_BINDING_TABLE_STATE);
/* pad first */
max_write = GEN6_ALIGNMENT_SURFACE_STATE;
for (i = 0; i < ARRAY_SIZE(rmaps); i++) {
const struct intel_pipeline_rmap *rmap = rmaps[i];
const uint32_t surface_count = (rmap) ?
rmap->rt_count + rmap->texture_resource_count +
rmap->resource_count + rmap->uav_count : 0;
if (surface_count) {
/* SURFACE_STATEs */
max_write += GEN6_ALIGNMENT_SURFACE_STATE * surface_count;
/* BINDING_TABLE_STATE */
max_write += u_align(sizeof(uint32_t) * surface_count,
GEN6_ALIGNMENT_SURFACE_STATE);
}
}
return max_write;
}
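/*
 * For example (hypothetical counts): a pipeline whose only shader has
 * rt_count = 1 and texture_resource_count = 2 contributes three aligned
 * SURFACE_STATEs plus one aligned 3-entry binding table to the bound,
 * on top of the initial alignment padding.
 */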
static void cmd_adjust_state_base_address(struct intel_cmd *cmd)
{
struct intel_cmd_writer *writer = &cmd->writers[INTEL_CMD_WRITER_SURFACE];
const uint32_t cur_surface_offset = writer->used - writer->sba_offset;
uint32_t max_surface_write;
/* enough for src and dst SURFACE_STATEs plus BINDING_TABLE_STATE */
if (cmd->bind.meta)
max_surface_write = 64 * sizeof(uint32_t);
else
max_surface_write = cmd_get_max_surface_write(cmd);
/* there is a 64KB limit on BINDING_TABLE_STATEs */
if (cur_surface_offset + max_surface_write > 64 * 1024) {
/* SBA expects page-aligned addresses */
writer->sba_offset = writer->used & ~0xfff;
assert((writer->used & 0xfff) + max_surface_write <= 64 * 1024);
cmd_batch_state_base_address(cmd);
}
}
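/*
 * A sketch with made-up numbers: if writer->used is 0xf234 with
 * sba_offset 0, cur_surface_offset is ~62KB; a projected write of a few
 * KB would cross the 64KB window, so sba_offset is rebased to 0xf000
 * and later surface offsets start from 0x234 in the new window.
 */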
static void cmd_draw(struct intel_cmd *cmd,
uint32_t vertex_start,
uint32_t vertex_count,
uint32_t instance_start,
uint32_t instance_count,
bool indexed,
uint32_t vertex_base)
{
const struct intel_pipeline *p = cmd->bind.pipeline.graphics;
const uint32_t surface_writer_used U_ASSERT_ONLY =
cmd->writers[INTEL_CMD_WRITER_SURFACE].used;
cmd_adjust_state_base_address(cmd);
emit_bounded_states(cmd);
/* sanity check on cmd_get_max_surface_write() */
assert(cmd->writers[INTEL_CMD_WRITER_SURFACE].used -
surface_writer_used <= cmd_get_max_surface_write(cmd));
if (indexed) {
assert(!(p->primitive_restart && !gen6_can_primitive_restart(cmd)) && "Primitive restart unsupported on this device");
if (cmd_gen(cmd) >= INTEL_GEN(7.5)) {
gen75_3DSTATE_VF(cmd, p->primitive_restart,
p->primitive_restart_index);
gen6_3DSTATE_INDEX_BUFFER(cmd, cmd->bind.index.buf,
cmd->bind.index.offset, cmd->bind.index.type,
false);
} else {
gen6_3DSTATE_INDEX_BUFFER(cmd, cmd->bind.index.buf,
cmd->bind.index.offset, cmd->bind.index.type,
p->primitive_restart);
}
} else {
assert(!vertex_base);
}
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
gen7_3DPRIMITIVE(cmd, p->prim_type, indexed, vertex_count,
vertex_start, instance_count, instance_start, vertex_base);
} else {
gen6_3DPRIMITIVE(cmd, p->prim_type, indexed, vertex_count,
vertex_start, instance_count, instance_start, vertex_base);
}
cmd->bind.draw_count++;
cmd->bind.render_pass_changed = false;
/* need to re-emit all workarounds */
cmd->bind.wa_flags = 0;
if (intel_debug & INTEL_DEBUG_NOCACHE)
cmd_batch_flush_all(cmd);
}
void cmd_draw_meta(struct intel_cmd *cmd, const struct intel_cmd_meta *meta)
{
cmd->bind.meta = meta;
cmd_adjust_state_base_address(cmd);
cmd_wa_gen6_pre_depth_stall_write(cmd);
cmd_wa_gen6_pre_command_scoreboard_stall(cmd);
gen6_meta_dynamic_states(cmd);
gen6_meta_surface_states(cmd);
if (cmd_gen(cmd) >= INTEL_GEN(7)) {
gen7_meta_urb(cmd);
gen6_meta_vf(cmd);
gen6_meta_vs(cmd);
gen7_meta_disabled(cmd);
gen6_meta_clip(cmd);
gen6_meta_wm(cmd);
gen7_meta_ps(cmd);
gen6_meta_depth_buffer(cmd);
cmd_wa_gen7_post_command_cs_stall(cmd);
cmd_wa_gen7_post_command_depth_stall(cmd);
if (meta->mode == INTEL_CMD_META_VS_POINTS) {
gen7_3DPRIMITIVE(cmd, GEN6_3DPRIM_POINTLIST, false,
meta->width * meta->height, 0, 1, 0, 0);
} else {
gen7_3DPRIMITIVE(cmd, GEN6_3DPRIM_RECTLIST, false, 3, 0, 1, 0, 0);
}
} else {
gen6_meta_urb(cmd);
gen6_meta_vf(cmd);
gen6_meta_vs(cmd);
gen6_meta_disabled(cmd);
gen6_meta_clip(cmd);
gen6_meta_wm(cmd);
gen6_meta_ps(cmd);
gen6_meta_depth_buffer(cmd);
if (meta->mode == INTEL_CMD_META_VS_POINTS) {
gen6_3DPRIMITIVE(cmd, GEN6_3DPRIM_POINTLIST, false,
meta->width * meta->height, 0, 1, 0, 0);
} else {
gen6_3DPRIMITIVE(cmd, GEN6_3DPRIM_RECTLIST, false, 3, 0, 1, 0, 0);
}
}
cmd->bind.draw_count++;
/* need to re-emit all workarounds */
cmd->bind.wa_flags = 0;
cmd->bind.meta = NULL;
/* make the normal path believe the render pass has changed */
cmd->bind.render_pass_changed = true;
if (intel_debug & INTEL_DEBUG_NOCACHE)
cmd_batch_flush_all(cmd);
}
static void cmd_exec(struct intel_cmd *cmd, struct intel_bo *bo)
{
const uint8_t cmd_len = 2;
uint32_t *dw;
uint32_t pos;
assert(cmd_gen(cmd) >= INTEL_GEN(7.5) && "Invalid GPU version");
pos = cmd_batch_pointer(cmd, cmd_len, &dw);
dw[0] = GEN6_MI_CMD(MI_BATCH_BUFFER_START) | (cmd_len - 2) |
GEN75_MI_BATCH_BUFFER_START_DW0_SECOND_LEVEL |
GEN75_MI_BATCH_BUFFER_START_DW0_NON_PRIVILEGED |
GEN6_MI_BATCH_BUFFER_START_DW0_USE_PPGTT;
cmd_batch_reloc(cmd, pos + 1, bo, 0, 0);
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdBindPipeline(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline pipeline)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE:
cmd_bind_compute_pipeline(cmd, intel_pipeline(pipeline));
break;
case VK_PIPELINE_BIND_POINT_GRAPHICS:
cmd_bind_graphics_pipeline(cmd, intel_pipeline(pipeline));
break;
default:
assert(!"unsupported pipelineBindPoint");
break;
}
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout layout,
uint32_t firstSet,
uint32_t descriptorSetCount,
const VkDescriptorSet* pDescriptorSets,
uint32_t dynamicOffsetCount,
const uint32_t* pDynamicOffsets)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
const struct intel_pipeline_layout *pipeline_layout;
struct intel_cmd_dset_data *data = NULL;
uint32_t offset_count = 0;
uint32_t i;
pipeline_layout = intel_pipeline_layout(layout);
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE:
data = &cmd->bind.dset.compute_data;
break;
case VK_PIPELINE_BIND_POINT_GRAPHICS:
data = &cmd->bind.dset.graphics_data;
break;
default:
assert(!"unsupported pipelineBindPoint");
break;
}
cmd_alloc_dset_data(cmd, data, pipeline_layout);
for (i = 0; i < descriptorSetCount; i++) {
struct intel_desc_set *dset = intel_desc_set(pDescriptorSets[i]);
offset_count += pipeline_layout->layouts[firstSet + i]->dynamic_desc_count;
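/* consume pDynamicOffsets only when enough offsets remain for this set */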
if (offset_count <= dynamicOffsetCount) {
cmd_copy_dset_data(cmd, data, pipeline_layout, firstSet + i,
dset, pDynamicOffsets);
pDynamicOffsets += pipeline_layout->layouts[firstSet + i]->dynamic_desc_count;
}
}
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdBindVertexBuffers(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
for (uint32_t i = 0; i < bindingCount; i++) {
struct intel_buf *buf = intel_buf(pBuffers[i]);
cmd_bind_vertex_data(cmd, buf, pOffsets[i], firstBinding + i);
}
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdBindIndexBuffer(
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
VkIndexType indexType)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
struct intel_buf *buf = intel_buf(buffer);
cmd_bind_index_data(cmd, buf, offset, indexType);
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdDraw(
VkCommandBuffer commandBuffer,
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
cmd_draw(cmd, firstVertex, vertexCount,
firstInstance, instanceCount, false, 0);
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexed(
VkCommandBuffer commandBuffer,
uint32_t indexCount,
uint32_t instanceCount,
uint32_t firstIndex,
int32_t vertexOffset,
uint32_t firstInstance)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
cmd_draw(cmd, firstIndex, indexCount,
firstInstance, instanceCount, true, vertexOffset);
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirect(
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
assert(0 && "vkCmdDrawIndirect not implemented");
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirect(
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
assert(0 && "vkCmdDrawIndexedIndirect not implemented");
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdDispatch(
VkCommandBuffer commandBuffer,
uint32_t x,
uint32_t y,
uint32_t z)
{
assert(0 && "vkCmdDispatch not implemented");
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdDispatchIndirect(
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset)
{
assert(0 && "vkCmdDisatchIndirect not implemented");
}
VKAPI_ATTR void VKAPI_CALL vkCmdPushConstants(
VkCommandBuffer commandBuffer,
VkPipelineLayout layout,
VkShaderStageFlags stageFlags,
uint32_t offset,
uint32_t size,
const void* pValues)
{
/* TODO: Implement */
}
VKAPI_ATTR void VKAPI_CALL vkGetRenderAreaGranularity(
VkDevice device,
VkRenderPass renderPass,
VkExtent2D* pGranularity)
{
pGranularity->height = 1;
pGranularity->width = 1;
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderPass(
VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo* pRenderPassBegin,
VkSubpassContents contents)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
const struct intel_render_pass *rp =
intel_render_pass(pRenderPassBegin->renderPass);
const struct intel_fb *fb = intel_fb(pRenderPassBegin->framebuffer);
const struct intel_att_view *view;
uint32_t i;
/* TODOVV: */
assert(!(!cmd->primary || rp->attachment_count != fb->view_count) && "Invalid RenderPass");
cmd_begin_render_pass(cmd, rp, fb, 0, contents);
for (i = 0; i < rp->attachment_count; i++) {
const struct intel_render_pass_attachment *att = &rp->attachments[i];
const VkClearValue *clear_val =
&pRenderPassBegin->pClearValues[i];
VkImageSubresourceRange range;
view = fb->views[i];
range.baseMipLevel = view->mipLevel;
range.levelCount = 1;
range.baseArrayLayer = view->baseArrayLayer;
range.layerCount = view->array_size;
range.aspectMask = 0;
if (view->is_rt) {
/* color */
if (att->clear_on_load) {
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
cmd_meta_clear_color_image(commandBuffer, view->img,
att->initial_layout, &clear_val->color, 1, &range);
}
} else {
/* depth/stencil */
if (att->clear_on_load) {
range.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (att->stencil_clear_on_load) {
range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
if (range.aspectMask) {
cmd_meta_clear_depth_stencil_image(commandBuffer,
view->img, att->initial_layout,
clear_val->depthStencil.depth, clear_val->depthStencil.stencil,
1, &range);
}
}
}
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdNextSubpass(
VkCommandBuffer commandBuffer,
VkSubpassContents contents)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
const struct intel_render_pass U_ASSERT_ONLY *rp = cmd->bind.render_pass;
/* TODOVV */
assert(!(cmd->bind.render_pass_subpass >= rp->subpasses +
rp->subpass_count - 1) && "Invalid RenderPassContents");
cmd->bind.render_pass_changed = true;
cmd->bind.render_pass_subpass++;
cmd->bind.render_pass_contents = contents;
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass(
VkCommandBuffer commandBuffer)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
cmd_end_render_pass(cmd);
}
ICD_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands(
VkCommandBuffer commandBuffer,
uint32_t commandBuffersCount,
const VkCommandBuffer* pCommandBuffers)
{
struct intel_cmd *cmd = intel_cmd(commandBuffer);
uint32_t i;
/* TODOVV */
assert(!(!cmd->bind.render_pass || cmd->bind.render_pass_contents !=
VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS) && "Invalid RenderPass");
for (i = 0; i < commandBuffersCount; i++) {
const struct intel_cmd *secondary = intel_cmd(pCommandBuffers[i]);
/* TODOVV: Move test to validation layer */
assert(!(secondary->primary) && "Cannot be primary command buffer");
cmd_exec(cmd, intel_cmd_get_batch(secondary, NULL));
}
if (i)
cmd_batch_state_base_address(cmd);
}