/*
* XGL
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Courtney Goeltzenleuchter <courtney@lunarg.com>
* Chia-I Wu <olv@lunarg.com>
*/
#include "genhw/genhw.h"
#include "compiler/pipeline/pipeline_compiler_interface.h"
#include "cmd.h"
#include "format.h"
#include "shader.h"
#include "pipeline.h"
static int translate_blend_func(XGL_BLEND_FUNC func)
{
switch (func) {
case XGL_BLEND_FUNC_ADD: return GEN6_BLENDFUNCTION_ADD;
case XGL_BLEND_FUNC_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT;
case XGL_BLEND_FUNC_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
case XGL_BLEND_FUNC_MIN: return GEN6_BLENDFUNCTION_MIN;
case XGL_BLEND_FUNC_MAX: return GEN6_BLENDFUNCTION_MAX;
default:
assert(!"unknown blend func");
return GEN6_BLENDFUNCTION_ADD;
}
}
static int translate_blend(XGL_BLEND blend)
{
switch (blend) {
case XGL_BLEND_ZERO: return GEN6_BLENDFACTOR_ZERO;
case XGL_BLEND_ONE: return GEN6_BLENDFACTOR_ONE;
case XGL_BLEND_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR;
case XGL_BLEND_ONE_MINUS_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR;
case XGL_BLEND_DEST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR;
case XGL_BLEND_ONE_MINUS_DEST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR;
case XGL_BLEND_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA;
case XGL_BLEND_ONE_MINUS_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA;
case XGL_BLEND_DEST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA;
case XGL_BLEND_ONE_MINUS_DEST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA;
case XGL_BLEND_CONSTANT_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR;
case XGL_BLEND_ONE_MINUS_CONSTANT_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR;
case XGL_BLEND_CONSTANT_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA;
case XGL_BLEND_ONE_MINUS_CONSTANT_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA;
case XGL_BLEND_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
case XGL_BLEND_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR;
case XGL_BLEND_ONE_MINUS_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR;
case XGL_BLEND_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA;
case XGL_BLEND_ONE_MINUS_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
default:
assert(!"unknown blend factor");
return GEN6_BLENDFACTOR_ONE;
}
}
static int translate_compare_func(XGL_COMPARE_FUNC func)
{
switch (func) {
case XGL_COMPARE_NEVER: return GEN6_COMPAREFUNCTION_NEVER;
case XGL_COMPARE_LESS: return GEN6_COMPAREFUNCTION_LESS;
case XGL_COMPARE_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL;
case XGL_COMPARE_LESS_EQUAL: return GEN6_COMPAREFUNCTION_LEQUAL;
case XGL_COMPARE_GREATER: return GEN6_COMPAREFUNCTION_GREATER;
case XGL_COMPARE_NOT_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
case XGL_COMPARE_GREATER_EQUAL: return GEN6_COMPAREFUNCTION_GEQUAL;
case XGL_COMPARE_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS;
default:
assert(!"unknown compare_func");
return GEN6_COMPAREFUNCTION_NEVER;
}
}
static int translate_stencil_op(XGL_STENCIL_OP op)
{
switch (op) {
case XGL_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP;
case XGL_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO;
case XGL_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE;
case XGL_STENCIL_OP_INC_CLAMP: return GEN6_STENCILOP_INCRSAT;
case XGL_STENCIL_OP_DEC_CLAMP: return GEN6_STENCILOP_DECRSAT;
case XGL_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT;
case XGL_STENCIL_OP_INC_WRAP: return GEN6_STENCILOP_INCR;
case XGL_STENCIL_OP_DEC_WRAP: return GEN6_STENCILOP_DECR;
default:
assert(!"unknown stencil op");
return GEN6_STENCILOP_KEEP;
}
}
struct intel_pipeline_create_info {
XGL_GRAPHICS_PIPELINE_CREATE_INFO graphics;
XGL_PIPELINE_VERTEX_INPUT_CREATE_INFO vi;
XGL_PIPELINE_IA_STATE_CREATE_INFO ia;
XGL_PIPELINE_DS_STATE_CREATE_INFO db;
XGL_PIPELINE_CB_STATE_CREATE_INFO cb;
XGL_PIPELINE_RS_STATE_CREATE_INFO rs;
XGL_PIPELINE_TESS_STATE_CREATE_INFO tess;
XGL_PIPELINE_MS_STATE_CREATE_INFO ms;
XGL_PIPELINE_VP_STATE_CREATE_INFO vp;
XGL_PIPELINE_SHADER vs;
XGL_PIPELINE_SHADER tcs;
XGL_PIPELINE_SHADER tes;
XGL_PIPELINE_SHADER gs;
XGL_PIPELINE_SHADER fs;
XGL_COMPUTE_PIPELINE_CREATE_INFO compute;
};
/* in S1.3 */
struct intel_pipeline_sample_position {
int8_t x, y;
};
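/*
* Pack a signed sub-pixel sample offset (-8..7, in 1/16-pixel units) into a
* single byte: x in the high nibble, y in the low nibble.
*/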
static uint8_t pack_sample_position(const struct intel_dev *dev,
const struct intel_pipeline_sample_position *pos)
{
return (pos->x + 8) << 4 | (pos->y + 8);
}
void intel_pipeline_init_default_sample_patterns(const struct intel_dev *dev,
uint8_t *pat_1x, uint8_t *pat_2x,
uint8_t *pat_4x, uint8_t *pat_8x,
uint8_t *pat_16x)
{
static const struct intel_pipeline_sample_position default_1x[1] = {
{ 0, 0 },
};
static const struct intel_pipeline_sample_position default_2x[2] = {
{ -4, -4 },
{ 4, 4 },
};
static const struct intel_pipeline_sample_position default_4x[4] = {
{ -2, -6 },
{ 6, -2 },
{ -6, 2 },
{ 2, 6 },
};
static const struct intel_pipeline_sample_position default_8x[8] = {
{ -1, 1 },
{ 1, 5 },
{ 3, -5 },
{ 5, 3 },
{ -7, -1 },
{ -3, -7 },
{ 7, -3 },
{ -5, 7 },
};
static const struct intel_pipeline_sample_position default_16x[16] = {
{ 0, 2 },
{ 3, 0 },
{ -3, -2 },
{ -2, -4 },
{ 4, 3 },
{ 5, 1 },
{ 6, -1 },
{ 2, -6 },
{ -4, 5 },
{ -5, -5 },
{ -1, -7 },
{ 7, -3 },
{ -7, 4 },
{ 1, -8 },
{ -6, 6 },
{ -8, 7 },
};
int i;
pat_1x[0] = pack_sample_position(dev, default_1x);
for (i = 0; i < 2; i++)
pat_2x[i] = pack_sample_position(dev, &default_2x[i]);
for (i = 0; i < 4; i++)
pat_4x[i] = pack_sample_position(dev, &default_4x[i]);
for (i = 0; i < 8; i++)
pat_8x[i] = pack_sample_position(dev, &default_8x[i]);
for (i = 0; i < 16; i++)
pat_16x[i] = pack_sample_position(dev, &default_16x[i]);
}
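/*
* Create and compile one of the driver-internal meta shaders (e.g. memory
* fill/copy) and record its maximum hardware thread count. Returns NULL on
* allocation or compilation failure.
*/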
struct intel_pipeline_shader *intel_pipeline_shader_create_meta(struct intel_dev *dev,
enum intel_dev_meta_shader id)
{
struct intel_pipeline_shader *sh;
XGL_RESULT ret;
sh = intel_alloc(dev, sizeof(*sh), 0, XGL_SYSTEM_ALLOC_INTERNAL);
if (!sh)
return NULL;
memset(sh, 0, sizeof(*sh));
ret = intel_pipeline_shader_compile_meta(sh, dev->gpu, id);
if (ret != XGL_SUCCESS) {
intel_free(dev, sh);
return NULL;
}
switch (id) {
case INTEL_DEV_META_VS_FILL_MEM:
case INTEL_DEV_META_VS_COPY_MEM:
case INTEL_DEV_META_VS_COPY_MEM_UNALIGNED:
sh->max_threads = intel_gpu_get_max_threads(dev->gpu,
XGL_SHADER_STAGE_VERTEX);
break;
default:
sh->max_threads = intel_gpu_get_max_threads(dev->gpu,
XGL_SHADER_STAGE_FRAGMENT);
break;
}
return sh;
}
void intel_pipeline_shader_destroy(struct intel_dev *dev,
struct intel_pipeline_shader *sh)
{
intel_pipeline_shader_cleanup(sh, dev->gpu);
intel_free(dev, sh);
}
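/*
* Compile a single pipeline stage, record its maximum thread count, and
* reserve its per-thread scratch space at a 1KB-aligned offset within the
* pipeline's scratch allocation.
*/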
static XGL_RESULT pipeline_build_shader(struct intel_pipeline *pipeline,
const struct intel_desc_layout_chain *chain,
const XGL_PIPELINE_SHADER *sh_info,
struct intel_pipeline_shader *sh)
{
XGL_RESULT ret;
ret = intel_pipeline_shader_compile(sh,
pipeline->dev->gpu, chain, sh_info);
if (ret != XGL_SUCCESS)
return ret;
sh->max_threads =
intel_gpu_get_max_threads(pipeline->dev->gpu, sh_info->stage);
/* 1KB aligned */
sh->scratch_offset = u_align(pipeline->scratch_size, 1024);
pipeline->scratch_size = sh->scratch_offset +
sh->per_thread_scratch_size * sh->max_threads;
pipeline->active_shaders |= 1 << sh_info->stage;
return XGL_SUCCESS;
}
static XGL_RESULT pipeline_build_shaders(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
const struct intel_desc_layout_chain *chain =
intel_desc_layout_chain(info->graphics.pSetLayoutChain);
XGL_RESULT ret = XGL_SUCCESS;
if (ret == XGL_SUCCESS && info->vs.shader) {
ret = pipeline_build_shader(pipeline, chain,
&info->vs, &pipeline->vs);
}
if (ret == XGL_SUCCESS && info->tcs.shader) {
ret = pipeline_build_shader(pipeline, chain,
&info->tcs,&pipeline->tcs);
}
if (ret == XGL_SUCCESS && info->tes.shader) {
ret = pipeline_build_shader(pipeline, chain,
&info->tes,&pipeline->tes);
}
if (ret == XGL_SUCCESS && info->gs.shader) {
ret = pipeline_build_shader(pipeline, chain,
&info->gs, &pipeline->gs);
}
if (ret == XGL_SUCCESS && info->fs.shader) {
ret = pipeline_build_shader(pipeline, chain,
&info->fs, &pipeline->fs);
}
if (ret == XGL_SUCCESS && info->compute.cs.shader) {
chain = intel_desc_layout_chain(info->compute.setLayoutChain);
ret = pipeline_build_shader(pipeline, chain,
&info->compute.cs, &pipeline->cs);
}
return ret;
}
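/*
* Reserve cmd_len DWORDs in the pipeline's static command buffer and return
* a pointer to the reserved space.
*/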
static uint32_t *pipeline_cmd_ptr(struct intel_pipeline *pipeline, int cmd_len)
{
uint32_t *ptr;
assert(pipeline->cmd_len + cmd_len < INTEL_PSO_CMD_ENTRIES);
ptr = &pipeline->cmds[pipeline->cmd_len];
pipeline->cmd_len += cmd_len;
return ptr;
}
static XGL_RESULT pipeline_build_ia(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info* info)
{
pipeline->topology = info->ia.topology;
pipeline->disable_vs_cache = info->ia.disableVertexReuse;
switch (info->ia.topology) {
case XGL_TOPOLOGY_POINT_LIST:
pipeline->prim_type = GEN6_3DPRIM_POINTLIST;
break;
case XGL_TOPOLOGY_LINE_LIST:
pipeline->prim_type = GEN6_3DPRIM_LINELIST;
break;
case XGL_TOPOLOGY_LINE_STRIP:
pipeline->prim_type = GEN6_3DPRIM_LINESTRIP;
break;
case XGL_TOPOLOGY_TRIANGLE_LIST:
pipeline->prim_type = GEN6_3DPRIM_TRILIST;
break;
case XGL_TOPOLOGY_TRIANGLE_STRIP:
pipeline->prim_type = GEN6_3DPRIM_TRISTRIP;
break;
case XGL_TOPOLOGY_TRIANGLE_FAN:
pipeline->prim_type = GEN6_3DPRIM_TRIFAN;
break;
case XGL_TOPOLOGY_LINE_LIST_ADJ:
pipeline->prim_type = GEN6_3DPRIM_LINELIST_ADJ;
break;
case XGL_TOPOLOGY_LINE_STRIP_ADJ:
pipeline->prim_type = GEN6_3DPRIM_LINESTRIP_ADJ;
break;
case XGL_TOPOLOGY_TRIANGLE_LIST_ADJ:
pipeline->prim_type = GEN6_3DPRIM_TRILIST_ADJ;
break;
case XGL_TOPOLOGY_TRIANGLE_STRIP_ADJ:
pipeline->prim_type = GEN6_3DPRIM_TRISTRIP_ADJ;
break;
case XGL_TOPOLOGY_PATCH:
if (!info->tess.patchControlPoints ||
info->tess.patchControlPoints > 32)
return XGL_ERROR_BAD_PIPELINE_DATA;
pipeline->prim_type = GEN7_3DPRIM_PATCHLIST_1 +
info->tess.patchControlPoints - 1;
break;
default:
return XGL_ERROR_BAD_PIPELINE_DATA;
}
if (info->ia.primitiveRestartEnable) {
pipeline->primitive_restart = true;
pipeline->primitive_restart_index = info->ia.primitiveRestartIndex;
} else {
pipeline->primitive_restart = false;
}
return XGL_SUCCESS;
}
static XGL_RESULT pipeline_build_rs_state(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info* info)
{
const XGL_PIPELINE_RS_STATE_CREATE_INFO *rs_state = &info->rs;
bool ccw;
pipeline->depthClipEnable = rs_state->depthClipEnable;
pipeline->rasterizerDiscardEnable = rs_state->rasterizerDiscardEnable;
pipeline->use_rs_point_size = !rs_state->programPointSize;
if (rs_state->provokingVertex == XGL_PROVOKING_VERTEX_FIRST) {
pipeline->provoking_vertex_tri = 0;
pipeline->provoking_vertex_trifan = 1;
pipeline->provoking_vertex_line = 0;
} else {
pipeline->provoking_vertex_tri = 2;
pipeline->provoking_vertex_trifan = 2;
pipeline->provoking_vertex_line = 1;
}
switch (rs_state->fillMode) {
case XGL_FILL_POINTS:
pipeline->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_POINT |
GEN7_SF_DW1_BACKFACE_POINT;
break;
case XGL_FILL_WIREFRAME:
pipeline->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_WIREFRAME |
GEN7_SF_DW1_BACKFACE_WIREFRAME;
break;
case XGL_FILL_SOLID:
default:
pipeline->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_SOLID |
GEN7_SF_DW1_BACKFACE_SOLID;
break;
}
ccw = (rs_state->frontFace == XGL_FRONT_FACE_CCW);
/* flip the winding order */
if (info->vp.clipOrigin == XGL_COORDINATE_ORIGIN_LOWER_LEFT)
ccw = !ccw;
if (ccw) {
pipeline->cmd_sf_fill |= GEN7_SF_DW1_FRONTWINDING_CCW;
pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
}
switch (rs_state->cullMode) {
case XGL_CULL_NONE:
default:
pipeline->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_NONE;
pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_NONE;
break;
case XGL_CULL_FRONT:
pipeline->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_FRONT;
pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_FRONT;
break;
case XGL_CULL_BACK:
pipeline->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_BACK;
pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_BACK;
break;
case XGL_CULL_FRONT_AND_BACK:
pipeline->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_BOTH;
pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_BOTH;
break;
}
/* only GEN7+ needs cull mode in 3DSTATE_CLIP */
if (intel_gpu_gen(pipeline->dev->gpu) == INTEL_GEN(6))
pipeline->cmd_clip_cull = 0;
return XGL_SUCCESS;
}
static void pipeline_destroy(struct intel_obj *obj)
{
struct intel_pipeline *pipeline = intel_pipeline_from_obj(obj);
if (pipeline->active_shaders & SHADER_VERTEX_FLAG) {
intel_pipeline_shader_cleanup(&pipeline->vs, pipeline->dev->gpu);
}
if (pipeline->active_shaders & SHADER_TESS_CONTROL_FLAG) {
intel_pipeline_shader_cleanup(&pipeline->tcs, pipeline->dev->gpu);
}
if (pipeline->active_shaders & SHADER_TESS_EVAL_FLAG) {
intel_pipeline_shader_cleanup(&pipeline->tes, pipeline->dev->gpu);
}
if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) {
intel_pipeline_shader_cleanup(&pipeline->gs, pipeline->dev->gpu);
}
if (pipeline->active_shaders & SHADER_FRAGMENT_FLAG) {
intel_pipeline_shader_cleanup(&pipeline->fs, pipeline->dev->gpu);
}
if (pipeline->active_shaders & SHADER_COMPUTE_FLAG) {
intel_pipeline_shader_cleanup(&pipeline->cs, pipeline->dev->gpu);
}
intel_base_destroy(&pipeline->obj.base);
}
static XGL_RESULT pipeline_get_info(struct intel_base *base, int type,
size_t *size, void *data)
{
struct intel_pipeline *pipeline = intel_pipeline_from_base(base);
XGL_RESULT ret = XGL_SUCCESS;
switch (type) {
case XGL_INFO_TYPE_MEMORY_REQUIREMENTS:
{
XGL_MEMORY_REQUIREMENTS *mem_req = data;
*size = sizeof(XGL_MEMORY_REQUIREMENTS);
if (data) {
mem_req->size = pipeline->scratch_size;
mem_req->alignment = 1024;
mem_req->memType = XGL_MEMORY_TYPE_OTHER;
}
}
break;
default:
ret = intel_base_get_info(base, type, size, data);
break;
}
return ret;
}
static XGL_RESULT pipeline_validate(struct intel_pipeline *pipeline)
{
/*
* Validate required elements
*/
if (!(pipeline->active_shaders & SHADER_VERTEX_FLAG)) {
// TODO: Log debug message: Vertex Shader required.
return XGL_ERROR_BAD_PIPELINE_DATA;
}
/*
* Tessellation control and evaluation must either both have a shader defined
* or neither.
*/
if (((pipeline->active_shaders & SHADER_TESS_CONTROL_FLAG) == 0) !=
((pipeline->active_shaders & SHADER_TESS_EVAL_FLAG) == 0) ) {
// TODO: Log debug message: Both Tess control and Tess eval are required to use tessellation
return XGL_ERROR_BAD_PIPELINE_DATA;
}
if ((pipeline->active_shaders & SHADER_COMPUTE_FLAG) &&
(pipeline->active_shaders & (SHADER_VERTEX_FLAG | SHADER_TESS_CONTROL_FLAG |
SHADER_TESS_EVAL_FLAG | SHADER_GEOMETRY_FLAG |
SHADER_FRAGMENT_FLAG))) {
// TODO: Log debug message: Can only specify compute shader when doing compute
return XGL_ERROR_BAD_PIPELINE_DATA;
}
/*
* XGL_TOPOLOGY_PATCH primitive topology is only valid for tessellation pipelines.
* Mismatching primitive topology and tessellation fails graphics pipeline creation.
*/
if (pipeline->active_shaders & (SHADER_TESS_CONTROL_FLAG | SHADER_TESS_EVAL_FLAG) &&
(pipeline->topology != XGL_TOPOLOGY_PATCH)) {
// TODO: Log debug message: Invalid topology used with tessellation shader.
return XGL_ERROR_BAD_PIPELINE_DATA;
}
if ((pipeline->topology == XGL_TOPOLOGY_PATCH) &&
(pipeline->active_shaders & ~(SHADER_TESS_CONTROL_FLAG | SHADER_TESS_EVAL_FLAG))) {
// TODO: Log debug message: Cannot use TOPOLOGY_PATCH on non-tessalation shader.
return XGL_ERROR_BAD_PIPELINE_DATA;
}
return XGL_SUCCESS;
}
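/*
* Partition the URB between VS and GS entries and emit 3DSTATE_URB (Gen6).
* Entry sizes come from the shaders' attribute counts, rounded up to
* 1024-bit rows.
*/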
static void pipeline_build_urb_alloc_gen6(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
const struct intel_gpu *gpu = pipeline->dev->gpu;
const int urb_size = ((gpu->gt == 2) ? 64 : 32) * 1024;
const struct intel_pipeline_shader *vs = &pipeline->vs;
const struct intel_pipeline_shader *gs = &pipeline->gs;
int vs_entry_size, gs_entry_size;
int vs_size, gs_size;
INTEL_GPU_ASSERT(gpu, 6, 6);
vs_entry_size = ((vs->in_count >= vs->out_count) ?
vs->in_count : vs->out_count);
gs_entry_size = (gs) ? gs->out_count : 0;
/* in bytes */
vs_entry_size *= sizeof(float) * 4;
gs_entry_size *= sizeof(float) * 4;
if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) {
vs_size = urb_size / 2;
gs_size = vs_size;
} else {
vs_size = urb_size;
gs_size = 0;
}
/* 3DSTATE_URB */
{
const uint8_t cmd_len = 3;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) |
(cmd_len - 2);
int vs_alloc_size, gs_alloc_size;
int vs_entry_count, gs_entry_count;
uint32_t *dw;
/* in 1024-bit rows */
vs_alloc_size = (vs_entry_size + 128 - 1) / 128;
gs_alloc_size = (gs_entry_size + 128 - 1) / 128;
/* valid range is [1, 5] */
if (!vs_alloc_size)
vs_alloc_size = 1;
if (!gs_alloc_size)
gs_alloc_size = 1;
assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
/* valid range is [24, 256], multiples of 4 */
vs_entry_count = (vs_size / 128 / vs_alloc_size) & ~3;
if (vs_entry_count > 256)
vs_entry_count = 256;
assert(vs_entry_count >= 24);
/* valid range is [0, 256], multiples of 4 */
gs_entry_count = (gs_size / 128 / gs_alloc_size) & ~3;
if (gs_entry_count > 256)
gs_entry_count = 256;
dw = pipeline_cmd_ptr(pipeline, cmd_len);
dw[0] = dw0;
dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
vs_entry_count << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
dw[2] = gs_entry_count << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
(gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
}
}
static void pipeline_build_urb_alloc_gen7(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
const struct intel_gpu *gpu = pipeline->dev->gpu;
const int urb_size = ((gpu->gt == 3) ? 512 :
(gpu->gt == 2) ? 256 : 128) * 1024;
const struct intel_pipeline_shader *vs = &pipeline->vs;
const struct intel_pipeline_shader *gs = &pipeline->gs;
/* some space is reserved for PCBs */
int urb_offset = ((gpu->gt == 3) ? 32 : 16) * 1024;
int vs_entry_size, gs_entry_size;
int vs_size, gs_size;
INTEL_GPU_ASSERT(gpu, 7, 7.5);
vs_entry_size = ((vs->in_count >= vs->out_count) ?
vs->in_count : vs->out_count);
gs_entry_size = (gs) ? gs->out_count : 0;
/* in bytes */
vs_entry_size *= sizeof(float) * 4;
gs_entry_size *= sizeof(float) * 4;
if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) {
vs_size = (urb_size - urb_offset) / 2;
gs_size = vs_size;
} else {
vs_size = urb_size - urb_offset;
gs_size = 0;
}
/* 3DSTATE_URB_* */
{
const uint8_t cmd_len = 2;
int vs_alloc_size, gs_alloc_size;
int vs_entry_count, gs_entry_count;
uint32_t *dw;
/* in 512-bit rows */
vs_alloc_size = (vs_entry_size + 64 - 1) / 64;
gs_alloc_size = (gs_entry_size + 64 - 1) / 64;
if (!vs_alloc_size)
vs_alloc_size = 1;
if (!gs_alloc_size)
gs_alloc_size = 1;
/* avoid performance decrease due to banking */
if (vs_alloc_size == 5)
vs_alloc_size = 6;
/* in multiples of 8 */
vs_entry_count = (vs_size / 64 / vs_alloc_size) & ~7;
assert(vs_entry_count >= 32);
gs_entry_count = (gs_size / 64 / gs_alloc_size) & ~7;
if (intel_gpu_gen(gpu) >= INTEL_GEN(7.5)) {
const int max_vs_entry_count =
(gpu->gt >= 2) ? 1664 : 640;
const int max_gs_entry_count =
(gpu->gt >= 2) ? 640 : 256;
if (vs_entry_count >= max_vs_entry_count)
vs_entry_count = max_vs_entry_count;
if (gs_entry_count >= max_gs_entry_count)
gs_entry_count = max_gs_entry_count;
} else {
const int max_vs_entry_count =
(gpu->gt == 2) ? 704 : 512;
const int max_gs_entry_count =
(gpu->gt == 2) ? 320 : 192;
if (vs_entry_count >= max_vs_entry_count)
vs_entry_count = max_vs_entry_count;
if (gs_entry_count >= max_gs_entry_count)
gs_entry_count = max_gs_entry_count;
}
dw = pipeline_cmd_ptr(pipeline, cmd_len*4);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2);
dw[1] = (urb_offset / 8192) << GEN7_URB_DW1_OFFSET__SHIFT |
(vs_alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
vs_entry_count;
dw += 2;
if (gs_size)
urb_offset += vs_size;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2);
dw[1] = (urb_offset / 8192) << GEN7_URB_DW1_OFFSET__SHIFT |
(gs_alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
gs_entry_count;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2);
dw[1] = (urb_offset / 8192) << GEN7_URB_DW1_OFFSET__SHIFT;
dw += 2;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2);
dw[1] = (urb_offset / 8192) << GEN7_URB_DW1_OFFSET__SHIFT;
}
}
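/*
* Emit 3DSTATE_VERTEX_ELEMENTS: one VERTEX_ELEMENT_STATE per VS input
* location set in vs->inputs_read, plus an extra element when the shader
* uses VertexID and/or InstanceID.
*/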
static void pipeline_build_vertex_elements(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
const struct intel_pipeline_shader *vs = &pipeline->vs;
uint8_t cmd_len;
uint32_t *dw;
uint32_t i, j;
uint32_t attr_count;
uint32_t attrs_processed;
int comps[4];
INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5);
attr_count = u_popcountll(vs->inputs_read);
cmd_len = 1 + 2 * attr_count;
if (vs->uses & (INTEL_SHADER_USE_VID | INTEL_SHADER_USE_IID))
cmd_len += 2;
if (cmd_len == 1)
return;
dw = pipeline_cmd_ptr(pipeline, cmd_len);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
(cmd_len - 2);
dw++;
/* VERTEX_ELEMENT_STATE */
for (i = 0, attrs_processed = 0; attrs_processed < attr_count; i++) {
XGL_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION *attr = NULL;
/*
* The compiler will pack the shader references and then
* indicate which locations are used via the bitmask in
* vs->inputs_read.
*/
if (!(vs->inputs_read & (1L << i))) {
continue;
}
/*
* For each bit set in the vs->inputs_read we'll need
* to find the corresponding attribute record and then
* set up the next HW vertex element based on that attribute.
*/
for (j = 0; j < info->vi.attributeCount; j++) {
if (info->vi.pVertexAttributeDescriptions[j].location == i) {
attr = (XGL_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION *) &info->vi.pVertexAttributeDescriptions[j];
attrs_processed++;
break;
}
}
assert(attr != NULL);
const int format =
intel_format_translate_color(pipeline->dev->gpu, attr->format);
comps[0] = GEN6_VFCOMP_STORE_0;
comps[1] = GEN6_VFCOMP_STORE_0;
comps[2] = GEN6_VFCOMP_STORE_0;
comps[3] = icd_format_is_int(attr->format) ?
GEN6_VFCOMP_STORE_1_INT : GEN6_VFCOMP_STORE_1_FP;
switch (icd_format_get_channel_count(attr->format)) {
case 4: comps[3] = GEN6_VFCOMP_STORE_SRC; /* fall through */
case 3: comps[2] = GEN6_VFCOMP_STORE_SRC; /* fall through */
case 2: comps[1] = GEN6_VFCOMP_STORE_SRC; /* fall through */
case 1: comps[0] = GEN6_VFCOMP_STORE_SRC; break;
default:
break;
}
assert(attr->offsetInBytes <= 2047);
dw[0] = attr->binding << GEN6_VE_DW0_VB_INDEX__SHIFT |
GEN6_VE_DW0_VALID |
format << GEN6_VE_DW0_FORMAT__SHIFT |
attr->offsetInBytes;
dw[1] = comps[0] << GEN6_VE_DW1_COMP0__SHIFT |
comps[1] << GEN6_VE_DW1_COMP1__SHIFT |
comps[2] << GEN6_VE_DW1_COMP2__SHIFT |
comps[3] << GEN6_VE_DW1_COMP3__SHIFT;
dw += 2;
}
if (vs->uses & (INTEL_SHADER_USE_VID | INTEL_SHADER_USE_IID)) {
comps[0] = (vs->uses & INTEL_SHADER_USE_VID) ?
GEN6_VFCOMP_STORE_VID : GEN6_VFCOMP_STORE_0;
comps[1] = (vs->uses & INTEL_SHADER_USE_IID) ?
GEN6_VFCOMP_STORE_IID : GEN6_VFCOMP_NOSTORE;
comps[2] = GEN6_VFCOMP_NOSTORE;
comps[3] = GEN6_VFCOMP_NOSTORE;
dw[0] = GEN6_VE_DW0_VALID;
dw[1] = comps[0] << GEN6_VE_DW1_COMP0__SHIFT |
comps[1] << GEN6_VE_DW1_COMP1__SHIFT |
comps[2] << GEN6_VE_DW1_COMP2__SHIFT |
comps[3] << GEN6_VE_DW1_COMP3__SHIFT;
dw += 2;
}
}
static void pipeline_build_viewport(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
switch (info->vp.depthMode) {
case XGL_DEPTH_MODE_ZERO_TO_ONE:
pipeline->depth_zero_to_one = true;
break;
case XGL_DEPTH_MODE_NEGATIVE_ONE_TO_ONE:
default:
pipeline->depth_zero_to_one = false;
break;
}
}
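/*
* Build 3DSTATE_SBE: enable attribute swizzling so each FS input reads the
* matching VS output slot (or a constant zero when the VS does not write
* it), and select the point-sprite texcoord origin.
*/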
static void pipeline_build_fragment_SBE(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
const struct intel_pipeline_shader *fs = &pipeline->fs;
const struct intel_pipeline_shader *vs = &pipeline->vs;
uint8_t cmd_len;
uint32_t *body;
uint32_t attr_skip, attr_count;
uint32_t vue_offset, vue_len;
uint32_t i;
INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5);
cmd_len = 14;
if (intel_gpu_gen(pipeline->dev->gpu) >= INTEL_GEN(7))
body = pipeline_cmd_ptr(pipeline, cmd_len);
else
body = pipeline->cmd_3dstate_sbe;
assert(!fs->reads_user_clip || vs->enable_user_clip);
attr_skip = vs->outputs_offset;
if (vs->enable_user_clip != fs->reads_user_clip) {
attr_skip += 2;
}
assert(vs->out_count >= attr_skip);
attr_count = vs->out_count - attr_skip;
// LUNARG TODO: We currently are only handling 16 attrs;
// ultimately, we need to handle 32
assert(fs->in_count <= 16);
assert(attr_count <= 16);
vue_offset = attr_skip / 2;
vue_len = (attr_count + 1) / 2;
if (!vue_len)
vue_len = 1;
body[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) |
(cmd_len - 2);
// LUNARG TODO: If the attrs needed by the FS are exactly
// what is written by the VS, we don't need to enable
// swizzling, improving performance. Even if we swizzle,
// we can improve performance by reducing vue_len to
// just include the values needed by the FS:
// vue_len = ceiling((max_vs_out + 1)/2)
body[1] = GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE |
fs->in_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT |
vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
switch (info->rs.pointOrigin) {
case XGL_COORDINATE_ORIGIN_UPPER_LEFT:
body[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
break;
case XGL_COORDINATE_ORIGIN_LOWER_LEFT:
body[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
break;
default:
assert(!"unknown point origin");
break;
}
uint16_t vs_slot[fs->in_count];
int32_t fs_in = 0;
int32_t vs_out = - (vue_offset * 2 - vs->outputs_offset);
for (i=0; i < 64; i++) {
bool vsWrites = vs->outputs_written & (1L << i);
bool fsReads = fs->inputs_read & (1L << i);
if (fsReads) {
assert(vs_out >= 0);
assert(fs_in < fs->in_count);
vs_slot[fs_in] = vs_out;
if (!vsWrites) {
// If the vertex shader did not write this input, we cannot
// program the SBE to read it. Our choices are to allow it to
// read junk from a GRF, or get zero. We're choosing zero.
if (i >= fs->generic_input_start) {
vs_slot[fs_in] = GEN8_SBE_SWIZ_CONST_0000 |
GEN8_SBE_SWIZ_OVERRIDE_X |
GEN8_SBE_SWIZ_OVERRIDE_Y |
GEN8_SBE_SWIZ_OVERRIDE_Z |
GEN8_SBE_SWIZ_OVERRIDE_W;
}
}
fs_in += 1;
}
if (vsWrites) {
vs_out += 1;
}
}
for (i = 0; i < 8; i++) {
uint16_t hi, lo;
/* no attr swizzles */
if (i * 2 + 1 < fs->in_count) {
lo = vs_slot[i * 2];
hi = vs_slot[i * 2 + 1];
} else if (i * 2 < fs->in_count) {
lo = vs_slot[i * 2];
hi = 0;
} else {
hi = 0;
lo = 0;
}
body[2 + i] = hi << GEN8_SBE_SWIZ_HIGH__SHIFT | lo;
}
if (info->ia.topology == XGL_TOPOLOGY_POINT_LIST)
body[10] = fs->point_sprite_enables;
else
body[10] = 0;
body[11] = 0; /* constant interpolation enables */
body[12] = 0; /* WrapShortest enables */
body[13] = 0;
}
static void pipeline_build_gs(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
// gen7_emit_3DSTATE_GS done by cmd_pipeline
}
static void pipeline_build_hs(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
const uint8_t cmd_len = 7;
const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
uint32_t *dw;
INTEL_GPU_ASSERT(pipeline->dev->gpu, 7, 7.5);
dw = pipeline_cmd_ptr(pipeline, cmd_len);
dw[0] = dw0;
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
dw[5] = 0;
dw[6] = 0;
}
static void pipeline_build_te(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
const uint8_t cmd_len = 4;
const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2);
uint32_t *dw;
INTEL_GPU_ASSERT(pipeline->dev->gpu, 7, 7.5);
dw = pipeline_cmd_ptr(pipeline, cmd_len);
dw[0] = dw0;
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
}
static void pipeline_build_ds(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
const uint8_t cmd_len = 6;
const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
uint32_t *dw;
INTEL_GPU_ASSERT(pipeline->dev->gpu, 7, 7.5);
dw = pipeline_cmd_ptr(pipeline, cmd_len);
dw[0] = dw0;
dw[1] = 0;
dw[2] = 0;
dw[3] = 0;
dw[4] = 0;
dw[5] = 0;
}
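/*
* Pre-pack the depth/stencil control DWORDs (stencil ops and functions,
* depth test function, and depth write enable) from the DS create info.
*/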
static void pipeline_build_depth_stencil(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
pipeline->cmd_depth_stencil = 0;
if (info->db.stencilTestEnable) {
pipeline->cmd_depth_stencil = 1 << 31 |
translate_compare_func(info->db.front.stencilFunc) << 28 |
translate_stencil_op(info->db.front.stencilFailOp) << 25 |
translate_stencil_op(info->db.front.stencilDepthFailOp) << 22 |
translate_stencil_op(info->db.front.stencilPassOp) << 19 |
1 << 15 |
translate_compare_func(info->db.back.stencilFunc) << 12 |
translate_stencil_op(info->db.back.stencilFailOp) << 9 |
translate_stencil_op(info->db.back.stencilDepthFailOp) << 6 |
translate_stencil_op(info->db.back.stencilPassOp) << 3;
}
pipeline->stencilTestEnable = info->db.stencilTestEnable;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 360:
*
* "Enabling the Depth Test function without defining a Depth Buffer is
* UNDEFINED."
*
* From the Sandy Bridge PRM, volume 2 part 1, page 375:
*
* "A Depth Buffer must be defined before enabling writes to it, or
* operation is UNDEFINED."
*
* TODO We do not check these yet.
*/
if (info->db.depthTestEnable) {
pipeline->cmd_depth_test = GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
translate_compare_func(info->db.depthFunc) << 27;
} else {
pipeline->cmd_depth_test = GEN6_COMPAREFUNCTION_ALWAYS << 27;
}
if (info->db.depthWriteEnable)
pipeline->cmd_depth_test |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
}
static void pipeline_build_msaa(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
uint32_t cmd, cmd_len;
uint32_t *dw;
INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5);
pipeline->sample_count = (info->ms.samples <= 1) ? 1 : info->ms.samples;
/* 3DSTATE_SAMPLE_MASK */
cmd = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK);
cmd_len = 2;
dw = pipeline_cmd_ptr(pipeline, cmd_len);
dw[0] = cmd | (cmd_len - 2);
dw[1] = info->ms.sampleMask & ((1 << pipeline->sample_count) - 1);
pipeline->cmd_sample_mask = dw[1];
}
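/*
* Pre-pack per-render-target blend state: blend factors and functions, an
* optional logic op, and channel write masks. Render targets beyond
* attachmentCount get all channel writes disabled.
*/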
static void pipeline_build_cb(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
uint32_t i;
INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5);
STATIC_ASSERT(ARRAY_SIZE(pipeline->cmd_cb) >= INTEL_MAX_RENDER_TARGETS*2);
assert(info->cb.attachmentCount <= INTEL_MAX_RENDER_TARGETS);
uint32_t *dw = pipeline->cmd_cb;
for (i = 0; i < info->cb.attachmentCount; i++) {
const XGL_PIPELINE_CB_ATTACHMENT_STATE *att = &info->cb.pAttachments[i];
uint32_t dw0, dw1;
dw0 = 0;
dw1 = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
GEN6_RT_DW1_PRE_BLEND_CLAMP |
GEN6_RT_DW1_POST_BLEND_CLAMP;
if (att->blendEnable) {
dw0 = 1 << 31 |
translate_blend_func(att->blendFuncAlpha) << 26 |
translate_blend(att->srcBlendAlpha) << 20 |
translate_blend(att->destBlendAlpha) << 15 |
translate_blend_func(att->blendFuncColor) << 11 |
translate_blend(att->srcBlendColor) << 5 |
translate_blend(att->destBlendColor);
if (att->blendFuncAlpha != att->blendFuncColor ||
att->srcBlendAlpha != att->srcBlendColor ||
att->destBlendAlpha != att->destBlendColor)
dw0 |= 1 << 30;
pipeline->dual_source_blend_enable = icd_pipeline_cb_att_needs_dual_source_blending(att);
}
if (info->cb.logicOp != XGL_LOGIC_OP_COPY) {
int logicop;
switch (info->cb.logicOp) {
case XGL_LOGIC_OP_CLEAR: logicop = GEN6_LOGICOP_CLEAR; break;
case XGL_LOGIC_OP_AND: logicop = GEN6_LOGICOP_AND; break;
case XGL_LOGIC_OP_AND_REVERSE: logicop = GEN6_LOGICOP_AND_REVERSE; break;
case XGL_LOGIC_OP_AND_INVERTED: logicop = GEN6_LOGICOP_AND_INVERTED; break;
case XGL_LOGIC_OP_NOOP: logicop = GEN6_LOGICOP_NOOP; break;
case XGL_LOGIC_OP_XOR: logicop = GEN6_LOGICOP_XOR; break;
case XGL_LOGIC_OP_OR: logicop = GEN6_LOGICOP_OR; break;
case XGL_LOGIC_OP_NOR: logicop = GEN6_LOGICOP_NOR; break;
case XGL_LOGIC_OP_EQUIV: logicop = GEN6_LOGICOP_EQUIV; break;
case XGL_LOGIC_OP_INVERT: logicop = GEN6_LOGICOP_INVERT; break;
case XGL_LOGIC_OP_OR_REVERSE: logicop = GEN6_LOGICOP_OR_REVERSE; break;
case XGL_LOGIC_OP_COPY_INVERTED: logicop = GEN6_LOGICOP_COPY_INVERTED; break;
case XGL_LOGIC_OP_OR_INVERTED: logicop = GEN6_LOGICOP_OR_INVERTED; break;
case XGL_LOGIC_OP_NAND: logicop = GEN6_LOGICOP_NAND; break;
case XGL_LOGIC_OP_SET: logicop = GEN6_LOGICOP_SET; break;
default:
assert(!"unknown logic op");
logicop = GEN6_LOGICOP_CLEAR;
break;
}
dw1 |= GEN6_RT_DW1_LOGICOP_ENABLE |
logicop << GEN6_RT_DW1_LOGICOP_FUNC__SHIFT;
}
if (!(att->channelWriteMask & 0x1))
dw1 |= GEN6_RT_DW1_WRITE_DISABLE_R;
if (!(att->channelWriteMask & 0x2))
dw1 |= GEN6_RT_DW1_WRITE_DISABLE_G;
if (!(att->channelWriteMask & 0x4))
dw1 |= GEN6_RT_DW1_WRITE_DISABLE_B;
if (!(att->channelWriteMask & 0x8))
dw1 |= GEN6_RT_DW1_WRITE_DISABLE_A;
dw[2 * i] = dw0;
dw[2 * i + 1] = dw1;
}
for (i=info->cb.attachmentCount; i < INTEL_MAX_RENDER_TARGETS; i++)
{
dw[2 * i] = 0;
dw[2 * i + 1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
GEN6_RT_DW1_PRE_BLEND_CLAMP |
GEN6_RT_DW1_POST_BLEND_CLAMP |
GEN6_RT_DW1_WRITE_DISABLE_R |
GEN6_RT_DW1_WRITE_DISABLE_G |
GEN6_RT_DW1_WRITE_DISABLE_B |
GEN6_RT_DW1_WRITE_DISABLE_A;
}
}
static XGL_RESULT pipeline_build_all(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
XGL_RESULT ret;
ret = pipeline_build_shaders(pipeline, info);
if (ret != XGL_SUCCESS)
return ret;
if (info->vi.bindingCount > ARRAY_SIZE(pipeline->vb) ||
info->vi.attributeCount > ARRAY_SIZE(pipeline->vb))
return XGL_ERROR_BAD_PIPELINE_DATA;
pipeline->vb_count = info->vi.bindingCount;
memcpy(pipeline->vb, info->vi.pVertexBindingDescriptions,
sizeof(pipeline->vb[0]) * pipeline->vb_count);
pipeline_build_vertex_elements(pipeline, info);
pipeline_build_viewport(pipeline, info);
pipeline_build_fragment_SBE(pipeline, info);
pipeline_build_msaa(pipeline, info);
pipeline_build_depth_stencil(pipeline, info);
if (intel_gpu_gen(pipeline->dev->gpu) >= INTEL_GEN(7)) {
pipeline_build_urb_alloc_gen7(pipeline, info);
pipeline_build_gs(pipeline, info);
pipeline_build_hs(pipeline, info);
pipeline_build_te(pipeline, info);
pipeline_build_ds(pipeline, info);
pipeline->wa_flags = INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE |
INTEL_CMD_WA_GEN6_PRE_COMMAND_SCOREBOARD_STALL |
INTEL_CMD_WA_GEN7_PRE_VS_DEPTH_STALL_WRITE |
INTEL_CMD_WA_GEN7_POST_COMMAND_CS_STALL |
INTEL_CMD_WA_GEN7_POST_COMMAND_DEPTH_STALL;
} else {
pipeline_build_urb_alloc_gen6(pipeline, info);
pipeline->wa_flags = INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE |
INTEL_CMD_WA_GEN6_PRE_COMMAND_SCOREBOARD_STALL;
}
ret = pipeline_build_ia(pipeline, info);
if (ret == XGL_SUCCESS)
ret = pipeline_build_rs_state(pipeline, info);
if (ret == XGL_SUCCESS) {
pipeline->db_format = info->db.format;
pipeline_build_cb(pipeline, info);
pipeline->cb_state = info->cb;
pipeline->tess_state = info->tess;
}
return ret;
}
struct intel_pipeline_create_info_header {
XGL_STRUCTURE_TYPE struct_type;
const struct intel_pipeline_create_info_header *next;
};
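/*
* Flatten the chained create-info structures into a single
* intel_pipeline_create_info, copying each recognized structure into its
* slot. Unknown structure types fail with XGL_ERROR_BAD_PIPELINE_DATA.
*/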
static XGL_RESULT pipeline_create_info_init(struct intel_pipeline_create_info *info,
const struct intel_pipeline_create_info_header *header)
{
memset(info, 0, sizeof(*info));
/*
* Do we need to set safe defaults in case the app doesn't provide all of
* the necessary create infos?
*/
info->ms.samples = 1;
info->ms.sampleMask = 1;
while (header) {
const void *src = (const void *) header;
size_t size;
void *dst;
switch (header->struct_type) {
case XGL_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO:
size = sizeof(info->graphics);
dst = &info->graphics;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO:
size = sizeof(info->vi);
dst = &info->vi;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO:
size = sizeof(info->ia);
dst = &info->ia;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO:
size = sizeof(info->db);
dst = &info->db;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO:
size = sizeof(info->cb);
dst = &info->cb;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO:
size = sizeof(info->rs);
dst = &info->rs;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO:
size = sizeof(info->tess);
dst = &info->tess;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO:
size = sizeof(info->ms);
dst = &info->ms;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO:
size = sizeof(info->vp);
dst = &info->vp;
break;
case XGL_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO:
{
const XGL_PIPELINE_SHADER *shader =
(const XGL_PIPELINE_SHADER *) (header + 1);
src = (const void *) shader;
size = sizeof(*shader);
switch (shader->stage) {
case XGL_SHADER_STAGE_VERTEX:
dst = &info->vs;
break;
case XGL_SHADER_STAGE_TESS_CONTROL:
dst = &info->tcs;
break;
case XGL_SHADER_STAGE_TESS_EVALUATION:
dst = &info->tes;
break;
case XGL_SHADER_STAGE_GEOMETRY:
dst = &info->gs;
break;
case XGL_SHADER_STAGE_FRAGMENT:
dst = &info->fs;
break;
default:
return XGL_ERROR_BAD_PIPELINE_DATA;
break;
}
}
break;
case XGL_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO:
size = sizeof(info->compute);
dst = &info->compute;
break;
default:
return XGL_ERROR_BAD_PIPELINE_DATA;
break;
}
memcpy(dst, src, size);
header = header->next;
}
return XGL_SUCCESS;
}
static XGL_RESULT graphics_pipeline_create(struct intel_dev *dev,
const XGL_GRAPHICS_PIPELINE_CREATE_INFO *info_,
struct intel_pipeline **pipeline_ret)
{
struct intel_pipeline_create_info info;
struct intel_pipeline *pipeline;
XGL_RESULT ret;
ret = pipeline_create_info_init(&info,
(const struct intel_pipeline_create_info_header *) info_);
if (ret != XGL_SUCCESS)
return ret;
pipeline = (struct intel_pipeline *) intel_base_create(&dev->base.handle,
sizeof(*pipeline), dev->base.dbg,
XGL_DBG_OBJECT_GRAPHICS_PIPELINE, info_, 0);
if (!pipeline)
return XGL_ERROR_OUT_OF_MEMORY;
pipeline->dev = dev;
pipeline->obj.base.get_info = pipeline_get_info;
pipeline->obj.destroy = pipeline_destroy;
ret = pipeline_build_all(pipeline, &info);
if (ret == XGL_SUCCESS)
ret = pipeline_validate(pipeline);
if (ret != XGL_SUCCESS) {
pipeline_destroy(&pipeline->obj);
return ret;
}
*pipeline_ret = pipeline;
return XGL_SUCCESS;
}
ICD_EXPORT XGL_RESULT XGLAPI xglCreateGraphicsPipeline(
XGL_DEVICE device,
const XGL_GRAPHICS_PIPELINE_CREATE_INFO* pCreateInfo,
XGL_PIPELINE* pPipeline)
{
struct intel_dev *dev = intel_dev(device);
return graphics_pipeline_create(dev, pCreateInfo,
(struct intel_pipeline **) pPipeline);
}
ICD_EXPORT XGL_RESULT XGLAPI xglCreateGraphicsPipelineDerivative(
XGL_DEVICE device,
const XGL_GRAPHICS_PIPELINE_CREATE_INFO* pCreateInfo,
XGL_PIPELINE basePipeline,
XGL_PIPELINE* pPipeline)
{
struct intel_dev *dev = intel_dev(device);
/* TODO: Use basePipeline to optimize creation of derivative */
return graphics_pipeline_create(dev, pCreateInfo,
(struct intel_pipeline **) pPipeline);
}
ICD_EXPORT XGL_RESULT XGLAPI xglCreateComputePipeline(
XGL_DEVICE device,
const XGL_COMPUTE_PIPELINE_CREATE_INFO* pCreateInfo,
XGL_PIPELINE* pPipeline)
{
return XGL_ERROR_UNAVAILABLE;
}
ICD_EXPORT XGL_RESULT XGLAPI xglStorePipeline(
XGL_PIPELINE pipeline,
size_t* pDataSize,
void* pData)
{
return XGL_ERROR_UNAVAILABLE;
}
ICD_EXPORT XGL_RESULT XGLAPI xglLoadPipeline(
XGL_DEVICE device,
size_t dataSize,
const void* pData,
XGL_PIPELINE* pPipeline)
{
return XGL_ERROR_UNAVAILABLE;
}
ICD_EXPORT XGL_RESULT XGLAPI xglLoadPipelineDerivative(
XGL_DEVICE device,
size_t dataSize,
const void* pData,
XGL_PIPELINE basePipeline,
XGL_PIPELINE* pPipeline)
{
return XGL_ERROR_UNAVAILABLE;
}