Handle gaps in FS inputs.
Added logic to handle user varyings that are written by the VS but not read by the FS.
Also moved SBE command building from draw-time to pipeline-create.
diff --git a/icd/intel/pipeline.c b/icd/intel/pipeline.c
index bc834ca..6d2d502 100644
--- a/icd/intel/pipeline.c
+++ b/icd/intel/pipeline.c
@@ -453,7 +453,7 @@
INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5);
- cmd_len = 1 + 2 * u_popcount(vs->user_attributes_read);
+ cmd_len = 1 + 2 * u_popcountll(vs->inputs_read);
if (vs->uses & (INTEL_SHADER_USE_VID | INTEL_SHADER_USE_IID))
cmd_len += 2;
@@ -468,7 +468,7 @@
/* VERTEX_ELEMENT_STATE */
for (i = 0; i < info->vi.attributeCount; i++) {
- if (!(vs->user_attributes_read & (1 << i)))
+ if (!(vs->inputs_read & (1L << i)))
continue;
const XGL_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION *attr =
&info->vi.pVertexAttributeDescriptions[i];
@@ -523,6 +523,97 @@
}
}
+/*
+ * Build the 3DSTATE_SBE command for the pipeline.  Each FS input is mapped,
+ * through the attribute swizzle table, to the position of the matching VS
+ * output, so varyings written by the VS but not read by the FS are skipped.
+ * The body offset is recorded in pipeline->cmd_sbe_body_offset.
+ */
+static void pipeline_build_fragment_SBE(struct intel_pipeline *pipeline)
+{
+    const struct intel_pipeline_shader *fs = &pipeline->fs;
+    const struct intel_pipeline_shader *vs = &pipeline->vs;
+    const uint8_t cmd_len = 14;
+    /* fixed size and zero-filled: in_count may be 0 and unused entries stay 0 */
+    uint16_t vs_slot[16] = { 0 };
+    XGL_UINT attr_skip, attr_count;
+    XGL_UINT vue_offset, vue_len, i;
+    XGL_INT fs_in = 0, vs_out;
+    uint32_t *body;
+
+    INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5);
+
+    body = pipeline_cmd_ptr(pipeline, cmd_len);
+    pipeline->cmd_sbe_body_offset = body - pipeline->cmds + 1;
+
+    /* VS outputs VUE header and position additionally */
+    assert(vs->out_count >= fs->in_count + 2);
+    assert(!fs->reads_user_clip || vs->enable_user_clip);
+    attr_skip = vs->outputs_offset;
+    if (vs->enable_user_clip != fs->reads_user_clip) {
+        attr_skip += 2;
+    }
+    assert(vs->out_count >= attr_skip);
+    attr_count = vs->out_count - attr_skip;
+
+    // LUNARG TODO: We currently are only handling 16 attrs;
+    // ultimately, we need to handle 32
+    assert(fs->in_count <= 16);
+    assert(attr_count <= 16);
+
+    vue_offset = attr_skip / 2;
+    vue_len = (attr_count + 1) / 2;
+    if (!vue_len)
+        vue_len = 1;
+
+    body[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
+
+    // LUNARG TODO: If the attrs needed by the FS are exactly
+    // what is written by the VS, we don't need to enable
+    // swizzling, improving performance. Even if we swizzle,
+    // we can improve performance by reducing vue_len to
+    // just include the values needed by the FS:
+    // vue_len = ceiling((max_vs_out + 1)/2)
+
+    body[1] = GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE |
+              fs->in_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
+              vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT |
+              vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
+
+    /*
+     * Walk the 64 varying bits in order.  vs_out counts the VS outputs written
+     * so far, starting from outputs_offset minus the URB read offset in
+     * attribute units (vue_offset * 2), so it can begin below zero.  The
+     * n-th FS input read is assigned the value of vs_out at its bit.
+     */
+    vs_out = - (vue_offset * 2 - vs->outputs_offset);
+    for (i = 0; i < 64; i++) {
+        /* 64-bit unsigned one: 1L may be 32 bits wide, and 1L << 63 overflows */
+        const uint64_t bit = (uint64_t) 1 << i;
+
+        if (fs->inputs_read & bit) {
+            assert(vs_out >= 0);
+            assert(fs_in < fs->in_count);
+            vs_slot[fs_in] = vs_out;
+            fs_in += 1;
+        }
+        if (vs->outputs_written & bit) {
+            vs_out += 1;
+        }
+    }
+
+    /* each dword packs two table entries; entries past in_count are 0 */
+    for (i = 0; i < 8; i++) {
+        body[2 + i] = vs_slot[i * 2 + 1] << GEN7_SBE_ATTR_HIGH__SHIFT |
+                      vs_slot[i * 2];
+    }
+
+    body[10] = 0; /* point sprite enables */
+    body[11] = 0; /* constant interpolation enables */
+    body[12] = 0; /* WrapShortest enables */
+    body[13] = 0;
+}
+
static void pipeline_build_gs(struct intel_pipeline *pipeline,
const struct intel_pipeline_create_info *info)
{
@@ -600,6 +691,7 @@
sizeof(pipeline->vb[0]) * pipeline->vb_count);
pipeline_build_vertex_elements(pipeline, info);
+ pipeline_build_fragment_SBE(pipeline);
if (intel_gpu_gen(pipeline->dev->gpu) >= INTEL_GEN(7)) {
pipeline_build_urb_alloc_gen7(pipeline, info);