intel: Add const_alloc state to pipeline
diff --git a/icd/intel/pipeline.c b/icd/intel/pipeline.c
index 5782877..f39ea2a 100644
--- a/icd/intel/pipeline.c
+++ b/icd/intel/pipeline.c
@@ -478,6 +478,197 @@
}
}
+static void builder_build_push_const_alloc_gen7(struct intel_pipeline_builder *builder,
+ struct intel_pipeline *p)
+{
+ const uint8_t cmd_len = 2;
+ uint32_t offset = 0;
+ uint32_t size = 8192;
+ uint32_t *dw;
+ int end;
+
+ INTEL_GPU_ASSERT(builder->gpu, 7, 7);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 68:
+ *
+ * "(A table that says the maximum size of each constant buffer is
+ * 16KB")
+ *
+ * From the Ivy Bridge PRM, volume 2 part 1, page 115:
+ *
+ * "The sum of the Constant Buffer Offset and the Constant Buffer Size
+ * may not exceed the maximum value of the Constant Buffer Size."
+ *
+ * Thus, the valid range of buffer end is [0KB, 16KB].
+ */
+ end = (offset + size) / 1024;
+ if (end > 16) {
+ assert(!"invalid constant buffer end");
+ end = 16;
+ }
+
+ /* the valid range of buffer offset is [0KB, 15KB] */
+ offset = (offset + 1023) / 1024;
+ if (offset > 15) {
+ assert(!"invalid constant buffer offset");
+ offset = 15;
+ }
+
+ if (offset > end) {
+ assert(!size);
+ offset = end;
+ }
+
+ /* the valid range of buffer size is [0KB, 15KB] */
+ size = end - offset;
+ if (size > 15) {
+ assert(!"invalid constant buffer size");
+ size = 15;
+ }
+
+ dw = pipeline_cmd_ptr(p, cmd_len * 5);
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) | (cmd_len - 2);
+ dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
+ size << GEN7_PCB_ALLOC_ANY_DW1_SIZE__SHIFT;
+
+ dw += 2;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) | (cmd_len - 2);
+ dw[1] = size << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
+ size << GEN7_PCB_ALLOC_ANY_DW1_SIZE__SHIFT;
+
+ dw += 2;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) | (cmd_len - 2);
+ dw[1] = 0 << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
+ 0 << GEN7_PCB_ALLOC_ANY_DW1_SIZE__SHIFT;
+
+ dw += 2;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) | (cmd_len - 2);
+ dw[1] = 0 << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
+ 0 << GEN7_PCB_ALLOC_ANY_DW1_SIZE__SHIFT;
+
+ dw += 2;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) | (cmd_len - 2);
+ dw[1] = 0 << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
+ 0 << GEN7_PCB_ALLOC_ANY_DW1_SIZE__SHIFT;
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 292:
+ *
+ * "A PIPE_CONTOL command with the CS Stall bit set must be programmed
+ * in the ring after this instruction
+ * (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
+ */
+ p->post_pso_wa_flags |= GEN7_WA_MULTISAMPLE_FLUSH;
+ // gen7_wa_pipe_control_cs_stall(p, true, true);
+ // looks equivalent to: gen6_wa_wm_multisample_flush - this does more
+ // than the documentation seems to imply
+}
+
+static void
+gen7_emit_3dstate_constant(struct intel_pipeline_builder *builder,
+ struct intel_pipeline *pipeline,
+ int subop,
+ const uint32_t *bufs, const int *sizes,
+ int num_bufs)
+{
+ const uint8_t cmd_len = 7;
+ const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
+ GEN6_RENDER_SUBTYPE_3D |
+ subop |
+ (cmd_len - 2);
+ uint32_t *dw = pipeline_cmd_ptr(pipeline, cmd_len);
+ int total_read_length, i;
+
+ INTEL_GPU_ASSERT(builder->gpu, 7, 7);
+
+ /* VS, HS, DS, GS, and PS variants */
+ assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS &&
+ subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS &&
+ subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK);
+
+ assert(num_bufs <= 4);
+
+ dw[0] = cmd;
+ dw++;
+ dw[0] = 0;
+ dw[1] = 0;
+
+ total_read_length = 0;
+ for (i = 0; i < 4; i++) {
+ int read_len;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 112:
+ *
+ * "Constant buffers must be enabled in order from Constant Buffer 0
+ * to Constant Buffer 3 within this command. For example, it is
+ * not allowed to enable Constant Buffer 1 by programming a
+ * non-zero value in the VS Constant Buffer 1 Read Length without a
+ * non-zero value in VS Constant Buffer 0 Read Length."
+ */
+ if (i >= num_bufs || !sizes[i]) {
+ for (; i < 4; i++) {
+ assert(i >= num_bufs || !sizes[i]);
+ dw[2 + i] = 0;
+ }
+ break;
+ }
+
+ /* read lengths are in 256-bit units */
+ read_len = (sizes[i] + 31) / 32;
+ /* the lower 5 bits are used for memory object control state */
+ assert(bufs[i] % 32 == 0);
+
+ dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
+ dw[2 + i] = bufs[i];
+
+ total_read_length += read_len;
+ }
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 113:
+ *
+ * "The sum of all four read length fields must be less than or equal
+ * to the size of 64"
+ */
+ assert(total_read_length <= 64);
+}
+
+static void
+gen7_emit_3dstate_pointer(struct intel_pipeline_builder *builder,
+ struct intel_pipeline *p,
+ int subop, uint32_t pointer)
+{
+ const uint8_t cmd_len = 2;
+ const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER |
+ GEN6_RENDER_SUBTYPE_3D |
+ subop |
+ (cmd_len - 2);
+ uint32_t *dw;
+
+ INTEL_GPU_ASSERT(builder->gpu, 7, 7);
+
+ dw = pipeline_cmd_ptr(p, cmd_len);
+ dw[0] = dw0;
+ dw[1] = pointer;
+}
+
+static void gen7_pipeline_gs(struct intel_pipeline_builder *builder,
+ struct intel_pipeline *pipeline)
+{
+ /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
+ gen7_emit_3dstate_constant(builder, pipeline,
+ GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
+ 0, 0, 0);
+ // gen7_emit_3DSTATE_GS done by cmd_pipeline
+
+ /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
+ // TODO: Do we want to track dirty state within a command buffer?
+ gen7_emit_3dstate_pointer(builder, pipeline,
+ GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_GS,
+ pipeline->gs_state.BINDING_TABLE_STATE);
+}
+
static XGL_RESULT builder_build_all(struct intel_pipeline_builder *builder,
struct intel_pipeline *pipeline)
{