intel: emit 3DSTATE_URB_*
The commands are stored in pipeline->cmd_urb_alloc at pipeline creation time,
and copied to intel_cmd when the pipeline is bound. Once our pipeline is more
complete, we will decide if we want multiple pipeline->cmd_*, or a single
pipeline->cmd.
diff --git a/icd/intel/pipeline.c b/icd/intel/pipeline.c
index 7dfc983..cee06d6 100644
--- a/icd/intel/pipeline.c
+++ b/icd/intel/pipeline.c
@@ -300,11 +300,190 @@
return XGL_SUCCESS;
}
-static XGL_RESULT builder_build(struct intel_pipeline_builder *builder,
- struct intel_pipeline *pipeline)
+static void builder_build_urb_alloc_gen6(struct intel_pipeline_builder *builder,
+ struct intel_pipeline *pipeline)
+{
+ const int urb_size = ((builder->gpu->gt == 2) ? 64 : 32) * 1024;
+ const struct intel_shader *vs = intel_shader(builder->vs.shader);
+ const struct intel_shader *gs = intel_shader(builder->gs.shader);
+ int vs_entry_size, gs_entry_size;
+ int vs_size, gs_size;
+
+ INTEL_GPU_ASSERT(builder->gpu, 6, 6);
+
+ vs_entry_size = ((vs->in_count >= vs->out_count) ?
+ vs->in_count : vs->out_count);
+ gs_entry_size = (gs) ? gs->out_count : 0;
+
+ /* in bytes */
+ vs_entry_size *= sizeof(float) * 4;
+ gs_entry_size *= sizeof(float) * 4;
+
+ if (gs) {
+ vs_size = urb_size / 2;
+ gs_size = vs_size;
+ } else {
+ vs_size = urb_size;
+ gs_size = 0;
+ }
+
+ /* 3DSTATE_URB */
+ {
+ const uint8_t cmd_len = 3;
+ const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) |
+ (cmd_len - 2);
+ int vs_alloc_size, gs_alloc_size;
+ int vs_entry_count, gs_entry_count;
+ uint32_t *dw;
+
+ /* in 1024-bit rows */
+ vs_alloc_size = (vs_entry_size + 128 - 1) / 128;
+ gs_alloc_size = (gs_entry_size + 128 - 1) / 128;
+
+ /* valid range is [1, 5] */
+ if (!vs_alloc_size)
+ vs_alloc_size = 1;
+ if (!gs_alloc_size)
+ gs_alloc_size = 1;
+ assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
+
+ /* valid range is [24, 256], multiples of 4 */
+ vs_entry_count = (vs_size / 128 / vs_alloc_size) & ~3;
+ if (vs_entry_count > 256)
+ vs_entry_count = 256;
+ assert(vs_entry_count >= 24);
+
+ /* valid range is [0, 256], multiples of 4 */
+ gs_entry_count = (gs_size / 128 / gs_alloc_size) & ~3;
+ if (gs_entry_count > 256)
+ gs_entry_count = 256;
+
+ STATIC_ASSERT(ARRAY_SIZE(pipeline->cmd_urb_alloc) >= cmd_len);
+ pipeline->cmd_urb_alloc_len = cmd_len;
+ dw = pipeline->cmd_urb_alloc;
+
+ dw[0] = dw0;
+ dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
+ vs_entry_count << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
+ dw[2] = gs_entry_count << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
+ (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
+ }
+}
+
+static void builder_build_urb_alloc_gen7(struct intel_pipeline_builder *builder,
+ struct intel_pipeline *pipeline)
+{
+ const int urb_size = ((builder->gpu->gt == 3) ? 512 :
+ (builder->gpu->gt == 2) ? 256 : 128) * 1024;
+ const struct intel_shader *vs = intel_shader(builder->vs.shader);
+ const struct intel_shader *gs = intel_shader(builder->gs.shader);
+ /* some space is reserved for PCBs */
+ int urb_offset = ((builder->gpu->gt == 3) ? 32 : 16) * 1024;
+ int vs_entry_size, gs_entry_size;
+ int vs_size, gs_size;
+
+ INTEL_GPU_ASSERT(builder->gpu, 7, 7.5);
+
+ vs_entry_size = ((vs->in_count >= vs->out_count) ?
+ vs->in_count : vs->out_count);
+ gs_entry_size = (gs) ? gs->out_count : 0;
+
+ /* in bytes */
+ vs_entry_size *= sizeof(float) * 4;
+ gs_entry_size *= sizeof(float) * 4;
+
+ if (gs) {
+ vs_size = (urb_size - urb_offset) / 2;
+ gs_size = vs_size;
+ } else {
+ vs_size = urb_size - urb_offset;
+ gs_size = 0;
+ }
+
+ /* 3DSTATE_URB_* */
+ {
+ const uint8_t cmd_len = 2;
+ int vs_alloc_size, gs_alloc_size;
+ int vs_entry_count, gs_entry_count;
+ uint32_t *dw;
+
+ /* in 512-bit rows */
+ vs_alloc_size = (vs_entry_size + 64 - 1) / 64;
+ gs_alloc_size = (gs_entry_size + 64 - 1) / 64;
+
+ if (!vs_alloc_size)
+ vs_alloc_size = 1;
+ if (!gs_alloc_size)
+ gs_alloc_size = 1;
+
+ /* avoid performance decrease due to banking */
+ if (vs_alloc_size == 5)
+ vs_alloc_size = 6;
+
+ /* in multiples of 8 */
+ vs_entry_count = (vs_size / 64 / vs_alloc_size) & ~7;
+ assert(vs_entry_count >= 32);
+
+ gs_entry_count = (gs_size / 64 / gs_alloc_size) & ~7;
+
+ if (intel_gpu_gen(builder->gpu) >= INTEL_GEN(7.5)) {
+ const int max_vs_entry_count =
+ (builder->gpu->gt >= 2) ? 1644 : 640;
+ const int max_gs_entry_count =
+ (builder->gpu->gt >= 2) ? 640 : 256;
+ if (vs_entry_count >= max_vs_entry_count)
+ vs_entry_count = max_vs_entry_count;
+ if (gs_entry_count >= max_gs_entry_count)
+ gs_entry_count = max_gs_entry_count;
+ } else {
+ const int max_vs_entry_count =
+ (builder->gpu->gt == 2) ? 704 : 512;
+ const int max_gs_entry_count =
+ (builder->gpu->gt == 2) ? 320 : 192;
+ if (vs_entry_count >= max_vs_entry_count)
+ vs_entry_count = max_vs_entry_count;
+ if (gs_entry_count >= max_gs_entry_count)
+ gs_entry_count = max_gs_entry_count;
+ }
+
+ STATIC_ASSERT(ARRAY_SIZE(pipeline->cmd_urb_alloc) >= cmd_len * 4);
+ pipeline->cmd_urb_alloc_len = cmd_len * 4;
+
+ dw = pipeline->cmd_urb_alloc;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2);
+ dw[1] = (urb_offset / 8192) << GEN7_URB_ANY_DW1_OFFSET__SHIFT |
+ (vs_alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT |
+ vs_entry_count;
+
+ dw += 2;
+ if (gs_size)
+ urb_offset += vs_size;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2);
+ dw[1] = (urb_offset / 8192) << GEN7_URB_ANY_DW1_OFFSET__SHIFT |
+ (gs_alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT |
+ gs_entry_count;
+
+ dw += 2;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2);
+ dw[1] = (urb_offset / 8192) << GEN7_URB_ANY_DW1_OFFSET__SHIFT;
+
+ dw += 2;
+ dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2);
+ dw[1] = (urb_offset / 8192) << GEN7_URB_ANY_DW1_OFFSET__SHIFT;
+ }
+}
+
+static XGL_RESULT builder_build_all(struct intel_pipeline_builder *builder,
+ struct intel_pipeline *pipeline)
{
XGL_RESULT ret;
+ if (intel_gpu_gen(builder->gpu) >= INTEL_GEN(7)) {
+ builder_build_urb_alloc_gen7(builder, pipeline);
+ } else {
+ builder_build_urb_alloc_gen6(builder, pipeline);
+ }
+
ret = pipeline_ia_state(pipeline, &builder->ia);
if (ret == XGL_SUCCESS)
@@ -440,7 +619,7 @@
pipeline->obj.destroy = pipeline_destroy;
pipeline->total_size = 0;
- ret = builder_build(&builder, pipeline);
+ ret = builder_build_all(&builder, pipeline);
if (ret == XGL_SUCCESS)
ret = builder_validate(&builder, pipeline);
if (ret != XGL_SUCCESS) {