intel/blorp: Add support for gen4-5 SF programs

As part of enabling support for SF programs, we plumb the SF URB size
through to emit_urb_config.  For now, it's always zero but, on gen4, it
may be something larger.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
index f9727f7..1f29b93 100644
--- a/src/intel/blorp/blorp.c
+++ b/src/intel/blorp/blorp.c
@@ -216,6 +216,68 @@
    return program;
 }
 
+struct blorp_sf_key {
+   enum blorp_shader_type shader_type; /* Must be BLORP_SHADER_TYPE_GEN4_SF */
+
+   struct brw_sf_prog_key key;
+};
+
+bool
+blorp_ensure_sf_program(struct blorp_context *blorp,
+                        struct blorp_params *params)
+{
+   const struct brw_wm_prog_data *wm_prog_data = params->wm_prog_data;
+   assert(params->wm_prog_data);
+
+   /* Gen6+ doesn't need a strips and fans program */
+   if (blorp->compiler->devinfo->gen >= 6)
+      return true;
+
+   struct blorp_sf_key key = {
+      .shader_type = BLORP_SHADER_TYPE_GEN4_SF,
+   };
+
+   /* Everything gets compacted in vertex setup, so we just need a
+    * pass-through for the correct number of input varyings.
+    */
+   const uint64_t slots_valid = VARYING_BIT_POS |
+      ((1ull << wm_prog_data->num_varying_inputs) - 1) << VARYING_SLOT_VAR0;
+
+   key.key.attrs = slots_valid;
+   key.key.primitive = BRW_SF_PRIM_TRIANGLES;
+   key.key.contains_flat_varying = wm_prog_data->contains_flat_varying;
+
+   STATIC_ASSERT(sizeof(key.key.interp_mode) ==
+                 sizeof(wm_prog_data->interp_mode));
+   memcpy(key.key.interp_mode, wm_prog_data->interp_mode,
+          sizeof(key.key.interp_mode));
+
+   if (blorp->lookup_shader(blorp, &key, sizeof(key),
+                            &params->sf_prog_kernel, &params->sf_prog_data))
+      return true;
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   const unsigned *program;
+   unsigned program_size;
+
+   struct brw_vue_map vue_map;
+   brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false);
+
+   struct brw_sf_prog_data prog_data_tmp;
+   program = brw_compile_sf(blorp->compiler, mem_ctx, &key.key,
+                            &prog_data_tmp, &vue_map, &program_size);
+
+   bool result =
+      blorp->upload_shader(blorp, &key, sizeof(key), program, program_size,
+                           (void *)&prog_data_tmp, sizeof(prog_data_tmp),
+                           &params->sf_prog_kernel, &params->sf_prog_data);
+
+   ralloc_free(mem_ctx);
+
+   return result;
+}
+
 void
 blorp_gen6_hiz_op(struct blorp_batch *batch,
                   struct blorp_surf *surf, unsigned level, unsigned layer,
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 8408ebc..1f8ea49 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1835,6 +1835,9 @@
    if (!brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key))
       return 0;
 
+   if (!blorp_ensure_sf_program(batch->blorp, params))
+      return 0;
+
    unsigned result = 0;
    unsigned max_surface_size = get_max_surface_size(devinfo, params);
    if (params->src.surf.logical_level0_px.width > max_surface_size ||
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index a9eb6b9..40a1a10 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -419,6 +419,9 @@
                                       use_simd16_replicated_data))
       return;
 
+   if (!blorp_ensure_sf_program(batch->blorp, &params))
+      return;
+
    while (num_layers > 0) {
       brw_blorp_surface_info_init(batch->blorp, &params.dst, surf, level,
                                   start_layer, format, true);
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index c6c14f5..058dedc 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -73,7 +73,8 @@
                     struct blorp_address address, uint32_t delta);
 
 static void
-blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size);
+blorp_emit_urb_config(struct blorp_batch *batch,
+                      unsigned vs_entry_size, unsigned sf_entry_size);
 
 /***** BEGIN blorp_exec implementation ******/
 
@@ -180,7 +181,10 @@
    /* The URB size is expressed in units of 64 bytes (512 bits) */
    const unsigned vs_entry_size = DIV_ROUND_UP(total_needed, 64);
 
-   blorp_emit_urb_config(batch, vs_entry_size);
+   const unsigned sf_entry_size =
+      params->sf_prog_data ? params->sf_prog_data->urb_entry_size : 0;
+
+   blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size);
 }
 
 static void
diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h
index 5f3362a..b3a5b2b 100644
--- a/src/intel/blorp/blorp_priv.h
+++ b/src/intel/blorp/blorp_priv.h
@@ -194,6 +194,8 @@
    unsigned num_layers;
    uint32_t vs_prog_kernel;
    struct brw_vs_prog_data *vs_prog_data;
+   uint32_t sf_prog_kernel;
+   struct brw_sf_prog_data *sf_prog_data;
    uint32_t wm_prog_kernel;
    struct brw_wm_prog_data *wm_prog_data;
 
@@ -207,6 +209,7 @@
    BLORP_SHADER_TYPE_BLIT,
    BLORP_SHADER_TYPE_CLEAR,
    BLORP_SHADER_TYPE_LAYER_OFFSET_VS,
+   BLORP_SHADER_TYPE_GEN4_SF,
 };
 
 struct brw_blorp_blit_prog_key
@@ -341,6 +344,10 @@
                  struct brw_vs_prog_data *vs_prog_data,
                  unsigned *program_size);
 
+bool
+blorp_ensure_sf_program(struct blorp_context *blorp,
+                        struct blorp_params *params);
+
 /** \} */
 
 #ifdef __cplusplus
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 71ed707..eb33899 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -148,11 +148,14 @@
 }
 
 static void
-blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size)
+blorp_emit_urb_config(struct blorp_batch *batch,
+                      unsigned vs_entry_size, unsigned sf_entry_size)
 {
    struct anv_device *device = batch->blorp->driver_ctx;
    struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
 
+   assert(sf_entry_size == 0);
+
    const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };
 
    genX(emit_urb_setup)(device, &cmd_buffer->batch,
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 7157420..72ac274 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -155,7 +155,8 @@
 }
 
 static void
-blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size)
+blorp_emit_urb_config(struct blorp_batch *batch,
+                      unsigned vs_entry_size, unsigned sf_entry_size)
 {
    assert(batch->blorp->driver_ctx == batch->driver_batch);
    struct brw_context *brw = batch->driver_batch;