intel: disable PS for depth/stencil clears

We can set the Z coordinate of the clear rectangle to the depth clear value
and set the stencil reference value to the stencil clear value.  Neither
requires the pixel shader (PS) to be enabled.
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index 17ce552..f8f23ea 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -2180,33 +2180,64 @@
 static void gen6_meta_vf(struct intel_cmd *cmd)
 {
     const struct intel_cmd_meta *meta = cmd->bind.meta;
-    XGL_UINT vertices[3][2];
-    uint32_t offset, *dw;
+    uint32_t vb_start, vb_end, vb_stride;
+    int ve_format, ve_z_source;
+    uint32_t *dw;
     XGL_UINT pos;
 
     CMD_ASSERT(cmd, 6, 7.5);
 
     /* write vertices */
-    vertices[0][0] = meta->dst.x + meta->width;
-    vertices[0][1] = meta->dst.y + meta->height;
-    vertices[1][0] = meta->dst.x;
-    vertices[1][1] = meta->dst.y + meta->height;
-    vertices[2][0] = meta->dst.x;
-    vertices[2][1] = meta->dst.y;
-    offset = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
-            sizeof(vertices) / 4, (const uint32_t *) vertices);
+    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH) {
+        XGL_FLOAT vertices[3][3];
+
+        vertices[0][0] = (XGL_FLOAT) (meta->dst.x + meta->width);
+        vertices[0][1] = (XGL_FLOAT) (meta->dst.y + meta->height);
+        vertices[0][2] = u_uif(meta->clear_val[0]);
+        vertices[1][0] = (XGL_FLOAT) meta->dst.x;
+        vertices[1][1] = (XGL_FLOAT) (meta->dst.y + meta->height);
+        vertices[1][2] = u_uif(meta->clear_val[0]);
+        vertices[2][0] = (XGL_FLOAT) meta->dst.x;
+        vertices[2][1] = (XGL_FLOAT) meta->dst.y;
+        vertices[2][2] = u_uif(meta->clear_val[0]);
+
+        vb_start = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
+                sizeof(vertices) / 4, (const uint32_t *) vertices);
+
+        vb_end = vb_start + sizeof(vertices) - 1;
+        vb_stride = sizeof(vertices[0]);
+        ve_z_source = GEN6_VFCOMP_STORE_SRC;
+        ve_format = GEN6_FORMAT_R32G32B32_FLOAT;
+    } else {
+        XGL_UINT vertices[3][2];
+
+        vertices[0][0] = meta->dst.x + meta->width;
+        vertices[0][1] = meta->dst.y + meta->height;
+        vertices[1][0] = meta->dst.x;
+        vertices[1][1] = meta->dst.y + meta->height;
+        vertices[2][0] = meta->dst.x;
+        vertices[2][1] = meta->dst.y;
+
+        vb_start = cmd_state_write(cmd, INTEL_CMD_ITEM_BLOB, 32,
+                sizeof(vertices) / 4, (const uint32_t *) vertices);
+
+        vb_end = vb_start + sizeof(vertices) - 1;
+        vb_stride = sizeof(vertices[0]);
+        ve_z_source = GEN6_VFCOMP_STORE_0;
+        ve_format = GEN6_FORMAT_R32G32_USCALED;
+    }
 
     /* 3DSTATE_VERTEX_BUFFERS */
     pos = cmd_batch_pointer(cmd, 5, &dw);
+
     dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (5 - 2);
-    dw[1] = sizeof(vertices[0]);
+    dw[1] = vb_stride;
     if (cmd_gen(cmd) >= INTEL_GEN(7))
         dw[1] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
 
     cmd_reserve_reloc(cmd, 2);
-    cmd_batch_reloc_writer(cmd, pos + 2, INTEL_CMD_WRITER_STATE, offset);
-    cmd_batch_reloc_writer(cmd, pos + 3, INTEL_CMD_WRITER_STATE,
-            offset + sizeof(vertices) - 1);
+    cmd_batch_reloc_writer(cmd, pos + 2, INTEL_CMD_WRITER_STATE, vb_start);
+    cmd_batch_reloc_writer(cmd, pos + 3, INTEL_CMD_WRITER_STATE, vb_end);
 
     dw[4] = 0;
 
@@ -2219,15 +2250,16 @@
             GEN6_VFCOMP_STORE_0 << GEN6_VE_STATE_DW1_COMP2__SHIFT | /* Viewport Index */
             GEN6_VFCOMP_STORE_0 << GEN6_VE_STATE_DW1_COMP3__SHIFT;  /* Point Width */
     dw[3] = GEN6_VE_STATE_DW0_VALID |
-            GEN6_FORMAT_R32G32_USCALED << GEN6_VE_STATE_DW0_FORMAT__SHIFT;
+            ve_format << GEN6_VE_STATE_DW0_FORMAT__SHIFT;
     dw[4] = GEN6_VFCOMP_STORE_SRC  << GEN6_VE_STATE_DW1_COMP0__SHIFT |
             GEN6_VFCOMP_STORE_SRC  << GEN6_VE_STATE_DW1_COMP1__SHIFT |
-            GEN6_VFCOMP_STORE_0    << GEN6_VE_STATE_DW1_COMP2__SHIFT |
+            ve_z_source            << GEN6_VE_STATE_DW1_COMP2__SHIFT |
             GEN6_VFCOMP_STORE_1_FP << GEN6_VE_STATE_DW1_COMP3__SHIFT;
 }
 
 static void gen6_meta_disabled(struct intel_cmd *cmd)
 {
+    const struct intel_cmd_meta *meta = cmd->bind.meta;
     uint32_t *dw;
 
     CMD_ASSERT(cmd, 6, 6);
@@ -2279,10 +2311,33 @@
     dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (20 - 2);
     dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
     memset(&dw[2], 0, 18 * sizeof(*dw));
+
+    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH) {
+        /* 3DSTATE_CONSTANT_PS */
+        cmd_batch_pointer(cmd, 5, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (5 - 2);
+        dw[1] = 0;
+        dw[2] = 0;
+        dw[3] = 0;
+        dw[4] = 0;
+
+        /* 3DSTATE_WM */
+        cmd_batch_pointer(cmd, 9, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (9 - 2);
+        dw[1] = 0;
+        dw[2] = 0;
+        dw[3] = 0;
+        dw[4] = 0;
+        dw[5] = (40 - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+        dw[6] = 0;
+        dw[7] = 0;
+        dw[8] = 0;
+    }
 }
 
 static void gen7_meta_disabled(struct intel_cmd *cmd)
 {
+    const struct intel_cmd_meta *meta = cmd->bind.meta;
     uint32_t *dw;
 
     CMD_ASSERT(cmd, 7, 7.5);
@@ -2352,6 +2407,30 @@
     dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (14 - 2);
     dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
     memset(&dw[2], 0, sizeof(*dw) * (14 - 2));
+
+    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH) {
+        /* 3DSTATE_WM */
+        cmd_batch_pointer(cmd, 3, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (3 - 2);
+        memset(&dw[1], 0, sizeof(*dw) * (3 - 1));
+
+        /* 3DSTATE_CONSTANT_PS */
+        cmd_batch_pointer(cmd, 7, &dw);
+        dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | (7 - 2);
+        memset(&dw[1], 0, sizeof(*dw) * (7 - 1));
+
+        /* 3DSTATE_PS */
+        cmd_batch_pointer(cmd, 8, &dw);
+        dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (8 - 2);
+        dw[1] = 0;
+        dw[2] = 0;
+        dw[3] = 0;
+        dw[4] = GEN7_PS_DW4_8_PIXEL_DISPATCH | /* required to avoid hangs */
+                (48 - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+        dw[5] = 0;
+        dw[6] = 0;
+        dw[7] = 0;
+    }
 }
 
 static void gen6_meta_wm(struct intel_cmd *cmd)
@@ -2481,6 +2560,11 @@
 
     CMD_ASSERT(cmd, 6, 6);
 
+    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH)
+        return;
+    /* a normal color write */
+    assert(meta->dst.valid && !sh->uses);
+
     /* 3DSTATE_CONSTANT_PS */
     offset = gen6_meta_ps_constants(cmd);
     cmd_batch_pointer(cmd, 5, &dw);
@@ -2528,6 +2612,11 @@
 
     CMD_ASSERT(cmd, 7, 7.5);
 
+    if (meta->shader_id == INTEL_DEV_META_FS_CLEAR_DEPTH)
+        return;
+    /* a normal color write */
+    assert(meta->dst.valid && !sh->uses);
+
     /* 3DSTATE_WM */
     cmd_batch_pointer(cmd, 3, &dw);
     dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (3 - 2);